|
2024-06-29 19:18:03,095 - INFO - allennlp.common.params - random_seed = 13370 |
|
2024-06-29 19:18:03,095 - INFO - allennlp.common.params - numpy_seed = 1337 |
|
2024-06-29 19:18:03,095 - INFO - allennlp.common.params - pytorch_seed = 133 |
|
2024-06-29 19:18:03,096 - INFO - allennlp.common.checks - Pytorch version: 2.3.1+cu121 |
|
2024-06-29 19:18:03,096 - INFO - allennlp.common.params - type = default |
|
2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.type = compreno_ud_dataset_reader |
|
2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched |
|
2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0 |
|
2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = xlm-roberta-base |
|
2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags |
|
2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = None |
|
2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None |
|
2024-06-29 19:18:05,153 - INFO - allennlp.common.params - train_data_path = data/train.conllu |
|
2024-06-29 19:18:05,153 - INFO - allennlp.common.params - datasets_for_vocab_creation = None |
|
2024-06-29 19:18:05,153 - INFO - allennlp.common.params - validation_dataset_reader = None |
|
2024-06-29 19:18:05,153 - INFO - allennlp.common.params - validation_data_path = data/validation.conllu |
|
2024-06-29 19:18:05,153 - INFO - allennlp.common.params - test_data_path = None |
|
2024-06-29 19:18:05,153 - INFO - allennlp.common.params - evaluate_on_test = False |
|
2024-06-29 19:18:05,153 - INFO - allennlp.common.params - batch_weight_key = |
|
2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.type = multiprocess |
|
2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.batch_size = 24 |
|
2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.drop_last = False |
|
2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.shuffle = True |
|
2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.batch_sampler = None |
|
2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.batches_per_epoch = None |
|
2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.num_workers = 0 |
|
2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.max_instances_in_memory = None |
|
2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.start_method = fork |
|
2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.cuda_device = None |
|
2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.quiet = False |
|
2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f657d497df0> |
|
2024-06-29 19:18:05,154 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] |
|
2024-06-29 19:18:15,215 - INFO - tqdm - loading instances: 25625it [00:10, 2590.96it/s] |
|
2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess |
|
2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.batch_size = 24 |
|
2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.drop_last = False |
|
2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.shuffle = False |
|
2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None |
|
2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None |
|
2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0 |
|
2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None |
|
2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.start_method = fork |
|
2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None |
|
2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.quiet = False |
|
2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.collate_fn = <allennlp.data.data_loaders.data_collator.DefaultDataCollator object at 0x7f657d497df0> |
|
2024-06-29 19:18:15,765 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] |
|
2024-06-29 19:18:18,810 - INFO - allennlp.common.params - vocabulary.type = from_instances |
|
2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.max_vocab_size = None |
|
2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.non_padded_namespaces = ('*tags', '*labels') |
|
2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.pretrained_files = None |
|
2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.only_include_pretrained_words = False |
|
2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.min_pretrained_embeddings = None |
|
2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.padding_token = @@PADDING@@ |
|
2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.oov_token = @@UNKNOWN@@ |
|
2024-06-29 19:18:18,811 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset. |
|
2024-06-29 19:18:18,811 - INFO - tqdm - building vocab: 0it [00:00, ?it/s] |
|
2024-06-29 19:18:19,437 - INFO - allennlp.common.params - model.type = morpho_syntax_semantic_parser |
|
2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.type = pretrained_transformer_mismatched |
|
2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.token_min_padding_length = 0 |
|
2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.model_name = xlm-roberta-base |
|
2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.namespace = tags |
|
2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.max_length = None |
|
2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.tokenizer_kwargs = None |
|
2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.type = pretrained_transformer_mismatched |
|
2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.model_name = xlm-roberta-base |
|
2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.max_length = None |
|
2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.sub_module = None |
|
2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.train_parameters = True |
|
2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.last_layer_only = True |
|
2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.override_weights_file = None |
|
2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.override_weights_strip_prefix = None |
|
2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.load_weights = True |
|
2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.gradient_checkpointing = None |
|
2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.tokenizer_kwargs = None |
|
2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.transformer_kwargs = None |
|
2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.sub_token_mode = avg |
|
2024-06-29 19:18:20,189 - INFO - allennlp.common.params - model.lemma_rule_classifier.hid_dim = 512 |
|
2024-06-29 19:18:20,189 - INFO - allennlp.common.params - model.lemma_rule_classifier.activation = relu |
|
2024-06-29 19:18:20,189 - INFO - allennlp.common.params - model.lemma_rule_classifier.dropout = 0.1 |
|
2024-06-29 19:18:20,190 - INFO - allennlp.common.params - model.lemma_rule_classifier.dictionaries = [] |
|
2024-06-29 19:18:20,190 - INFO - allennlp.common.params - model.lemma_rule_classifier.topk = None |
|
2024-06-29 19:18:20,192 - INFO - allennlp.common.params - model.pos_feats_classifier.hid_dim = 256 |
|
2024-06-29 19:18:20,192 - INFO - allennlp.common.params - model.pos_feats_classifier.activation = relu |
|
2024-06-29 19:18:20,192 - INFO - allennlp.common.params - model.pos_feats_classifier.dropout = 0.1 |
|
2024-06-29 19:18:20,194 - INFO - allennlp.common.params - model.depencency_classifier.hid_dim = 128 |
|
2024-06-29 19:18:20,194 - INFO - allennlp.common.params - model.depencency_classifier.activation = relu |
|
2024-06-29 19:18:20,194 - INFO - allennlp.common.params - model.depencency_classifier.dropout = 0.1 |
|
2024-06-29 19:18:20,216 - INFO - allennlp.common.params - model.misc_classifier.hid_dim = 128 |
|
2024-06-29 19:18:20,216 - INFO - allennlp.common.params - model.misc_classifier.activation = relu |
|
2024-06-29 19:18:20,216 - INFO - allennlp.common.params - model.misc_classifier.dropout = 0.1 |
|
2024-06-29 19:18:20,217 - INFO - allennlp.common.params - model.semslot_classifier.hid_dim = 1024 |
|
2024-06-29 19:18:20,217 - INFO - allennlp.common.params - model.semslot_classifier.activation = relu |
|
2024-06-29 19:18:20,217 - INFO - allennlp.common.params - model.semslot_classifier.dropout = 0.1 |
|
2024-06-29 19:18:20,220 - INFO - allennlp.common.params - model.semclass_classifier.hid_dim = 1024 |
|
2024-06-29 19:18:20,220 - INFO - allennlp.common.params - model.semclass_classifier.activation = relu |
|
2024-06-29 19:18:20,220 - INFO - allennlp.common.params - model.semclass_classifier.dropout = 0.1 |
|
2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.hid_dim = 512 |
|
2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.activation = relu |
|
2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.dropout = 0.1 |
|
2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.positive_class_weight = 1.0 |
|
2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.type = gradient_descent |
|
2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.cuda_device = 0 |
|
2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.distributed = False |
|
2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.world_size = 1 |
|
2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.patience = None |
|
2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.validation_metric = +Avg |
|
2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.num_epochs = 10 |
|
2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.grad_norm = False |
|
2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.grad_clipping = 5 |
|
2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.num_gradient_accumulation_steps = 1 |
|
2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.use_amp = False |
|
2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.no_grad = None |
|
2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.momentum_scheduler = None |
|
2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.moving_average = None |
|
2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.checkpointer = <allennlp.common.lazy.Lazy object at 0x7f657755ed30> |
|
2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.enable_default_callbacks = True |
|
2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.run_confidence_checks = True |
|
2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.grad_scaling = True |
|
2024-06-29 19:18:34,391 - INFO - allennlp.common.params - trainer.optimizer.type = adam |
|
2024-06-29 19:18:34,391 - INFO - allennlp.common.params - trainer.optimizer.lr = 0.01 |
|
2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.betas = (0.9, 0.999) |
|
2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.eps = 1e-08 |
|
2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.weight_decay = 0.0 |
|
2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.amsgrad = False |
|
2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Done constructing parameter groups. |
|
2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Group 0: ['embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.pooler.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.pooler.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight'], {} |
|
2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Group 1: ['lemma_rule_classifier.classifier.1.bias', 'null_classifier.classifier.1.weight', 'semclass_classifier.classifier.4.bias', 'lemma_rule_classifier.classifier.1.weight', 'dependency_classifier.arc_attention_ud._bias', 'dependency_classifier.arc_head_mlp.1.weight', 'dependency_classifier.rel_dep_mlp.1.bias', 'pos_feats_classifier.classifier.4.bias', 'lemma_rule_classifier.classifier.4.weight', 'dependency_classifier.arc_attention_eud._bias', 'null_classifier.classifier.1.bias', 'null_classifier.classifier.4.bias', 'dependency_classifier.rel_attention_ud._weight_matrix', 'dependency_classifier.arc_head_mlp.1.bias', 'dependency_classifier.rel_dep_mlp.1.weight', 'dependency_classifier.arc_attention_ud._weight_matrix', 'null_classifier.classifier.4.weight', 'semslot_classifier.classifier.4.weight', 'pos_feats_classifier.classifier.1.weight', 'dependency_classifier.rel_head_mlp.1.weight', 'semclass_classifier.classifier.4.weight', 'semclass_classifier.classifier.1.weight', 'dependency_classifier.arc_dep_mlp.1.bias', 'lemma_rule_classifier.classifier.4.bias', 'pos_feats_classifier.classifier.1.bias', 'misc_classifier.classifier.1.weight', 'semslot_classifier.classifier.1.bias', 'semclass_classifier.classifier.1.bias', 'misc_classifier.classifier.4.bias', 'dependency_classifier.rel_attention_eud._weight_matrix', 'pos_feats_classifier.classifier.4.weight', 'semslot_classifier.classifier.4.bias', 'semslot_classifier.classifier.1.weight', 'dependency_classifier.arc_attention_eud._weight_matrix', 'misc_classifier.classifier.4.weight', 'misc_classifier.classifier.1.bias', 'dependency_classifier.arc_dep_mlp.1.weight', 'dependency_classifier.rel_head_mlp.1.bias', 'dependency_classifier.rel_attention_ud._bias', 'dependency_classifier.rel_attention_eud._bias'], {} |
|
2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Group 2: [], {} |
|
2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Number of trainable parameters: 287815418 |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - The following parameters are Frozen (without gradient): |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - The following parameters are Tunable (with gradient): |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight |
|
2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias |
|
2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight |
|
2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias |
|
2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias |
|
2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.weight |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.bias |
|
2024-06-29 19:18:34,399 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._weight_matrix |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._weight_matrix |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._weight_matrix |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._weight_matrix |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.1.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.1.bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.4.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.4.bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.1.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.1.bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.4.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.4.bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.1.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.1.bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.4.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.4.bias |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - null_classifier.classifier.1.weight |
|
2024-06-29 19:18:34,400 - INFO - allennlp.common.util - null_classifier.classifier.1.bias |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.util - null_classifier.classifier.4.weight |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.util - null_classifier.classifier.4.bias |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.type = slanted_triangular |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.cut_frac = 0 |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.ratio = 32 |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.last_epoch = -1 |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.gradual_unfreezing = True |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.discriminative_fine_tuning = True |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.decay_factor = 0.001 |
|
2024-06-29 19:18:34,401 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing. Training only the top 1 layers. |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.params - type = default |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.params - save_completed_epochs = True |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.params - save_every_num_seconds = None |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.params - save_every_num_batches = None |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.params - keep_most_recent_by_count = 2 |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.params - keep_most_recent_by_age = None |
|
2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.callbacks.0.type = tensorboard |
|
2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.summary_interval = 100 |
|
2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.distribution_interval = None |
|
2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.batch_size_interval = None |
|
2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_parameter_statistics = False |
|
2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_learning_rate = True |
|
2024-06-29 19:18:34,403 - WARNING - allennlp.training.gradient_descent_trainer - You provided a validation dataset but patience was set to None, meaning that early stopping is disabled |
|
2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - Beginning training. |
|
2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - Epoch 0/9 |
|
2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.5G |
|
2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G |
|
2024-06-29 19:18:34,406 - INFO - allennlp.training.gradient_descent_trainer - Training |
|
2024-06-29 19:18:34,406 - INFO - tqdm - 0%| | 0/1147 [00:00<?, ?it/s] |
|
2024-06-29 19:18:34,804 - INFO - allennlp.training.callbacks.console_logger - Batch inputs |
|
2024-06-29 19:18:34,804 - INFO - allennlp.training.callbacks.console_logger - batch_input/words/tokens/token_ids (Shape: 24 x 78) |
|
tensor([[ 0, 1509, 424, ..., 1, 1, 1], |
|
[ 0, 143770, 468, ..., 1, 1, 1], |
|
[ 0, 804, 6, ..., 1, 1, 1], |
|
..., |
|
[ 0, 417, 20755, ..., 1, 1, 1], |
|
[ 0, 60430, 49, ..., 1, 1, 1], |
|
[ 0, 468, 33261, ..., 1, 1, 1]], |
|
device='cuda:0') |
|
2024-06-29 19:18:34,805 - INFO - allennlp.training.callbacks.console_logger - batch_input/words/tokens/mask (Shape: 24 x 48) |
|
tensor([[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False], |
|
..., |
|
[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False]], device='cuda:0') |
|
2024-06-29 19:18:34,806 - INFO - allennlp.training.callbacks.console_logger - batch_input/words/tokens/type_ids (Shape: 24 x 78) |
|
tensor([[0, 0, 0, ..., 0, 0, 0], |
|
[0, 0, 0, ..., 0, 0, 0], |
|
[0, 0, 0, ..., 0, 0, 0], |
|
..., |
|
[0, 0, 0, ..., 0, 0, 0], |
|
[0, 0, 0, ..., 0, 0, 0], |
|
[0, 0, 0, ..., 0, 0, 0]], device='cuda:0') |
|
2024-06-29 19:18:34,808 - INFO - allennlp.training.callbacks.console_logger - batch_input/words/tokens/wordpiece_mask (Shape: 24 x 78) |
|
tensor([[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False], |
|
..., |
|
[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False]], device='cuda:0') |
|
2024-06-29 19:18:34,809 - INFO - allennlp.training.callbacks.console_logger - batch_input/words/tokens/offsets (Shape: 24 x 48 x 2) |
|
tensor([[[1, 1], |
|
[2, 4], |
|
[5, 5], |
|
..., |
|
[0, 0], |
|
[0, 0], |
|
[0, 0]], |
|
|
|
[[1, 1], |
|
[2, 4], |
|
[5, 5], |
|
..., |
|
[0, 0], |
|
[0, 0], |
|
[0, 0]], |
|
|
|
[[1, 1], |
|
[2, 3], |
|
[4, 4], |
|
..., |
|
[0, 0], |
|
[0, 0], |
|
[0, 0]], |
|
|
|
..., |
|
|
|
[[1, 1], |
|
[2, 2], |
|
[3, 3], |
|
..., |
|
[0, 0], |
|
[0, 0], |
|
[0, 0]], |
|
|
|
[[1, 1], |
|
[2, 2], |
|
[3, 3], |
|
..., |
|
[0, 0], |
|
[0, 0], |
|
[0, 0]], |
|
|
|
[[1, 3], |
|
[4, 4], |
|
[5, 6], |
|
..., |
|
[0, 0], |
|
[0, 0], |
|
[0, 0]]], device='cuda:0') |
|
2024-06-29 19:18:34,811 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/sentences" : (Length 24 of type "<class 'list'>") |
|
2024-06-29 19:18:34,811 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 48) |
|
tensor([[ 0, 8, 0, ..., 0, 0, 0], |
|
[ 0, 13, 0, ..., 0, 0, 0], |
|
[ 0, 0, 0, ..., 0, 0, 0], |
|
..., |
|
[ 0, 9, 0, ..., 0, 0, 0], |
|
[ 0, 0, 33, ..., 0, 0, 0], |
|
[ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0') |
|
2024-06-29 19:18:34,813 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 48) |
|
tensor([[143, 5, 16, ..., 0, 0, 0], |
|
[ 48, 24, 2, ..., 0, 0, 0], |
|
[ 7, 0, 1, ..., 0, 0, 0], |
|
..., |
|
[ 1, 31, 1, ..., 0, 0, 0], |
|
[ 43, 1, 167, ..., 0, 0, 0], |
|
[ 24, 14, 0, ..., 0, 0, 0]], device='cuda:0') |
|
2024-06-29 19:18:34,814 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 48 x 48) |
|
tensor([[[-1, 3, -1, ..., -1, -1, -1], |
|
[-1, 5, -1, ..., -1, -1, -1], |
|
[-1, 8, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
..., |
|
|
|
[[-1, 2, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') |
|
2024-06-29 19:18:34,820 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 48 x 48) |
|
tensor([[[-1, 3, -1, ..., -1, -1, -1], |
|
[-1, 2, -1, ..., -1, -1, -1], |
|
[-1, 7, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
..., |
|
|
|
[[-1, 1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[ 2, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') |
|
2024-06-29 19:18:34,826 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 48) |
|
tensor([[0, 0, 0, ..., 0, 0, 0], |
|
[0, 2, 0, ..., 0, 0, 0], |
|
[1, 0, 0, ..., 0, 0, 0], |
|
..., |
|
[0, 0, 0, ..., 0, 0, 0], |
|
[0, 0, 0, ..., 0, 0, 0], |
|
[2, 1, 0, ..., 0, 0, 0]], device='cuda:0') |
|
2024-06-29 19:18:34,827 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 48) |
|
tensor([[12, 1, 3, ..., 0, 0, 0], |
|
[21, 1, 35, ..., 0, 0, 0], |
|
[ 0, 0, 0, ..., 0, 0, 0], |
|
..., |
|
[ 0, 7, 0, ..., 0, 0, 0], |
|
[65, 0, 13, ..., 0, 0, 0], |
|
[ 1, 0, 0, ..., 0, 0, 0]], device='cuda:0') |
|
2024-06-29 19:18:34,828 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 48) |
|
tensor([[ 2, 143, 35, ..., 0, 0, 0], |
|
[ 2, 8, 11, ..., 0, 0, 0], |
|
[ 7, 0, 1, ..., 0, 0, 0], |
|
..., |
|
[ 1, 10, 1, ..., 0, 0, 0], |
|
[ 11, 1, 3, ..., 0, 0, 0], |
|
[115, 9, 0, ..., 0, 0, 0]], device='cuda:0') |
|
2024-06-29 19:18:34,829 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/metadata" : (Length 24 of type "<class 'conllu.models.Metadata'>") |
|
2024-06-29 19:18:44,411 - INFO - tqdm - NullAccuracy: 0.9800, NullF1: 0.0752, Lemma: 0.7294, PosFeats: 0.6473, UD-UAS: 0.4483, UD-LAS: 0.4317, EUD-UAS: 0.1246, EUD-LAS: 0.1064, Misc: 0.8976, SS: 0.6661, SC: 0.6283, Avg: 0.5200, batch_loss: 8.0180, loss: 11.2210 ||: 16%|#5 | 181/1147 [00:10<00:51, 18.59it/s] |
|
2024-06-29 19:18:54,439 - INFO - tqdm - NullAccuracy: 0.9836, NullF1: 0.1334, Lemma: 0.7855, PosFeats: 0.7376, UD-UAS: 0.5092, UD-LAS: 0.4962, EUD-UAS: 0.1905, EUD-LAS: 0.1709, Misc: 0.9121, SS: 0.7062, SC: 0.6972, Avg: 0.5783, batch_loss: 5.6264, loss: 8.8108 ||: 32%|###1 | 365/1147 [00:20<00:40, 19.33it/s] |
|
2024-06-29 19:19:04,496 - INFO - tqdm - NullAccuracy: 0.9852, NullF1: 0.1735, Lemma: 0.8120, PosFeats: 0.7771, UD-UAS: 0.5424, UD-LAS: 0.5309, EUD-UAS: 0.2229, EUD-LAS: 0.2035, Misc: 0.9195, SS: 0.7247, SC: 0.7284, Avg: 0.6068, batch_loss: 5.9012, loss: 7.7643 ||: 48%|####7 | 550/1147 [00:30<00:33, 18.03it/s] |
|
2024-06-29 19:19:14,508 - INFO - tqdm - NullAccuracy: 0.9860, NullF1: 0.2011, Lemma: 0.8282, PosFeats: 0.8002, UD-UAS: 0.5663, UD-LAS: 0.5556, EUD-UAS: 0.2468, EUD-LAS: 0.2276, Misc: 0.9244, SS: 0.7367, SC: 0.7480, Avg: 0.6260, batch_loss: 5.3222, loss: 7.1057 ||: 64%|######4 | 737/1147 [00:40<00:22, 18.18it/s] |
|
2024-06-29 19:19:24,619 - INFO - tqdm - NullAccuracy: 0.9867, NullF1: 0.2289, Lemma: 0.8405, PosFeats: 0.8165, UD-UAS: 0.5840, UD-LAS: 0.5741, EUD-UAS: 0.2648, EUD-LAS: 0.2455, Misc: 0.9275, SS: 0.7457, SC: 0.7636, Avg: 0.6403, batch_loss: 4.6858, loss: 6.6559 ||: 81%|######## | 925/1147 [00:50<00:12, 18.26it/s] |
|
2024-06-29 19:19:34,697 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2439, Lemma: 0.8494, PosFeats: 0.8288, UD-UAS: 0.5970, UD-LAS: 0.5879, EUD-UAS: 0.2785, EUD-LAS: 0.2598, Misc: 0.9299, SS: 0.7529, SC: 0.7748, Avg: 0.6510, batch_loss: 4.3227, loss: 6.3229 ||: 97%|#########6| 1112/1147 [01:00<00:01, 18.52it/s] |
|
2024-06-29 19:19:36,422 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2466, Lemma: 0.8507, PosFeats: 0.8306, UD-UAS: 0.5986, UD-LAS: 0.5898, EUD-UAS: 0.2808, EUD-LAS: 0.2622, Misc: 0.9302, SS: 0.7540, SC: 0.7765, Avg: 0.6526, batch_loss: 4.1319, loss: 6.2726 ||: 100%|#########9| 1143/1147 [01:02<00:00, 18.05it/s] |
|
2024-06-29 19:19:36,533 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2468, Lemma: 0.8508, PosFeats: 0.8307, UD-UAS: 0.5987, UD-LAS: 0.5899, EUD-UAS: 0.2809, EUD-LAS: 0.2622, Misc: 0.9302, SS: 0.7541, SC: 0.7766, Avg: 0.6527, batch_loss: 4.6516, loss: 6.2695 ||: 100%|#########9| 1145/1147 [01:02<00:00, 18.05it/s] |
|
2024-06-29 19:19:36,624 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2468, Lemma: 0.8509, PosFeats: 0.8308, UD-UAS: 0.5989, UD-LAS: 0.5900, EUD-UAS: 0.2810, EUD-LAS: 0.2624, Misc: 0.9302, SS: 0.7541, SC: 0.7767, Avg: 0.6528, batch_loss: 5.1362, loss: 6.2673 ||: 100%|##########| 1147/1147 [01:02<00:00, 18.44it/s] |
|
2024-06-29 19:19:36,624 - INFO - allennlp.training.gradient_descent_trainer - Validating |
|
2024-06-29 19:19:36,625 - INFO - tqdm - 0%| | 0/287 [00:00<?, ?it/s] |
|
2024-06-29 19:19:36,718 - INFO - allennlp.training.callbacks.console_logger - Batch inputs |
|
2024-06-29 19:19:36,718 - INFO - allennlp.training.callbacks.console_logger - batch_input/words/tokens/token_ids (Shape: 24 x 64) |
|
tensor([[ 0, 89523, 983, ..., 1, 1, 1], |
|
[ 0, 113083, 415, ..., 1, 1, 1], |
|
[ 0, 87149, 227, ..., 1, 1, 1], |
|
..., |
|
[ 0, 242808, 468, ..., 1, 1, 1], |
|
[ 0, 7762, 468, ..., 1, 1, 1], |
|
[ 0, 589, 24010, ..., 1, 1, 1]], |
|
device='cuda:0') |
|
2024-06-29 19:19:36,719 - INFO - allennlp.training.callbacks.console_logger - batch_input/words/tokens/mask (Shape: 24 x 41) |
|
tensor([[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False], |
|
..., |
|
[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False]], device='cuda:0') |
|
2024-06-29 19:19:36,721 - INFO - allennlp.training.callbacks.console_logger - batch_input/words/tokens/type_ids (Shape: 24 x 64) |
|
tensor([[0, 0, 0, ..., 0, 0, 0], |
|
[0, 0, 0, ..., 0, 0, 0], |
|
[0, 0, 0, ..., 0, 0, 0], |
|
..., |
|
[0, 0, 0, ..., 0, 0, 0], |
|
[0, 0, 0, ..., 0, 0, 0], |
|
[0, 0, 0, ..., 0, 0, 0]], device='cuda:0') |
|
2024-06-29 19:19:36,722 - INFO - allennlp.training.callbacks.console_logger - batch_input/words/tokens/wordpiece_mask (Shape: 24 x 64) |
|
tensor([[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False], |
|
..., |
|
[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False], |
|
[ True, True, True, ..., False, False, False]], device='cuda:0') |
|
2024-06-29 19:19:36,723 - INFO - allennlp.training.callbacks.console_logger - batch_input/words/tokens/offsets (Shape: 24 x 41 x 2) |
|
tensor([[[1, 2], |
|
[3, 3], |
|
[4, 4], |
|
..., |
|
[0, 0], |
|
[0, 0], |
|
[0, 0]], |
|
|
|
[[1, 2], |
|
[3, 3], |
|
[4, 4], |
|
..., |
|
[0, 0], |
|
[0, 0], |
|
[0, 0]], |
|
|
|
[[1, 2], |
|
[3, 5], |
|
[6, 9], |
|
..., |
|
[0, 0], |
|
[0, 0], |
|
[0, 0]], |
|
|
|
..., |
|
|
|
[[1, 1], |
|
[2, 4], |
|
[5, 7], |
|
..., |
|
[0, 0], |
|
[0, 0], |
|
[0, 0]], |
|
|
|
[[1, 1], |
|
[2, 4], |
|
[5, 8], |
|
..., |
|
[0, 0], |
|
[0, 0], |
|
[0, 0]], |
|
|
|
[[1, 1], |
|
[2, 2], |
|
[3, 4], |
|
..., |
|
[0, 0], |
|
[0, 0], |
|
[0, 0]]], device='cuda:0') |
|
2024-06-29 19:19:36,726 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/sentences" : (Length 24 of type "<class 'list'>") |
|
2024-06-29 19:19:36,726 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 41) |
|
tensor([[ 0, 0, 64, ..., 0, 0, 0], |
|
[ 0, 0, 0, ..., 0, 0, 0], |
|
[ 3, 16, 4, ..., 0, 0, 0], |
|
..., |
|
[ 0, 13, 0, ..., 0, 0, 0], |
|
[32, 0, 2, ..., 0, 0, 0], |
|
[ 0, 17, 4, ..., 0, 0, 0]], device='cuda:0') |
|
2024-06-29 19:19:36,727 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 41) |
|
tensor([[198, 12, 80, ..., 0, 0, 0], |
|
[ 8, 0, 51, ..., 0, 0, 0], |
|
[ 52, 154, 18, ..., 0, 0, 0], |
|
..., |
|
[ 2, 24, 30, ..., 0, 0, 0], |
|
[152, 38, 41, ..., 0, 0, 0], |
|
[ 1, 323, 53, ..., 0, 0, 0]], device='cuda:0') |
|
2024-06-29 19:19:36,729 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 41 x 41) |
|
tensor([[[-1, 6, -1, ..., -1, -1, -1], |
|
[-1, -1, 3, ..., -1, -1, -1], |
|
[-1, -1, 5, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[ 5, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, 17, ..., -1, -1, -1], |
|
[ 1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
..., |
|
|
|
[[ 5, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[ 4, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[-1, -1, 3, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, 5, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[-1, -1, 2, ..., -1, -1, -1], |
|
[-1, -1, 6, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') |
|
2024-06-29 19:19:36,735 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 41 x 41) |
|
tensor([[[-1, 5, -1, ..., -1, -1, -1], |
|
[-1, -1, 3, ..., -1, -1, -1], |
|
[-1, -1, 2, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[ 2, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, 17, ..., -1, -1, -1], |
|
[ 4, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
..., |
|
|
|
[[ 2, -1, -1, ..., -1, -1, -1], |
|
[21, -1, -1, ..., -1, -1, -1], |
|
[25, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[-1, 9, -1, ..., -1, -1, -1], |
|
[-1, -1, 3, ..., -1, -1, -1], |
|
[-1, -1, 2, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]], |
|
|
|
[[-1, -1, 1, ..., -1, -1, -1], |
|
[-1, -1, 5, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
..., |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1], |
|
[-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') |
|
2024-06-29 19:19:36,742 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 41) |
|
tensor([[0, 0, 0, ..., 0, 0, 0], |
|
[1, 0, 0, ..., 0, 0, 0], |
|
[0, 0, 3, ..., 0, 0, 0], |
|
..., |
|
[0, 2, 0, ..., 0, 0, 0], |
|
[0, 2, 0, ..., 0, 0, 0], |
|
[0, 0, 0, ..., 0, 0, 0]], device='cuda:0') |
|
2024-06-29 19:19:36,743 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 41) |
|
tensor([[14, 2, 1, ..., 0, 0, 0], |
|
[18, 0, 3, ..., 0, 0, 0], |
|
[ 3, 11, 17, ..., 0, 0, 0], |
|
..., |
|
[15, 1, 32, ..., 0, 0, 0], |
|
[13, 2, 1, ..., 0, 0, 0], |
|
[ 0, 7, 32, ..., 0, 0, 0]], device='cuda:0') |
|
2024-06-29 19:19:36,744 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 41) |
|
tensor([[ 3, 23, 30, ..., 0, 0, 0], |
|
[ 36, 0, 5, ..., 0, 0, 0], |
|
[ 91, 28, 2, ..., 0, 0, 0], |
|
..., |
|
[ 12, 8, 33, ..., 0, 0, 0], |
|
[ 3, 16, 30, ..., 0, 0, 0], |
|
[ 1, 10, 579, ..., 0, 0, 0]], device='cuda:0') |
|
2024-06-29 19:19:36,746 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/metadata" : (Length 24 of type "<class 'conllu.models.Metadata'>") |
|
2024-06-29 19:19:46,640 - INFO - tqdm - NullAccuracy: 0.9895, NullF1: 0.3769, Lemma: 0.9290, PosFeats: 0.9248, UD-UAS: 0.7698, UD-LAS: 0.7705, EUD-UAS: 0.3969, EUD-LAS: 0.3773, Misc: 0.9597, SS: 0.8138, SC: 0.8693, Avg: 0.7568, batch_loss: 3.2383, loss: 3.4567 ||: 47%|####7 | 135/287 [00:10<00:10, 14.32it/s] |
|
2024-06-29 19:19:56,700 - INFO - tqdm - NullAccuracy: 0.9899, NullF1: 0.3635, Lemma: 0.9323, PosFeats: 0.9290, UD-UAS: 0.7774, UD-LAS: 0.7774, EUD-UAS: 0.4038, EUD-LAS: 0.3829, Misc: 0.9609, SS: 0.8188, SC: 0.8762, Avg: 0.7621, batch_loss: 2.9325, loss: 3.3026 ||: 98%|#########7| 281/287 [00:20<00:00, 13.32it/s] |
|
2024-06-29 19:19:57,107 - INFO - tqdm - NullAccuracy: 0.9899, NullF1: 0.3634, Lemma: 0.9321, PosFeats: 0.9290, UD-UAS: 0.7776, UD-LAS: 0.7776, EUD-UAS: 0.4039, EUD-LAS: 0.3830, Misc: 0.9610, SS: 0.8186, SC: 0.8761, Avg: 0.7621, batch_loss: 4.3368, loss: 3.3027 ||: 100%|##########| 287/287 [00:20<00:00, 14.49it/s] |
|
2024-06-29 19:19:57,107 - INFO - tqdm - NullAccuracy: 0.9899, NullF1: 0.3634, Lemma: 0.9321, PosFeats: 0.9290, UD-UAS: 0.7776, UD-LAS: 0.7776, EUD-UAS: 0.4039, EUD-LAS: 0.3830, Misc: 0.9610, SS: 0.8186, SC: 0.8761, Avg: 0.7621, batch_loss: 4.3368, loss: 3.3027 ||: 100%|##########| 287/287 [00:20<00:00, 14.01it/s] |
|
2024-06-29 19:19:57,107 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. |
|
2024-06-29 19:19:57,110 - INFO - allennlp.training.callbacks.console_logger - Training | Validation |
|
2024-06-29 19:19:57,110 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.653 | 0.762 |
|
2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.262 | 0.383 |
|
2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.281 | 0.404 |
|
2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.851 | 0.932 |
|
2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.930 | 0.961 |
|
2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.987 | 0.990 |
|
2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.247 | 0.363 |
|
2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.831 | 0.929 |
|
2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - SC | 0.777 | 0.876 |
|
2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - SS | 0.754 | 0.819 |
|
2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.590 | 0.778 |
|
2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.599 | 0.778 |
|
2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 1099.266 | N/A |
|
2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - loss | 6.267 | 3.303 |
|
2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 4585.449 | N/A |
|
2024-06-29 19:19:58,764 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:01:24.359503 |
|
2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:12:24 |
|
2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9 |
|
2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.9G |
|
2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 3.2G |
|
2024-06-29 19:19:58,766 - INFO - allennlp.training.gradient_descent_trainer - Training |
|
2024-06-29 19:19:58,766 - INFO - tqdm - 0%| | 0/1147 [00:00<?, ?it/s] |
|
2024-06-29 19:20:08,861 - INFO - tqdm - NullAccuracy: 0.9889, NullF1: 0.3885, Lemma: 0.8957, PosFeats: 0.8985, UD-UAS: 0.6896, UD-LAS: 0.6841, EUD-UAS: 0.4012, EUD-LAS: 0.3885, Misc: 0.9578, SS: 0.7974, SC: 0.8291, Avg: 0.7269, batch_loss: 4.5622, loss: 4.3827 ||: 7%|6 | 77/1147 [00:10<02:11, 8.16it/s] |
|
2024-06-29 19:20:18,867 - INFO - tqdm - NullAccuracy: 0.9895, NullF1: 0.4105, Lemma: 0.9008, PosFeats: 0.9034, UD-UAS: 0.6965, UD-LAS: 0.6929, EUD-UAS: 0.4240, EUD-LAS: 0.4118, Misc: 0.9668, SS: 0.7989, SC: 0.8275, Avg: 0.7358, batch_loss: 3.9000, loss: 4.2529 ||: 14%|#3 | 155/1147 [00:20<02:11, 7.55it/s] |
|
2024-06-29 19:20:28,939 - INFO - tqdm - NullAccuracy: 0.9897, NullF1: 0.4188, Lemma: 0.9033, PosFeats: 0.9067, UD-UAS: 0.7086, UD-LAS: 0.7053, EUD-UAS: 0.4414, EUD-LAS: 0.4292, Misc: 0.9722, SS: 0.8036, SC: 0.8286, Avg: 0.7443, batch_loss: 3.6851, loss: 4.1387 ||: 21%|## | 236/1147 [00:30<01:53, 8.04it/s] |
|
2024-06-29 19:20:39,011 - INFO - tqdm - NullAccuracy: 0.9898, NullF1: 0.4159, Lemma: 0.9065, PosFeats: 0.9094, UD-UAS: 0.7180, UD-LAS: 0.7149, EUD-UAS: 0.4553, EUD-LAS: 0.4431, Misc: 0.9754, SS: 0.8064, SC: 0.8317, Avg: 0.7512, batch_loss: 3.3486, loss: 4.0117 ||: 28%|##7 | 318/1147 [00:40<01:40, 8.26it/s] |
|
2024-06-29 19:20:49,119 - INFO - tqdm - NullAccuracy: 0.9897, NullF1: 0.4323, Lemma: 0.9091, PosFeats: 0.9113, UD-UAS: 0.7248, UD-LAS: 0.7217, EUD-UAS: 0.4648, EUD-LAS: 0.4528, Misc: 0.9775, SS: 0.8100, SC: 0.8337, Avg: 0.7562, batch_loss: 3.8119, loss: 3.9159 ||: 35%|###4 | 399/1147 [00:50<01:31, 8.18it/s] |
|
2024-06-29 19:20:59,251 - INFO - tqdm - NullAccuracy: 0.9900, NullF1: 0.4499, Lemma: 0.9113, PosFeats: 0.9142, UD-UAS: 0.7335, UD-LAS: 0.7310, EUD-UAS: 0.4761, EUD-LAS: 0.4645, Misc: 0.9792, SS: 0.8136, SC: 0.8368, Avg: 0.7622, batch_loss: 3.7333, loss: 3.8217 ||: 42%|####1 | 481/1147 [01:00<01:28, 7.49it/s] |
|
2024-06-29 19:21:09,324 - INFO - tqdm - NullAccuracy: 0.9902, NullF1: 0.4618, Lemma: 0.9138, PosFeats: 0.9163, UD-UAS: 0.7410, UD-LAS: 0.7389, EUD-UAS: 0.4877, EUD-LAS: 0.4766, Misc: 0.9806, SS: 0.8174, SC: 0.8384, Avg: 0.7679, batch_loss: 2.8634, loss: 3.7283 ||: 49%|####9 | 563/1147 [01:10<01:08, 8.48it/s] |
|
2024-06-29 19:21:19,438 - INFO - tqdm - NullAccuracy: 0.9902, NullF1: 0.4707, Lemma: 0.9151, PosFeats: 0.9182, UD-UAS: 0.7475, UD-LAS: 0.7456, EUD-UAS: 0.4976, EUD-LAS: 0.4868, Misc: 0.9817, SS: 0.8200, SC: 0.8402, Avg: 0.7725, batch_loss: 3.3490, loss: 3.6571 ||: 56%|#####6 | 645/1147 [01:20<01:07, 7.42it/s] |
|
2024-06-29 19:21:29,488 - INFO - tqdm - NullAccuracy: 0.9903, NullF1: 0.4847, Lemma: 0.9168, PosFeats: 0.9196, UD-UAS: 0.7518, UD-LAS: 0.7502, EUD-UAS: 0.5041, EUD-LAS: 0.4937, Misc: 0.9826, SS: 0.8225, SC: 0.8418, Avg: 0.7759, batch_loss: 3.3739, loss: 3.6056 ||: 63%|######3 | 724/1147 [01:30<00:52, 8.04it/s] |
|
2024-06-29 19:21:39,530 - INFO - tqdm - NullAccuracy: 0.9903, NullF1: 0.4942, Lemma: 0.9184, PosFeats: 0.9214, UD-UAS: 0.7559, UD-LAS: 0.7545, EUD-UAS: 0.5114, EUD-LAS: 0.5012, Misc: 0.9835, SS: 0.8245, SC: 0.8434, Avg: 0.7793, batch_loss: 2.8805, loss: 3.5490 ||: 70%|######9 | 802/1147 [01:40<00:41, 8.31it/s] |
|
2024-06-29 19:21:49,588 - INFO - tqdm - NullAccuracy: 0.9904, NullF1: 0.5030, Lemma: 0.9197, PosFeats: 0.9228, UD-UAS: 0.7598, UD-LAS: 0.7587, EUD-UAS: 0.5168, EUD-LAS: 0.5069, Misc: 0.9841, SS: 0.8266, SC: 0.8451, Avg: 0.7823, batch_loss: 2.7777, loss: 3.4950 ||: 77%|#######6 | 882/1147 [01:50<00:32, 8.11it/s] |
|
2024-06-29 19:21:59,632 - INFO - tqdm - NullAccuracy: 0.9905, NullF1: 0.5114, Lemma: 0.9210, PosFeats: 0.9241, UD-UAS: 0.7630, UD-LAS: 0.7621, EUD-UAS: 0.5223, EUD-LAS: 0.5129, Misc: 0.9846, SS: 0.8288, SC: 0.8465, Avg: 0.7850, batch_loss: 2.6155, loss: 3.4451 ||: 84%|########3 | 962/1147 [02:00<00:21, 8.65it/s] |
|
2024-06-29 19:22:09,639 - INFO - tqdm - NullAccuracy: 0.9906, NullF1: 0.5207, Lemma: 0.9224, PosFeats: 0.9256, UD-UAS: 0.7670, UD-LAS: 0.7662, EUD-UAS: 0.5292, EUD-LAS: 0.5201, Misc: 0.9850, SS: 0.8310, SC: 0.8479, Avg: 0.7883, batch_loss: 2.5013, loss: 3.3912 ||: 91%|#########1| 1046/1147 [02:10<00:12, 8.25it/s] |
|
2024-06-29 19:22:19,713 - INFO - tqdm - NullAccuracy: 0.9907, NullF1: 0.5306, Lemma: 0.9234, PosFeats: 0.9267, UD-UAS: 0.7702, UD-LAS: 0.7696, EUD-UAS: 0.5343, EUD-LAS: 0.5253, Misc: 0.9854, SS: 0.8326, SC: 0.8490, Avg: 0.7907, batch_loss: 1.9758, loss: 3.3518 ||: 98%|#########8| 1129/1147 [02:20<00:02, 8.31it/s] |
|
2024-06-29 19:22:21,311 - INFO - tqdm - NullAccuracy: 0.9907, NullF1: 0.5312, Lemma: 0.9236, PosFeats: 0.9270, UD-UAS: 0.7707, UD-LAS: 0.7701, EUD-UAS: 0.5351, EUD-LAS: 0.5262, Misc: 0.9855, SS: 0.8328, SC: 0.8492, Avg: 0.7911, batch_loss: 2.6192, loss: 3.3433 ||: 100%|#########9| 1142/1147 [02:22<00:00, 8.52it/s] |
|
2024-06-29 19:22:21,427 - INFO - tqdm - NullAccuracy: 0.9907, NullF1: 0.5317, Lemma: 0.9237, PosFeats: 0.9270, UD-UAS: 0.7707, UD-LAS: 0.7702, EUD-UAS: 0.5352, EUD-LAS: 0.5262, Misc: 0.9855, SS: 0.8328, SC: 0.8493, Avg: 0.7912, batch_loss: 2.8499, loss: 3.3429 ||: 100%|#########9| 1143/1147 [02:22<00:00, 8.56it/s] |
|
2024-06-29 19:22:21,560 - INFO - tqdm - NullAccuracy: 0.9907, NullF1: 0.5322, Lemma: 0.9237, PosFeats: 0.9270, UD-UAS: 0.7707, UD-LAS: 0.7702, EUD-UAS: 0.5353, EUD-LAS: 0.5264, Misc: 0.9855, SS: 0.8329, SC: 0.8493, Avg: 0.7912, batch_loss: 2.3103, loss: 3.3420 ||: 100%|#########9| 1144/1147 [02:22<00:00, 8.21it/s] |
|
2024-06-29 19:22:21,694 - INFO - tqdm - NullAccuracy: 0.9907, NullF1: 0.5323, Lemma: 0.9237, PosFeats: 0.9270, UD-UAS: 0.7708, UD-LAS: 0.7702, EUD-UAS: 0.5354, EUD-LAS: 0.5264, Misc: 0.9855, SS: 0.8329, SC: 0.8493, Avg: 0.7912, batch_loss: 3.0426, loss: 3.3417 ||: 100%|#########9| 1145/1147 [02:22<00:00, 7.97it/s] |
|
2024-06-29 19:22:21,816 - INFO - tqdm - NullAccuracy: 0.9907, NullF1: 0.5321, Lemma: 0.9237, PosFeats: 0.9270, UD-UAS: 0.7708, UD-LAS: 0.7702, EUD-UAS: 0.5355, EUD-LAS: 0.5265, Misc: 0.9855, SS: 0.8329, SC: 0.8493, Avg: 0.7913, batch_loss: 2.9064, loss: 3.3414 ||: 100%|#########9| 1146/1147 [02:23<00:00, 8.05it/s] |
|
2024-06-29 19:22:21,911 - INFO - tqdm - NullAccuracy: 0.9907, NullF1: 0.5317, Lemma: 0.9237, PosFeats: 0.9270, UD-UAS: 0.7708, UD-LAS: 0.7703, EUD-UAS: 0.5356, EUD-LAS: 0.5266, Misc: 0.9855, SS: 0.8329, SC: 0.8493, Avg: 0.7913, batch_loss: 2.5846, loss: 3.3407 ||: 100%|##########| 1147/1147 [02:23<00:00, 8.01it/s] |
|
2024-06-29 19:22:21,912 - INFO - allennlp.training.gradient_descent_trainer - Validating |
|
2024-06-29 19:22:21,914 - INFO - tqdm - 0%| | 0/287 [00:00<?, ?it/s] |
|
2024-06-29 19:22:32,015 - INFO - tqdm - NullAccuracy: 0.9926, NullF1: 0.6555, Lemma: 0.9546, PosFeats: 0.9534, UD-UAS: 0.8725, UD-LAS: 0.8784, EUD-UAS: 0.7132, EUD-LAS: 0.7156, Misc: 0.9940, SS: 0.8723, SC: 0.8903, Avg: 0.8716, batch_loss: 1.4838, loss: 2.1165 ||: 53%|#####2 | 151/287 [00:10<00:08, 15.24it/s] |
|
2024-06-29 19:22:41,260 - INFO - tqdm - NullAccuracy: 0.9928, NullF1: 0.6462, Lemma: 0.9579, PosFeats: 0.9564, UD-UAS: 0.8798, UD-LAS: 0.8847, EUD-UAS: 0.7206, EUD-LAS: 0.7219, Misc: 0.9939, SS: 0.8755, SC: 0.8945, Avg: 0.8761, batch_loss: 3.2675, loss: 2.0234 ||: 100%|##########| 287/287 [00:19<00:00, 15.68it/s] |
|
2024-06-29 19:22:41,260 - INFO - tqdm - NullAccuracy: 0.9928, NullF1: 0.6462, Lemma: 0.9579, PosFeats: 0.9564, UD-UAS: 0.8798, UD-LAS: 0.8847, EUD-UAS: 0.7206, EUD-LAS: 0.7219, Misc: 0.9939, SS: 0.8755, SC: 0.8945, Avg: 0.8761, batch_loss: 3.2675, loss: 2.0234 ||: 100%|##########| 287/287 [00:19<00:00, 14.83it/s] |
|
2024-06-29 19:22:41,261 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - Training | Validation |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.791 | 0.876 |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.527 | 0.722 |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.536 | 0.721 |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.924 | 0.958 |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.986 | 0.994 |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.991 | 0.993 |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.532 | 0.646 |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.927 | 0.956 |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - SC | 0.849 | 0.894 |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - SS | 0.833 | 0.875 |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.770 | 0.885 |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.771 | 0.880 |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 3289.646 | N/A |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - loss | 3.341 | 2.023 |
|
2024-06-29 19:22:41,263 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 4991.422 | N/A |
|
2024-06-29 19:22:45,969 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:02:47.204432 |
|
2024-06-29 19:22:45,969 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:16:27 |
|
2024-06-29 19:22:45,969 - INFO - allennlp.training.gradient_descent_trainer - Epoch 2/9 |
|
2024-06-29 19:22:45,969 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.9G |
|
2024-06-29 19:22:45,969 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 11G |
|
2024-06-29 19:22:45,971 - INFO - allennlp.training.gradient_descent_trainer - Training |
|
2024-06-29 19:22:45,971 - INFO - tqdm - 0%| | 0/1147 [00:00<?, ?it/s] |
|
2024-06-29 19:22:56,083 - INFO - tqdm - NullAccuracy: 0.9932, NullF1: 0.6917, Lemma: 0.9444, PosFeats: 0.9464, UD-UAS: 0.8248, UD-LAS: 0.8250, EUD-UAS: 0.6130, EUD-LAS: 0.6072, Misc: 0.9923, SS: 0.8651, SC: 0.8703, Avg: 0.8321, batch_loss: 2.5195, loss: 2.5009 ||: 7%|6 | 79/1147 [00:10<02:16, 7.83it/s] |
|
2024-06-29 19:23:06,168 - INFO - tqdm - NullAccuracy: 0.9928, NullF1: 0.6768, Lemma: 0.9439, PosFeats: 0.9492, UD-UAS: 0.8242, UD-LAS: 0.8251, EUD-UAS: 0.6194, EUD-LAS: 0.6133, Misc: 0.9925, SS: 0.8653, SC: 0.8717, Avg: 0.8339, batch_loss: 2.3169, loss: 2.5019 ||: 14%|#3 | 160/1147 [00:20<02:02, 8.09it/s] |
|
2024-06-29 19:23:16,243 - INFO - tqdm - NullAccuracy: 0.9927, NullF1: 0.6773, Lemma: 0.9445, PosFeats: 0.9488, UD-UAS: 0.8261, UD-LAS: 0.8272, EUD-UAS: 0.6248, EUD-LAS: 0.6192, Misc: 0.9926, SS: 0.8663, SC: 0.8748, Avg: 0.8360, batch_loss: 2.6540, loss: 2.4919 ||: 21%|##1 | 242/1147 [00:30<01:57, 7.71it/s] |
|
2024-06-29 19:23:26,356 - INFO - tqdm - NullAccuracy: 0.9924, NullF1: 0.6667, Lemma: 0.9447, PosFeats: 0.9485, UD-UAS: 0.8256, UD-LAS: 0.8269, EUD-UAS: 0.6238, EUD-LAS: 0.6176, Misc: 0.9926, SS: 0.8665, SC: 0.8753, Avg: 0.8357, batch_loss: 2.8054, loss: 2.4954 ||: 28%|##7 | 321/1147 [00:40<01:44, 7.88it/s] |
|
2024-06-29 19:23:36,359 - INFO - tqdm - NullAccuracy: 0.9926, NullF1: 0.6771, Lemma: 0.9452, PosFeats: 0.9483, UD-UAS: 0.8263, UD-LAS: 0.8279, EUD-UAS: 0.6252, EUD-LAS: 0.6195, Misc: 0.9926, SS: 0.8661, SC: 0.8762, Avg: 0.8364, batch_loss: 2.4838, loss: 2.4915 ||: 35%|###5 | 402/1147 [00:50<01:31, 8.15it/s] |
|
2024-06-29 19:23:46,399 - INFO - tqdm - NullAccuracy: 0.9926, NullF1: 0.6766, Lemma: 0.9457, PosFeats: 0.9485, UD-UAS: 0.8279, UD-LAS: 0.8299, EUD-UAS: 0.6288, EUD-LAS: 0.6232, Misc: 0.9926, SS: 0.8676, SC: 0.8772, Avg: 0.8379, batch_loss: 2.3959, loss: 2.4712 ||: 42%|####2 | 486/1147 [01:00<01:15, 8.73it/s] |
|
2024-06-29 19:23:56,455 - INFO - tqdm - NullAccuracy: 0.9926, NullF1: 0.6755, Lemma: 0.9464, PosFeats: 0.9484, UD-UAS: 0.8287, UD-LAS: 0.8312, EUD-UAS: 0.6308, EUD-LAS: 0.6256, Misc: 0.9926, SS: 0.8679, SC: 0.8777, Avg: 0.8388, batch_loss: 2.2542, loss: 2.4602 ||: 49%|####9 | 567/1147 [01:10<01:09, 8.35it/s] |
|
2024-06-29 19:24:06,537 - INFO - tqdm - NullAccuracy: 0.9926, NullF1: 0.6782, Lemma: 0.9468, PosFeats: 0.9488, UD-UAS: 0.8299, UD-LAS: 0.8325, EUD-UAS: 0.6325, EUD-LAS: 0.6274, Misc: 0.9927, SS: 0.8689, SC: 0.8785, Avg: 0.8398, batch_loss: 3.0092, loss: 2.4437 ||: 57%|#####6 | 649/1147 [01:20<01:01, 8.14it/s] |
|
2024-06-29 19:24:16,604 - INFO - tqdm - NullAccuracy: 0.9926, NullF1: 0.6758, Lemma: 0.9468, PosFeats: 0.9489, UD-UAS: 0.8312, UD-LAS: 0.8338, EUD-UAS: 0.6352, EUD-LAS: 0.6303, Misc: 0.9926, SS: 0.8694, SC: 0.8789, Avg: 0.8408, batch_loss: 1.6713, loss: 2.4371 ||: 64%|######3 | 730/1147 [01:30<00:50, 8.22it/s] |
|
2024-06-29 19:24:26,645 - INFO - tqdm - NullAccuracy: 0.9927, NullF1: 0.6777, Lemma: 0.9472, PosFeats: 0.9491, UD-UAS: 0.8321, UD-LAS: 0.8348, EUD-UAS: 0.6363, EUD-LAS: 0.6316, Misc: 0.9928, SS: 0.8699, SC: 0.8788, Avg: 0.8414, batch_loss: 2.9964, loss: 2.4257 ||: 71%|####### | 809/1147 [01:40<00:45, 7.48it/s] |
|
2024-06-29 19:24:36,748 - INFO - tqdm - NullAccuracy: 0.9928, NullF1: 0.6822, Lemma: 0.9476, PosFeats: 0.9495, UD-UAS: 0.8335, UD-LAS: 0.8363, EUD-UAS: 0.6392, EUD-LAS: 0.6346, Misc: 0.9928, SS: 0.8706, SC: 0.8792, Avg: 0.8426, batch_loss: 2.0955, loss: 2.4113 ||: 78%|#######7 | 891/1147 [01:50<00:30, 8.45it/s] |
|
2024-06-29 19:24:46,751 - INFO - tqdm - NullAccuracy: 0.9928, NullF1: 0.6883, Lemma: 0.9479, PosFeats: 0.9497, UD-UAS: 0.8351, UD-LAS: 0.8382, EUD-UAS: 0.6419, EUD-LAS: 0.6376, Misc: 0.9928, SS: 0.8711, SC: 0.8791, Avg: 0.8437, batch_loss: 2.2052, loss: 2.4007 ||: 85%|########4 | 972/1147 [02:00<00:20, 8.49it/s] |
|
2024-06-29 19:24:56,860 - INFO - tqdm - NullAccuracy: 0.9929, NullF1: 0.6901, Lemma: 0.9480, PosFeats: 0.9498, UD-UAS: 0.8358, UD-LAS: 0.8387, EUD-UAS: 0.6433, EUD-LAS: 0.6390, Misc: 0.9928, SS: 0.8717, SC: 0.8794, Avg: 0.8443, batch_loss: 2.7321, loss: 2.3951 ||: 92%|#########1| 1052/1147 [02:10<00:11, 8.07it/s] |
|
2024-06-29 19:25:06,873 - INFO - tqdm - NullAccuracy: 0.9930, NullF1: 0.6947, Lemma: 0.9482, PosFeats: 0.9497, UD-UAS: 0.8367, UD-LAS: 0.8396, EUD-UAS: 0.6450, EUD-LAS: 0.6408, Misc: 0.9929, SS: 0.8720, SC: 0.8795, Avg: 0.8449, batch_loss: 2.4578, loss: 2.3910 ||: 99%|#########8| 1133/1147 [02:20<00:01, 8.14it/s] |
|
2024-06-29 19:25:08,005 - INFO - tqdm - NullAccuracy: 0.9930, NullF1: 0.6946, Lemma: 0.9483, PosFeats: 0.9497, UD-UAS: 0.8367, UD-LAS: 0.8396, EUD-UAS: 0.6451, EUD-LAS: 0.6409, Misc: 0.9929, SS: 0.8721, SC: 0.8796, Avg: 0.8450, batch_loss: 2.3263, loss: 2.3899 ||: 100%|#########9| 1142/1147 [02:22<00:00, 8.21it/s] |
|
2024-06-29 19:25:08,117 - INFO - tqdm - NullAccuracy: 0.9930, NullF1: 0.6947, Lemma: 0.9483, PosFeats: 0.9497, UD-UAS: 0.8367, UD-LAS: 0.8396, EUD-UAS: 0.6451, EUD-LAS: 0.6409, Misc: 0.9929, SS: 0.8721, SC: 0.8797, Avg: 0.8450, batch_loss: 2.1296, loss: 2.3897 ||: 100%|#########9| 1143/1147 [02:22<00:00, 8.41it/s] |
|
2024-06-29 19:25:08,218 - INFO - tqdm - NullAccuracy: 0.9930, NullF1: 0.6947, Lemma: 0.9483, PosFeats: 0.9497, UD-UAS: 0.8368, UD-LAS: 0.8397, EUD-UAS: 0.6452, EUD-LAS: 0.6410, Misc: 0.9929, SS: 0.8721, SC: 0.8797, Avg: 0.8450, batch_loss: 1.8111, loss: 2.3892 ||: 100%|#########9| 1144/1147 [02:22<00:00, 8.80it/s] |
|
2024-06-29 19:25:08,326 - INFO - tqdm - NullAccuracy: 0.9930, NullF1: 0.6945, Lemma: 0.9483, PosFeats: 0.9497, UD-UAS: 0.8369, UD-LAS: 0.8397, EUD-UAS: 0.6453, EUD-LAS: 0.6411, Misc: 0.9929, SS: 0.8721, SC: 0.8797, Avg: 0.8451, batch_loss: 2.2675, loss: 2.3891 ||: 100%|#########9| 1145/1147 [02:22<00:00, 8.94it/s] |
|
2024-06-29 19:25:08,432 - INFO - tqdm - NullAccuracy: 0.9930, NullF1: 0.6946, Lemma: 0.9483, PosFeats: 0.9497, UD-UAS: 0.8369, UD-LAS: 0.8398, EUD-UAS: 0.6454, EUD-LAS: 0.6412, Misc: 0.9929, SS: 0.8721, SC: 0.8797, Avg: 0.8451, batch_loss: 1.9238, loss: 2.3887 ||: 100%|#########9| 1146/1147 [02:22<00:00, 9.08it/s] |
|
2024-06-29 19:25:08,534 - INFO - tqdm - NullAccuracy: 0.9930, NullF1: 0.6947, Lemma: 0.9483, PosFeats: 0.9497, UD-UAS: 0.8369, UD-LAS: 0.8398, EUD-UAS: 0.6455, EUD-LAS: 0.6413, Misc: 0.9929, SS: 0.8721, SC: 0.8797, Avg: 0.8451, batch_loss: 2.0053, loss: 2.3884 ||: 100%|##########| 1147/1147 [02:22<00:00, 9.29it/s] |
|
2024-06-29 19:25:08,534 - INFO - tqdm - NullAccuracy: 0.9930, NullF1: 0.6947, Lemma: 0.9483, PosFeats: 0.9497, UD-UAS: 0.8369, UD-LAS: 0.8398, EUD-UAS: 0.6455, EUD-LAS: 0.6413, Misc: 0.9929, SS: 0.8721, SC: 0.8797, Avg: 0.8451, batch_loss: 2.0053, loss: 2.3884 ||: 100%|##########| 1147/1147 [02:22<00:00, 8.05it/s] |
|
2024-06-29 19:25:08,535 - INFO - allennlp.training.gradient_descent_trainer - Validating |
|
2024-06-29 19:25:08,536 - INFO - tqdm - 0%| | 0/287 [00:00<?, ?it/s] |
|
2024-06-29 19:25:18,575 - INFO - tqdm - NullAccuracy: 0.9941, NullF1: 0.7521, Lemma: 0.9651, PosFeats: 0.9622, UD-UAS: 0.9042, UD-LAS: 0.9126, EUD-UAS: 0.8064, EUD-LAS: 0.8094, Misc: 0.9949, SS: 0.8944, SC: 0.9020, Avg: 0.9057, batch_loss: 1.7717, loss: 1.7578 ||: 51%|##### | 145/287 [00:10<00:10, 13.90it/s] |
|
2024-06-29 19:25:27,628 - INFO - tqdm - NullAccuracy: 0.9944, NullF1: 0.7548, Lemma: 0.9679, PosFeats: 0.9647, UD-UAS: 0.9096, UD-LAS: 0.9172, EUD-UAS: 0.8128, EUD-LAS: 0.8148, Misc: 0.9950, SS: 0.8975, SC: 0.9071, Avg: 0.9096, batch_loss: 2.8492, loss: 1.6721 ||: 100%|##########| 287/287 [00:19<00:00, 16.14it/s] |
|
2024-06-29 19:25:27,628 - INFO - tqdm - NullAccuracy: 0.9944, NullF1: 0.7548, Lemma: 0.9679, PosFeats: 0.9647, UD-UAS: 0.9096, UD-LAS: 0.9172, EUD-UAS: 0.8128, EUD-LAS: 0.8148, Misc: 0.9950, SS: 0.8975, SC: 0.9071, Avg: 0.9096, batch_loss: 2.8492, loss: 1.6721 ||: 100%|##########| 287/287 [00:19<00:00, 15.03it/s] |
|
2024-06-29 19:25:27,629 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. |
|
2024-06-29 19:25:27,631 - INFO - allennlp.training.callbacks.console_logger - Training | Validation |
|
2024-06-29 19:25:27,631 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.845 | 0.910 |
|
2024-06-29 19:25:27,631 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.641 | 0.815 |
|
2024-06-29 19:25:27,631 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.645 | 0.813 |
|
2024-06-29 19:25:27,631 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.948 | 0.968 |
|
2024-06-29 19:25:27,631 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.993 | 0.995 |
|
2024-06-29 19:25:27,631 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.993 | 0.994 |
|
2024-06-29 19:25:27,631 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.695 | 0.755 |
|
2024-06-29 19:25:27,631 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.950 | 0.965 |
|
2024-06-29 19:25:27,631 - INFO - allennlp.training.callbacks.console_logger - SC | 0.880 | 0.907 |
|
2024-06-29 19:25:27,631 - INFO - allennlp.training.callbacks.console_logger - SS | 0.872 | 0.898 |
|
2024-06-29 19:25:27,631 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.840 | 0.917 |
|
2024-06-29 19:25:27,632 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.837 | 0.910 |
|
2024-06-29 19:25:27,632 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 11256.346 | N/A |
|
2024-06-29 19:25:27,632 - INFO - allennlp.training.callbacks.console_logger - loss | 2.388 | 1.672 |
|
2024-06-29 19:25:27,632 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 5001.855 | N/A |
|
2024-06-29 19:25:32,461 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:02:46.491371 |
|
2024-06-29 19:25:32,461 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:16:04 |
|
2024-06-29 19:25:32,461 - INFO - allennlp.training.gradient_descent_trainer - Epoch 3/9 |
|
2024-06-29 19:25:32,461 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.9G |
|
2024-06-29 19:25:32,461 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 11G |
|
2024-06-29 19:25:32,462 - INFO - allennlp.training.gradient_descent_trainer - Training |
|
2024-06-29 19:25:32,462 - INFO - tqdm - 0%| | 0/1147 [00:00<?, ?it/s] |
|
2024-06-29 19:25:42,527 - INFO - tqdm - NullAccuracy: 0.9923, NullF1: 0.6938, Lemma: 0.9561, PosFeats: 0.9603, UD-UAS: 0.8502, UD-LAS: 0.8525, EUD-UAS: 0.6673, EUD-LAS: 0.6636, Misc: 0.9937, SS: 0.8862, SC: 0.8948, Avg: 0.8583, batch_loss: 2.1601, loss: 2.0514 ||: 7%|7 | 82/1147 [00:10<02:15, 7.84it/s] |
|
2024-06-29 19:25:52,568 - INFO - tqdm - NullAccuracy: 0.9930, NullF1: 0.7211, Lemma: 0.9556, PosFeats: 0.9584, UD-UAS: 0.8509, UD-LAS: 0.8541, EUD-UAS: 0.6694, EUD-LAS: 0.6661, Misc: 0.9933, SS: 0.8841, SC: 0.8923, Avg: 0.8582, batch_loss: 1.8708, loss: 2.0802 ||: 14%|#4 | 165/1147 [00:20<01:54, 8.56it/s] |
|
2024-06-29 19:26:02,633 - INFO - tqdm - NullAccuracy: 0.9932, NullF1: 0.7313, Lemma: 0.9561, PosFeats: 0.9582, UD-UAS: 0.8547, UD-LAS: 0.8585, EUD-UAS: 0.6766, EUD-LAS: 0.6745, Misc: 0.9935, SS: 0.8850, SC: 0.8926, Avg: 0.8611, batch_loss: 2.0120, loss: 2.0519 ||: 22%|##1 | 248/1147 [00:30<01:46, 8.45it/s] |
|
2024-06-29 19:26:12,660 - INFO - tqdm - NullAccuracy: 0.9934, NullF1: 0.7323, Lemma: 0.9566, PosFeats: 0.9580, UD-UAS: 0.8565, UD-LAS: 0.8601, EUD-UAS: 0.6820, EUD-LAS: 0.6793, Misc: 0.9937, SS: 0.8863, SC: 0.8933, Avg: 0.8629, batch_loss: 2.2314, loss: 2.0485 ||: 29%|##8 | 331/1147 [00:40<01:39, 8.20it/s] |
|
2024-06-29 19:26:22,664 - INFO - tqdm - NullAccuracy: 0.9934, NullF1: 0.7319, Lemma: 0.9566, PosFeats: 0.9581, UD-UAS: 0.8567, UD-LAS: 0.8602, EUD-UAS: 0.6813, EUD-LAS: 0.6786, Misc: 0.9939, SS: 0.8870, SC: 0.8934, Avg: 0.8629, batch_loss: 2.0134, loss: 2.0442 ||: 36%|###5 | 412/1147 [00:50<01:27, 8.42it/s] |
|
2024-06-29 19:26:32,722 - INFO - tqdm - NullAccuracy: 0.9935, NullF1: 0.7352, Lemma: 0.9568, PosFeats: 0.9583, UD-UAS: 0.8566, UD-LAS: 0.8600, EUD-UAS: 0.6825, EUD-LAS: 0.6795, Misc: 0.9939, SS: 0.8876, SC: 0.8927, Avg: 0.8631, batch_loss: 1.3696, loss: 2.0397 ||: 43%|####3 | 495/1147 [01:00<01:18, 8.29it/s] |
|
2024-06-29 19:26:42,742 - INFO - tqdm - NullAccuracy: 0.9936, NullF1: 0.7370, Lemma: 0.9567, PosFeats: 0.9583, UD-UAS: 0.8572, UD-LAS: 0.8607, EUD-UAS: 0.6837, EUD-LAS: 0.6807, Misc: 0.9940, SS: 0.8880, SC: 0.8928, Avg: 0.8636, batch_loss: 2.2186, loss: 2.0316 ||: 50%|##### | 576/1147 [01:10<01:09, 8.24it/s] |
|
2024-06-29 19:26:52,816 - INFO - tqdm - NullAccuracy: 0.9936, NullF1: 0.7371, Lemma: 0.9568, PosFeats: 0.9583, UD-UAS: 0.8586, UD-LAS: 0.8622, EUD-UAS: 0.6858, EUD-LAS: 0.6829, Misc: 0.9941, SS: 0.8883, SC: 0.8928, Avg: 0.8644, batch_loss: 1.7874, loss: 2.0241 ||: 57%|#####7 | 659/1147 [01:20<01:00, 8.02it/s] |
|
2024-06-29 19:27:02,901 - INFO - tqdm - NullAccuracy: 0.9938, NullF1: 0.7382, Lemma: 0.9570, PosFeats: 0.9584, UD-UAS: 0.8594, UD-LAS: 0.8632, EUD-UAS: 0.6879, EUD-LAS: 0.6850, Misc: 0.9940, SS: 0.8885, SC: 0.8922, Avg: 0.8651, batch_loss: 2.6033, loss: 2.0194 ||: 65%|######4 | 743/1147 [01:30<00:53, 7.60it/s] |
|
2024-06-29 19:27:12,954 - INFO - tqdm - NullAccuracy: 0.9937, NullF1: 0.7372, Lemma: 0.9571, PosFeats: 0.9585, UD-UAS: 0.8595, UD-LAS: 0.8634, EUD-UAS: 0.6881, EUD-LAS: 0.6853, Misc: 0.9942, SS: 0.8892, SC: 0.8925, Avg: 0.8653, batch_loss: 2.4531, loss: 2.0119 ||: 72%|#######2 | 826/1147 [01:40<00:37, 8.50it/s] |
|
2024-06-29 19:27:23,007 - INFO - tqdm - NullAccuracy: 0.9937, NullF1: 0.7380, Lemma: 0.9574, PosFeats: 0.9587, UD-UAS: 0.8601, UD-LAS: 0.8640, EUD-UAS: 0.6898, EUD-LAS: 0.6873, Misc: 0.9941, SS: 0.8899, SC: 0.8930, Avg: 0.8660, batch_loss: 1.9132, loss: 2.0046 ||: 79%|#######9 | 908/1147 [01:50<00:27, 8.59it/s] |
|
2024-06-29 19:27:33,079 - INFO - tqdm - NullAccuracy: 0.9938, NullF1: 0.7380, Lemma: 0.9576, PosFeats: 0.9587, UD-UAS: 0.8605, UD-LAS: 0.8645, EUD-UAS: 0.6911, EUD-LAS: 0.6889, Misc: 0.9942, SS: 0.8900, SC: 0.8929, Avg: 0.8665, batch_loss: 1.8705, loss: 2.0014 ||: 86%|########6 | 990/1147 [02:00<00:19, 7.92it/s] |
|
2024-06-29 19:27:43,094 - INFO - tqdm - NullAccuracy: 0.9938, NullF1: 0.7380, Lemma: 0.9578, PosFeats: 0.9588, UD-UAS: 0.8607, UD-LAS: 0.8648, EUD-UAS: 0.6917, EUD-LAS: 0.6894, Misc: 0.9942, SS: 0.8900, SC: 0.8930, Avg: 0.8667, batch_loss: 1.8390, loss: 1.9978 ||: 93%|#########3| 1071/1147 [02:10<00:09, 8.21it/s] |
|
2024-06-29 19:27:51,639 - INFO - tqdm - NullAccuracy: 0.9938, NullF1: 0.7401, Lemma: 0.9581, PosFeats: 0.9589, UD-UAS: 0.8614, UD-LAS: 0.8656, EUD-UAS: 0.6931, EUD-LAS: 0.6908, Misc: 0.9941, SS: 0.8902, SC: 0.8933, Avg: 0.8673, batch_loss: 1.8860, loss: 1.9913 ||: 100%|#########9| 1142/1147 [02:19<00:00, 7.74it/s] |
|
2024-06-29 19:27:51,757 - INFO - tqdm - NullAccuracy: 0.9938, NullF1: 0.7397, Lemma: 0.9581, PosFeats: 0.9589, UD-UAS: 0.8614, UD-LAS: 0.8656, EUD-UAS: 0.6931, EUD-LAS: 0.6908, Misc: 0.9941, SS: 0.8902, SC: 0.8933, Avg: 0.8673, batch_loss: 2.2029, loss: 1.9914 ||: 100%|#########9| 1143/1147 [02:19<00:00, 7.96it/s] |
|
2024-06-29 19:27:51,892 - INFO - tqdm - NullAccuracy: 0.9938, NullF1: 0.7397, Lemma: 0.9581, PosFeats: 0.9589, UD-UAS: 0.8614, UD-LAS: 0.8656, EUD-UAS: 0.6931, EUD-LAS: 0.6908, Misc: 0.9941, SS: 0.8902, SC: 0.8933, Avg: 0.8673, batch_loss: 2.0576, loss: 1.9915 ||: 100%|#########9| 1144/1147 [02:19<00:00, 7.78it/s] |
|
2024-06-29 19:27:52,013 - INFO - tqdm - NullAccuracy: 0.9938, NullF1: 0.7393, Lemma: 0.9581, PosFeats: 0.9589, UD-UAS: 0.8614, UD-LAS: 0.8656, EUD-UAS: 0.6931, EUD-LAS: 0.6908, Misc: 0.9941, SS: 0.8902, SC: 0.8933, Avg: 0.8673, batch_loss: 2.0820, loss: 1.9916 ||: 100%|#########9| 1145/1147 [02:19<00:00, 7.91it/s] |
|
2024-06-29 19:27:52,121 - INFO - tqdm - NullAccuracy: 0.9938, NullF1: 0.7395, Lemma: 0.9581, PosFeats: 0.9589, UD-UAS: 0.8614, UD-LAS: 0.8656, EUD-UAS: 0.6931, EUD-LAS: 0.6908, Misc: 0.9941, SS: 0.8902, SC: 0.8933, Avg: 0.8673, batch_loss: 1.6593, loss: 1.9913 ||: 100%|#########9| 1146/1147 [02:19<00:00, 8.28it/s] |
|
2024-06-29 19:27:52,214 - INFO - tqdm - NullAccuracy: 0.9938, NullF1: 0.7395, Lemma: 0.9581, PosFeats: 0.9589, UD-UAS: 0.8614, UD-LAS: 0.8657, EUD-UAS: 0.6931, EUD-LAS: 0.6908, Misc: 0.9941, SS: 0.8902, SC: 0.8933, Avg: 0.8673, batch_loss: 1.6613, loss: 1.9910 ||: 100%|##########| 1147/1147 [02:19<00:00, 8.21it/s] |
|
2024-06-29 19:27:52,215 - INFO - allennlp.training.gradient_descent_trainer - Validating |
|
2024-06-29 19:27:52,216 - INFO - tqdm - 0%| | 0/287 [00:00<?, ?it/s] |
|
2024-06-29 19:28:02,279 - INFO - tqdm - NullAccuracy: 0.9944, NullF1: 0.7813, Lemma: 0.9702, PosFeats: 0.9667, UD-UAS: 0.9208, UD-LAS: 0.9299, EUD-UAS: 0.8189, EUD-LAS: 0.8245, Misc: 0.9955, SS: 0.9043, SC: 0.9101, Avg: 0.9156, batch_loss: 2.0391, loss: 1.5916 ||: 53%|#####3 | 153/287 [00:10<00:08, 15.40it/s] |
|
2024-06-29 19:28:10,592 - INFO - tqdm - NullAccuracy: 0.9946, NullF1: 0.7798, Lemma: 0.9723, PosFeats: 0.9681, UD-UAS: 0.9251, UD-LAS: 0.9338, EUD-UAS: 0.8248, EUD-LAS: 0.8296, Misc: 0.9956, SS: 0.9076, SC: 0.9133, Avg: 0.9189, batch_loss: 2.8009, loss: 1.5228 ||: 100%|##########| 287/287 [00:18<00:00, 16.62it/s] |
|
2024-06-29 19:28:10,592 - INFO - tqdm - NullAccuracy: 0.9946, NullF1: 0.7798, Lemma: 0.9723, PosFeats: 0.9681, UD-UAS: 0.9251, UD-LAS: 0.9338, EUD-UAS: 0.8248, EUD-LAS: 0.8296, Misc: 0.9956, SS: 0.9076, SC: 0.9133, Avg: 0.9189, batch_loss: 2.8009, loss: 1.5228 ||: 100%|##########| 287/287 [00:18<00:00, 15.62it/s] |
|
2024-06-29 19:28:10,592 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. |
|
2024-06-29 19:28:10,595 - INFO - allennlp.training.callbacks.console_logger - Training | Validation |
|
2024-06-29 19:28:10,595 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.867 | 0.919 |
|
2024-06-29 19:28:10,595 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.691 | 0.830 |
|
2024-06-29 19:28:10,595 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.693 | 0.825 |
|
2024-06-29 19:28:10,595 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.958 | 0.972 |
|
2024-06-29 19:28:10,595 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.994 | 0.996 |
|
2024-06-29 19:28:10,595 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.994 | 0.995 |
|
2024-06-29 19:28:10,595 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.739 | 0.780 |
|
2024-06-29 19:28:10,595 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.959 | 0.968 |
|
2024-06-29 19:28:10,595 - INFO - allennlp.training.callbacks.console_logger - SC | 0.893 | 0.913 |
|
2024-06-29 19:28:10,595 - INFO - allennlp.training.callbacks.console_logger - SS | 0.890 | 0.908 |
|
2024-06-29 19:28:10,595 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.866 | 0.934 |
|
2024-06-29 19:28:10,595 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.861 | 0.925 |
|
2024-06-29 19:28:10,596 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 11258.267 | N/A |
|
2024-06-29 19:28:10,596 - INFO - allennlp.training.callbacks.console_logger - loss | 1.991 | 1.523 |
|
2024-06-29 19:28:10,596 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 5001.855 | N/A |
|
2024-06-29 19:28:15,643 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:02:43.182164 |
|
2024-06-29 19:28:15,643 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:14:24 |
|
2024-06-29 19:28:15,643 - INFO - allennlp.training.gradient_descent_trainer - Epoch 4/9 |
|
2024-06-29 19:28:15,643 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.9G |
|
2024-06-29 19:28:15,643 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 11G |
|
2024-06-29 19:28:15,645 - INFO - allennlp.training.gradient_descent_trainer - Training |
|
2024-06-29 19:28:15,645 - INFO - tqdm - 0%| | 0/1147 [00:00<?, ?it/s] |
|
2024-06-29 19:28:25,667 - INFO - tqdm - NullAccuracy: 0.9943, NullF1: 0.7439, Lemma: 0.9636, PosFeats: 0.9601, UD-UAS: 0.8702, UD-LAS: 0.8750, EUD-UAS: 0.7100, EUD-LAS: 0.7082, Misc: 0.9940, SS: 0.9056, SC: 0.9046, Avg: 0.8768, batch_loss: 1.7985, loss: 1.7494 ||: 7%|7 | 81/1147 [00:10<02:07, 8.37it/s] |
|
2024-06-29 19:28:35,763 - INFO - tqdm - NullAccuracy: 0.9942, NullF1: 0.7625, Lemma: 0.9634, PosFeats: 0.9625, UD-UAS: 0.8732, UD-LAS: 0.8783, EUD-UAS: 0.7127, EUD-LAS: 0.7107, Misc: 0.9946, SS: 0.9028, SC: 0.9057, Avg: 0.8782, batch_loss: 2.0678, loss: 1.7426 ||: 14%|#4 | 164/1147 [00:20<01:56, 8.42it/s] |
|
2024-06-29 19:28:45,891 - INFO - tqdm - NullAccuracy: 0.9940, NullF1: 0.7539, Lemma: 0.9642, PosFeats: 0.9632, UD-UAS: 0.8709, UD-LAS: 0.8759, EUD-UAS: 0.7100, EUD-LAS: 0.7086, Misc: 0.9946, SS: 0.9009, SC: 0.9053, Avg: 0.8771, batch_loss: 1.5885, loss: 1.7559 ||: 21%|##1 | 246/1147 [00:30<01:55, 7.77it/s] |
|
2024-06-29 19:28:55,901 - INFO - tqdm - NullAccuracy: 0.9941, NullF1: 0.7529, Lemma: 0.9647, PosFeats: 0.9637, UD-UAS: 0.8719, UD-LAS: 0.8764, EUD-UAS: 0.7113, EUD-LAS: 0.7097, Misc: 0.9947, SS: 0.9016, SC: 0.9049, Avg: 0.8776, batch_loss: 1.3044, loss: 1.7376 ||: 29%|##8 | 329/1147 [00:40<01:37, 8.38it/s] |
|
2024-06-29 19:29:05,932 - INFO - tqdm - NullAccuracy: 0.9941, NullF1: 0.7561, Lemma: 0.9647, PosFeats: 0.9638, UD-UAS: 0.8716, UD-LAS: 0.8762, EUD-UAS: 0.7118, EUD-LAS: 0.7103, Misc: 0.9947, SS: 0.9015, SC: 0.9037, Avg: 0.8776, batch_loss: 1.4945, loss: 1.7464 ||: 36%|###5 | 411/1147 [00:50<01:30, 8.17it/s] |
|
2024-06-29 19:29:16,022 - INFO - tqdm - NullAccuracy: 0.9942, NullF1: 0.7587, Lemma: 0.9647, PosFeats: 0.9640, UD-UAS: 0.8722, UD-LAS: 0.8768, EUD-UAS: 0.7126, EUD-LAS: 0.7108, Misc: 0.9948, SS: 0.9016, SC: 0.9034, Avg: 0.8779, batch_loss: 1.8177, loss: 1.7477 ||: 43%|####3 | 494/1147 [01:00<01:21, 8.03it/s] |
|
2024-06-29 19:29:26,084 - INFO - tqdm - NullAccuracy: 0.9942, NullF1: 0.7597, Lemma: 0.9642, PosFeats: 0.9640, UD-UAS: 0.8730, UD-LAS: 0.8776, EUD-UAS: 0.7145, EUD-LAS: 0.7126, Misc: 0.9947, SS: 0.9018, SC: 0.9037, Avg: 0.8785, batch_loss: 1.3929, loss: 1.7487 ||: 50%|##### | 576/1147 [01:10<01:08, 8.34it/s] |
|
2024-06-29 19:29:36,123 - INFO - tqdm - NullAccuracy: 0.9943, NullF1: 0.7599, Lemma: 0.9642, PosFeats: 0.9639, UD-UAS: 0.8741, UD-LAS: 0.8788, EUD-UAS: 0.7167, EUD-LAS: 0.7152, Misc: 0.9947, SS: 0.9019, SC: 0.9036, Avg: 0.8792, batch_loss: 1.4890, loss: 1.7475 ||: 57%|#####7 | 658/1147 [01:20<00:59, 8.28it/s] |
|
2024-06-29 19:29:46,159 - INFO - tqdm - NullAccuracy: 0.9943, NullF1: 0.7582, Lemma: 0.9643, PosFeats: 0.9640, UD-UAS: 0.8746, UD-LAS: 0.8792, EUD-UAS: 0.7184, EUD-LAS: 0.7169, Misc: 0.9948, SS: 0.9019, SC: 0.9035, Avg: 0.8797, batch_loss: 1.5896, loss: 1.7439 ||: 65%|######4 | 740/1147 [01:30<00:48, 8.45it/s] |
|
2024-06-29 19:29:56,269 - INFO - tqdm - NullAccuracy: 0.9943, NullF1: 0.7583, Lemma: 0.9646, PosFeats: 0.9640, UD-UAS: 0.8756, UD-LAS: 0.8804, EUD-UAS: 0.7207, EUD-LAS: 0.7193, Misc: 0.9948, SS: 0.9020, SC: 0.9034, Avg: 0.8805, batch_loss: 1.5503, loss: 1.7403 ||: 72%|#######1 | 822/1147 [01:40<00:41, 7.75it/s] |
|
2024-06-29 19:30:06,279 - INFO - tqdm - NullAccuracy: 0.9943, NullF1: 0.7576, Lemma: 0.9648, PosFeats: 0.9644, UD-UAS: 0.8762, UD-LAS: 0.8808, EUD-UAS: 0.7213, EUD-LAS: 0.7200, Misc: 0.9949, SS: 0.9018, SC: 0.9034, Avg: 0.8808, batch_loss: 1.5092, loss: 1.7373 ||: 79%|#######8 | 903/1147 [01:50<00:29, 8.20it/s] |
|
2024-06-29 19:30:16,315 - INFO - tqdm - NullAccuracy: 0.9943, NullF1: 0.7601, Lemma: 0.9648, PosFeats: 0.9647, UD-UAS: 0.8766, UD-LAS: 0.8813, EUD-UAS: 0.7224, EUD-LAS: 0.7212, Misc: 0.9948, SS: 0.9018, SC: 0.9035, Avg: 0.8812, batch_loss: 1.7932, loss: 1.7347 ||: 86%|########5 | 985/1147 [02:00<00:22, 7.22it/s] |
|
2024-06-29 19:30:26,371 - INFO - tqdm - NullAccuracy: 0.9943, NullF1: 0.7618, Lemma: 0.9650, PosFeats: 0.9649, UD-UAS: 0.8774, UD-LAS: 0.8821, EUD-UAS: 0.7241, EUD-LAS: 0.7231, Misc: 0.9949, SS: 0.9018, SC: 0.9036, Avg: 0.8819, batch_loss: 1.5564, loss: 1.7322 ||: 93%|#########3| 1068/1147 [02:10<00:08, 9.05it/s] |
|
2024-06-29 19:30:35,276 - INFO - tqdm - NullAccuracy: 0.9943, NullF1: 0.7635, Lemma: 0.9651, PosFeats: 0.9649, UD-UAS: 0.8779, UD-LAS: 0.8825, EUD-UAS: 0.7246, EUD-LAS: 0.7235, Misc: 0.9949, SS: 0.9021, SC: 0.9040, Avg: 0.8822, batch_loss: 1.5629, loss: 1.7277 ||: 100%|#########9| 1142/1147 [02:19<00:00, 8.51it/s] |
|
2024-06-29 19:30:35,397 - INFO - tqdm - NullAccuracy: 0.9943, NullF1: 0.7633, Lemma: 0.9651, PosFeats: 0.9649, UD-UAS: 0.8779, UD-LAS: 0.8825, EUD-UAS: 0.7246, EUD-LAS: 0.7236, Misc: 0.9949, SS: 0.9021, SC: 0.9040, Avg: 0.8822, batch_loss: 1.6916, loss: 1.7276 ||: 100%|#########9| 1143/1147 [02:19<00:00, 8.44it/s] |
|
2024-06-29 19:30:35,521 - INFO - tqdm - NullAccuracy: 0.9943, NullF1: 0.7634, Lemma: 0.9651, PosFeats: 0.9649, UD-UAS: 0.8779, UD-LAS: 0.8825, EUD-UAS: 0.7246, EUD-LAS: 0.7235, Misc: 0.9949, SS: 0.9021, SC: 0.9040, Avg: 0.8822, batch_loss: 1.6203, loss: 1.7276 ||: 100%|#########9| 1144/1147 [02:19<00:00, 8.32it/s] |
|
2024-06-29 19:30:35,654 - INFO - tqdm - NullAccuracy: 0.9943, NullF1: 0.7634, Lemma: 0.9651, PosFeats: 0.9649, UD-UAS: 0.8779, UD-LAS: 0.8825, EUD-UAS: 0.7246, EUD-LAS: 0.7236, Misc: 0.9949, SS: 0.9021, SC: 0.9040, Avg: 0.8822, batch_loss: 1.4994, loss: 1.7274 ||: 100%|#########9| 1145/1147 [02:20<00:00, 8.07it/s] |
|
2024-06-29 19:30:35,790 - INFO - tqdm - NullAccuracy: 0.9943, NullF1: 0.7631, Lemma: 0.9651, PosFeats: 0.9648, UD-UAS: 0.8778, UD-LAS: 0.8825, EUD-UAS: 0.7246, EUD-LAS: 0.7236, Misc: 0.9949, SS: 0.9021, SC: 0.9040, Avg: 0.8822, batch_loss: 1.8959, loss: 1.7275 ||: 100%|#########9| 1146/1147 [02:20<00:00, 7.83it/s] |
|
2024-06-29 19:30:35,884 - INFO - tqdm - NullAccuracy: 0.9943, NullF1: 0.7632, Lemma: 0.9651, PosFeats: 0.9648, UD-UAS: 0.8778, UD-LAS: 0.8825, EUD-UAS: 0.7245, EUD-LAS: 0.7235, Misc: 0.9949, SS: 0.9021, SC: 0.9040, Avg: 0.8821, batch_loss: 1.8477, loss: 1.7276 ||: 100%|##########| 1147/1147 [02:20<00:00, 8.18it/s] |
|
2024-06-29 19:30:35,885 - INFO - allennlp.training.gradient_descent_trainer - Validating |
|
2024-06-29 19:30:35,886 - INFO - tqdm - 0%| | 0/287 [00:00<?, ?it/s] |
|
2024-06-29 19:30:45,915 - INFO - tqdm - NullAccuracy: 0.9948, NullF1: 0.7925, Lemma: 0.9736, PosFeats: 0.9702, UD-UAS: 0.9256, UD-LAS: 0.9349, EUD-UAS: 0.7750, EUD-LAS: 0.7824, Misc: 0.9959, SS: 0.9106, SC: 0.9146, Avg: 0.9092, batch_loss: 0.9713, loss: 1.4837 ||: 51%|#####1 | 147/287 [00:10<00:09, 14.45it/s] |
|
2024-06-29 19:30:54,843 - INFO - tqdm - NullAccuracy: 0.9951, NullF1: 0.7952, Lemma: 0.9762, PosFeats: 0.9718, UD-UAS: 0.9295, UD-LAS: 0.9381, EUD-UAS: 0.7816, EUD-LAS: 0.7870, Misc: 0.9959, SS: 0.9135, SC: 0.9194, Avg: 0.9126, batch_loss: 2.8006, loss: 1.4128 ||: 100%|##########| 287/287 [00:18<00:00, 16.21it/s] |
|
2024-06-29 19:30:54,843 - INFO - tqdm - NullAccuracy: 0.9951, NullF1: 0.7952, Lemma: 0.9762, PosFeats: 0.9718, UD-UAS: 0.9295, UD-LAS: 0.9381, EUD-UAS: 0.7816, EUD-LAS: 0.7870, Misc: 0.9959, SS: 0.9135, SC: 0.9194, Avg: 0.9126, batch_loss: 2.8006, loss: 1.4128 ||: 100%|##########| 287/287 [00:18<00:00, 15.14it/s] |
|
2024-06-29 19:30:54,844 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - Training | Validation |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.882 | 0.913 |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.724 | 0.787 |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.725 | 0.782 |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.965 | 0.976 |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.995 | 0.996 |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.994 | 0.995 |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.763 | 0.795 |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.965 | 0.972 |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - SC | 0.904 | 0.919 |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - SS | 0.902 | 0.913 |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.882 | 0.938 |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.878 | 0.929 |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 11269.433 | N/A |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - loss | 1.728 | 1.413 |
|
2024-06-29 19:30:54,846 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 5001.855 | N/A |
|
2024-06-29 19:30:59,900 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:02:44.257205 |
|
2024-06-29 19:30:59,901 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:12:20 |
|
2024-06-29 19:30:59,901 - INFO - allennlp.training.gradient_descent_trainer - Epoch 5/9 |
|
2024-06-29 19:30:59,901 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.9G |
|
2024-06-29 19:30:59,901 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 11G |
|
2024-06-29 19:30:59,902 - INFO - allennlp.training.gradient_descent_trainer - Training |
|
2024-06-29 19:30:59,902 - INFO - tqdm - 0%| | 0/1147 [00:00<?, ?it/s] |
|
2024-06-29 19:31:09,974 - INFO - tqdm - NullAccuracy: 0.9948, NullF1: 0.7919, Lemma: 0.9719, PosFeats: 0.9684, UD-UAS: 0.8842, UD-LAS: 0.8884, EUD-UAS: 0.7340, EUD-LAS: 0.7320, Misc: 0.9954, SS: 0.9114, SC: 0.9156, Avg: 0.8890, batch_loss: 1.6398, loss: 1.5423 ||: 7%|7 | 83/1147 [00:10<02:02, 8.65it/s] |
|
2024-06-29 19:31:20,054 - INFO - tqdm - NullAccuracy: 0.9949, NullF1: 0.7883, Lemma: 0.9707, PosFeats: 0.9693, UD-UAS: 0.8832, UD-LAS: 0.8884, EUD-UAS: 0.7318, EUD-LAS: 0.7311, Misc: 0.9956, SS: 0.9104, SC: 0.9145, Avg: 0.8883, batch_loss: 1.3990, loss: 1.5374 ||: 14%|#4 | 165/1147 [00:20<02:04, 7.90it/s] |
|
2024-06-29 19:31:30,080 - INFO - tqdm - NullAccuracy: 0.9952, NullF1: 0.7955, Lemma: 0.9706, PosFeats: 0.9695, UD-UAS: 0.8835, UD-LAS: 0.8885, EUD-UAS: 0.7305, EUD-LAS: 0.7299, Misc: 0.9955, SS: 0.9106, SC: 0.9140, Avg: 0.8881, batch_loss: 1.4056, loss: 1.5388 ||: 21%|##1 | 245/1147 [00:30<01:53, 7.97it/s] |
|
2024-06-29 19:31:40,213 - INFO - tqdm - NullAccuracy: 0.9950, NullF1: 0.7860, Lemma: 0.9706, PosFeats: 0.9689, UD-UAS: 0.8844, UD-LAS: 0.8897, EUD-UAS: 0.7337, EUD-LAS: 0.7335, Misc: 0.9954, SS: 0.9108, SC: 0.9135, Avg: 0.8890, batch_loss: 1.9566, loss: 1.5382 ||: 28%|##7 | 321/1147 [00:40<01:57, 7.05it/s] |
|
2024-06-29 19:31:50,221 - INFO - tqdm - NullAccuracy: 0.9950, NullF1: 0.7901, Lemma: 0.9704, PosFeats: 0.9688, UD-UAS: 0.8852, UD-LAS: 0.8904, EUD-UAS: 0.7343, EUD-LAS: 0.7338, Misc: 0.9956, SS: 0.9108, SC: 0.9137, Avg: 0.8892, batch_loss: 1.2495, loss: 1.5371 ||: 35%|###4 | 399/1147 [00:50<01:34, 7.91it/s] |
|
2024-06-29 19:32:00,256 - INFO - tqdm - NullAccuracy: 0.9950, NullF1: 0.7868, Lemma: 0.9706, PosFeats: 0.9688, UD-UAS: 0.8854, UD-LAS: 0.8905, EUD-UAS: 0.7363, EUD-LAS: 0.7354, Misc: 0.9957, SS: 0.9109, SC: 0.9141, Avg: 0.8897, batch_loss: 1.5455, loss: 1.5362 ||: 41%|####1 | 475/1147 [01:00<01:35, 7.03it/s] |
|
2024-06-29 19:32:10,342 - INFO - tqdm - NullAccuracy: 0.9949, NullF1: 0.7880, Lemma: 0.9708, PosFeats: 0.9690, UD-UAS: 0.8858, UD-LAS: 0.8909, EUD-UAS: 0.7360, EUD-LAS: 0.7348, Misc: 0.9957, SS: 0.9102, SC: 0.9137, Avg: 0.8896, batch_loss: 1.5492, loss: 1.5383 ||: 48%|####8 | 552/1147 [01:10<01:11, 8.35it/s] |
|
2024-06-29 19:32:20,380 - INFO - tqdm - NullAccuracy: 0.9949, NullF1: 0.7874, Lemma: 0.9708, PosFeats: 0.9691, UD-UAS: 0.8870, UD-LAS: 0.8919, EUD-UAS: 0.7379, EUD-LAS: 0.7367, Misc: 0.9957, SS: 0.9101, SC: 0.9137, Avg: 0.8903, batch_loss: 1.3705, loss: 1.5368 ||: 55%|#####5 | 635/1147 [01:20<01:01, 8.33it/s] |
|
2024-06-29 19:32:30,438 - INFO - tqdm - NullAccuracy: 0.9949, NullF1: 0.7872, Lemma: 0.9710, PosFeats: 0.9690, UD-UAS: 0.8885, UD-LAS: 0.8935, EUD-UAS: 0.7404, EUD-LAS: 0.7394, Misc: 0.9957, SS: 0.9103, SC: 0.9134, Avg: 0.8913, batch_loss: 1.4505, loss: 1.5349 ||: 63%|######2 | 718/1147 [01:30<00:49, 8.71it/s] |
|
2024-06-29 19:32:40,559 - INFO - tqdm - NullAccuracy: 0.9949, NullF1: 0.7872, Lemma: 0.9709, PosFeats: 0.9692, UD-UAS: 0.8893, UD-LAS: 0.8942, EUD-UAS: 0.7417, EUD-LAS: 0.7408, Misc: 0.9957, SS: 0.9105, SC: 0.9130, Avg: 0.8917, batch_loss: 1.7792, loss: 1.5318 ||: 70%|######9 | 802/1147 [01:40<00:43, 7.89it/s] |
|
2024-06-29 19:32:50,636 - INFO - tqdm - NullAccuracy: 0.9949, NullF1: 0.7891, Lemma: 0.9712, PosFeats: 0.9693, UD-UAS: 0.8888, UD-LAS: 0.8938, EUD-UAS: 0.7419, EUD-LAS: 0.7410, Misc: 0.9957, SS: 0.9104, SC: 0.9132, Avg: 0.8917, batch_loss: 1.5494, loss: 1.5303 ||: 77%|#######6 | 883/1147 [01:50<00:31, 8.35it/s] |
|
2024-06-29 19:33:00,749 - INFO - tqdm - NullAccuracy: 0.9949, NullF1: 0.7903, Lemma: 0.9712, PosFeats: 0.9693, UD-UAS: 0.8881, UD-LAS: 0.8932, EUD-UAS: 0.7410, EUD-LAS: 0.7405, Misc: 0.9957, SS: 0.9110, SC: 0.9132, Avg: 0.8915, batch_loss: 1.7464, loss: 1.5329 ||: 84%|########4 | 965/1147 [02:00<00:24, 7.43it/s] |
|
2024-06-29 19:33:10,816 - INFO - tqdm - NullAccuracy: 0.9948, NullF1: 0.7891, Lemma: 0.9712, PosFeats: 0.9696, UD-UAS: 0.8879, UD-LAS: 0.8933, EUD-UAS: 0.7410, EUD-LAS: 0.7408, Misc: 0.9957, SS: 0.9111, SC: 0.9134, Avg: 0.8915, batch_loss: 1.2881, loss: 1.5325 ||: 91%|#########1| 1047/1147 [02:10<00:12, 8.16it/s] |
|
2024-06-29 19:33:20,932 - INFO - tqdm - NullAccuracy: 0.9948, NullF1: 0.7882, Lemma: 0.9712, PosFeats: 0.9695, UD-UAS: 0.8884, UD-LAS: 0.8937, EUD-UAS: 0.7420, EUD-LAS: 0.7418, Misc: 0.9957, SS: 0.9111, SC: 0.9132, Avg: 0.8918, batch_loss: 1.7167, loss: 1.5316 ||: 99%|#########8| 1130/1147 [02:21<00:02, 8.23it/s] |
|
2024-06-29 19:33:22,353 - INFO - tqdm - NullAccuracy: 0.9949, NullF1: 0.7890, Lemma: 0.9711, PosFeats: 0.9695, UD-UAS: 0.8886, UD-LAS: 0.8939, EUD-UAS: 0.7423, EUD-LAS: 0.7421, Misc: 0.9957, SS: 0.9112, SC: 0.9133, Avg: 0.8920, batch_loss: 1.2463, loss: 1.5303 ||: 100%|#########9| 1142/1147 [02:22<00:00, 8.80it/s] |
|
2024-06-29 19:33:22,466 - INFO - tqdm - NullAccuracy: 0.9949, NullF1: 0.7890, Lemma: 0.9712, PosFeats: 0.9695, UD-UAS: 0.8887, UD-LAS: 0.8940, EUD-UAS: 0.7424, EUD-LAS: 0.7422, Misc: 0.9957, SS: 0.9112, SC: 0.9133, Avg: 0.8920, batch_loss: 1.0164, loss: 1.5298 ||: 100%|#########9| 1143/1147 [02:22<00:00, 8.82it/s] |
|
2024-06-29 19:33:22,597 - INFO - tqdm - NullAccuracy: 0.9948, NullF1: 0.7888, Lemma: 0.9712, PosFeats: 0.9695, UD-UAS: 0.8886, UD-LAS: 0.8940, EUD-UAS: 0.7424, EUD-LAS: 0.7422, Misc: 0.9957, SS: 0.9112, SC: 0.9133, Avg: 0.8920, batch_loss: 1.5510, loss: 1.5298 ||: 100%|#########9| 1144/1147 [02:22<00:00, 8.42it/s] |
|
2024-06-29 19:33:22,706 - INFO - tqdm - NullAccuracy: 0.9948, NullF1: 0.7888, Lemma: 0.9712, PosFeats: 0.9695, UD-UAS: 0.8887, UD-LAS: 0.8940, EUD-UAS: 0.7424, EUD-LAS: 0.7422, Misc: 0.9957, SS: 0.9112, SC: 0.9133, Avg: 0.8920, batch_loss: 1.3037, loss: 1.5296 ||: 100%|#########9| 1145/1147 [02:22<00:00, 8.63it/s] |
|
2024-06-29 19:33:22,830 - INFO - tqdm - NullAccuracy: 0.9948, NullF1: 0.7885, Lemma: 0.9712, PosFeats: 0.9696, UD-UAS: 0.8887, UD-LAS: 0.8940, EUD-UAS: 0.7424, EUD-LAS: 0.7422, Misc: 0.9957, SS: 0.9112, SC: 0.9133, Avg: 0.8920, batch_loss: 1.6812, loss: 1.5298 ||: 100%|#########9| 1146/1147 [02:22<00:00, 8.45it/s] |
|
2024-06-29 19:33:22,932 - INFO - tqdm - NullAccuracy: 0.9948, NullF1: 0.7885, Lemma: 0.9712, PosFeats: 0.9696, UD-UAS: 0.8887, UD-LAS: 0.8940, EUD-UAS: 0.7424, EUD-LAS: 0.7422, Misc: 0.9957, SS: 0.9111, SC: 0.9133, Avg: 0.8920, batch_loss: 1.5941, loss: 1.5298 ||: 100%|##########| 1147/1147 [02:23<00:00, 8.81it/s] |
|
2024-06-29 19:33:22,933 - INFO - tqdm - NullAccuracy: 0.9948, NullF1: 0.7885, Lemma: 0.9712, PosFeats: 0.9696, UD-UAS: 0.8887, UD-LAS: 0.8940, EUD-UAS: 0.7424, EUD-LAS: 0.7422, Misc: 0.9957, SS: 0.9111, SC: 0.9133, Avg: 0.8920, batch_loss: 1.5941, loss: 1.5298 ||: 100%|##########| 1147/1147 [02:23<00:00, 8.02it/s] |
|
2024-06-29 19:33:22,933 - INFO - allennlp.training.gradient_descent_trainer - Validating |
|
2024-06-29 19:33:22,934 - INFO - tqdm - 0%| | 0/287 [00:00<?, ?it/s] |
|
2024-06-29 19:33:32,999 - INFO - tqdm - NullAccuracy: 0.9949, NullF1: 0.7966, Lemma: 0.9764, PosFeats: 0.9717, UD-UAS: 0.9323, UD-LAS: 0.9412, EUD-UAS: 0.8439, EUD-LAS: 0.8480, Misc: 0.9962, SS: 0.9150, SC: 0.9211, Avg: 0.9273, batch_loss: 0.7610, loss: 1.4052 ||: 53%|#####2 | 151/287 [00:10<00:08, 15.51it/s] |
|
2024-06-29 19:33:41,487 - INFO - tqdm - NullAccuracy: 0.9952, NullF1: 0.8002, Lemma: 0.9787, PosFeats: 0.9733, UD-UAS: 0.9356, UD-LAS: 0.9434, EUD-UAS: 0.8480, EUD-LAS: 0.8510, Misc: 0.9963, SS: 0.9173, SC: 0.9238, Avg: 0.9297, batch_loss: 2.7584, loss: 1.3465 ||: 100%|##########| 287/287 [00:18<00:00, 16.52it/s] |
|
2024-06-29 19:33:41,488 - INFO - tqdm - NullAccuracy: 0.9952, NullF1: 0.8002, Lemma: 0.9787, PosFeats: 0.9733, UD-UAS: 0.9356, UD-LAS: 0.9434, EUD-UAS: 0.8480, EUD-LAS: 0.8510, Misc: 0.9963, SS: 0.9173, SC: 0.9238, Avg: 0.9297, batch_loss: 2.7584, loss: 1.3465 ||: 100%|##########| 287/287 [00:18<00:00, 15.47it/s] |
|
2024-06-29 19:33:41,507 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. |
|
2024-06-29 19:33:41,510 - INFO - allennlp.training.callbacks.console_logger - Training | Validation |
|
2024-06-29 19:33:41,510 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.892 | 0.930 |
|
2024-06-29 19:33:41,510 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.742 | 0.851 |
|
2024-06-29 19:33:41,510 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.742 | 0.848 |
|
2024-06-29 19:33:41,510 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.971 | 0.979 |
|
2024-06-29 19:33:41,510 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.996 | 0.996 |
|
2024-06-29 19:33:41,510 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.995 | 0.995 |
|
2024-06-29 19:33:41,510 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.789 | 0.800 |
|
2024-06-29 19:33:41,510 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.970 | 0.973 |
|
2024-06-29 19:33:41,510 - INFO - allennlp.training.callbacks.console_logger - SC | 0.913 | 0.924 |
|
2024-06-29 19:33:41,510 - INFO - allennlp.training.callbacks.console_logger - SS | 0.911 | 0.917 |
|
2024-06-29 19:33:41,510 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.894 | 0.943 |
|
2024-06-29 19:33:41,510 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.889 | 0.936 |
|
2024-06-29 19:33:41,510 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 11257.638 | N/A |
|
2024-06-29 19:33:41,511 - INFO - allennlp.training.callbacks.console_logger - loss | 1.530 | 1.346 |
|
2024-06-29 19:33:41,511 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 5001.953 | N/A |
|
2024-06-29 19:33:47,202 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:02:47.300998 |
|
2024-06-29 19:33:47,202 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:10:04 |
|
2024-06-29 19:33:47,202 - INFO - allennlp.training.gradient_descent_trainer - Epoch 6/9 |
|
2024-06-29 19:33:47,202 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.9G |
|
2024-06-29 19:33:47,202 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 11G |
|
2024-06-29 19:33:47,204 - INFO - allennlp.training.gradient_descent_trainer - Training |
|
2024-06-29 19:33:47,204 - INFO - tqdm - 0%| | 0/1147 [00:00<?, ?it/s] |
|
2024-06-29 19:33:57,291 - INFO - tqdm - NullAccuracy: 0.9948, NullF1: 0.8017, Lemma: 0.9735, PosFeats: 0.9728, UD-UAS: 0.8895, UD-LAS: 0.8956, EUD-UAS: 0.7473, EUD-LAS: 0.7478, Misc: 0.9971, SS: 0.9164, SC: 0.9233, Avg: 0.8959, batch_loss: 1.2881, loss: 1.4102 ||: 7%|7 | 82/1147 [00:10<02:16, 7.78it/s] |
|
2024-06-29 19:34:07,384 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8121, Lemma: 0.9752, PosFeats: 0.9734, UD-UAS: 0.8916, UD-LAS: 0.8975, EUD-UAS: 0.7513, EUD-LAS: 0.7518, Misc: 0.9967, SS: 0.9158, SC: 0.9216, Avg: 0.8972, batch_loss: 1.3237, loss: 1.3957 ||: 14%|#4 | 165/1147 [00:20<01:51, 8.82it/s] |
|
2024-06-29 19:34:17,490 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8144, Lemma: 0.9744, PosFeats: 0.9727, UD-UAS: 0.8941, UD-LAS: 0.9001, EUD-UAS: 0.7546, EUD-LAS: 0.7551, Misc: 0.9965, SS: 0.9167, SC: 0.9211, Avg: 0.8984, batch_loss: 1.2684, loss: 1.3864 ||: 22%|##1 | 249/1147 [00:30<01:53, 7.89it/s] |
|
2024-06-29 19:34:27,493 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8128, Lemma: 0.9742, PosFeats: 0.9730, UD-UAS: 0.8951, UD-LAS: 0.9002, EUD-UAS: 0.7574, EUD-LAS: 0.7575, Misc: 0.9963, SS: 0.9169, SC: 0.9203, Avg: 0.8990, batch_loss: 1.1529, loss: 1.3789 ||: 29%|##8 | 332/1147 [00:40<01:39, 8.23it/s] |
|
2024-06-29 19:34:37,560 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8134, Lemma: 0.9745, PosFeats: 0.9730, UD-UAS: 0.8957, UD-LAS: 0.9009, EUD-UAS: 0.7598, EUD-LAS: 0.7597, Misc: 0.9964, SS: 0.9173, SC: 0.9202, Avg: 0.8997, batch_loss: 1.3392, loss: 1.3741 ||: 36%|###6 | 417/1147 [00:50<01:26, 8.48it/s] |
|
2024-06-29 19:34:47,631 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8112, Lemma: 0.9745, PosFeats: 0.9731, UD-UAS: 0.8958, UD-LAS: 0.9014, EUD-UAS: 0.7602, EUD-LAS: 0.7604, Misc: 0.9963, SS: 0.9169, SC: 0.9201, Avg: 0.8998, batch_loss: 1.0445, loss: 1.3804 ||: 44%|####3 | 500/1147 [01:00<01:12, 8.93it/s] |
|
2024-06-29 19:34:57,694 - INFO - tqdm - NullAccuracy: 0.9952, NullF1: 0.8070, Lemma: 0.9745, PosFeats: 0.9731, UD-UAS: 0.8960, UD-LAS: 0.9016, EUD-UAS: 0.7601, EUD-LAS: 0.7605, Misc: 0.9963, SS: 0.9174, SC: 0.9203, Avg: 0.9000, batch_loss: 1.1462, loss: 1.3750 ||: 51%|##### | 584/1147 [01:10<01:06, 8.50it/s] |
|
2024-06-29 19:35:07,806 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8109, Lemma: 0.9745, PosFeats: 0.9730, UD-UAS: 0.8960, UD-LAS: 0.9017, EUD-UAS: 0.7599, EUD-LAS: 0.7605, Misc: 0.9962, SS: 0.9176, SC: 0.9205, Avg: 0.9000, batch_loss: 1.5332, loss: 1.3764 ||: 58%|#####8 | 667/1147 [01:20<00:57, 8.32it/s] |
|
2024-06-29 19:35:17,854 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8110, Lemma: 0.9746, PosFeats: 0.9730, UD-UAS: 0.8957, UD-LAS: 0.9014, EUD-UAS: 0.7594, EUD-LAS: 0.7600, Misc: 0.9962, SS: 0.9173, SC: 0.9204, Avg: 0.8998, batch_loss: 1.1324, loss: 1.3802 ||: 65%|######5 | 749/1147 [01:30<00:47, 8.43it/s] |
|
2024-06-29 19:35:27,900 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8116, Lemma: 0.9745, PosFeats: 0.9730, UD-UAS: 0.8963, UD-LAS: 0.9020, EUD-UAS: 0.7602, EUD-LAS: 0.7609, Misc: 0.9961, SS: 0.9177, SC: 0.9204, Avg: 0.9001, batch_loss: 0.9856, loss: 1.3761 ||: 73%|#######2 | 832/1147 [01:40<00:37, 8.48it/s] |
|
2024-06-29 19:35:37,964 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8104, Lemma: 0.9746, PosFeats: 0.9730, UD-UAS: 0.8969, UD-LAS: 0.9026, EUD-UAS: 0.7620, EUD-LAS: 0.7626, Misc: 0.9961, SS: 0.9178, SC: 0.9204, Avg: 0.9007, batch_loss: 1.0854, loss: 1.3727 ||: 80%|#######9 | 916/1147 [01:50<00:27, 8.36it/s] |
|
2024-06-29 19:35:47,965 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8076, Lemma: 0.9745, PosFeats: 0.9730, UD-UAS: 0.8972, UD-LAS: 0.9027, EUD-UAS: 0.7625, EUD-LAS: 0.7630, Misc: 0.9961, SS: 0.9181, SC: 0.9204, Avg: 0.9008, batch_loss: 1.3083, loss: 1.3695 ||: 87%|########7 | 999/1147 [02:00<00:17, 8.41it/s] |
|
2024-06-29 19:35:57,986 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8080, Lemma: 0.9745, PosFeats: 0.9729, UD-UAS: 0.8975, UD-LAS: 0.9031, EUD-UAS: 0.7633, EUD-LAS: 0.7638, Misc: 0.9962, SS: 0.9184, SC: 0.9205, Avg: 0.9011, batch_loss: 1.4589, loss: 1.3648 ||: 94%|#########4| 1082/1147 [02:10<00:07, 8.36it/s] |
|
2024-06-29 19:36:05,268 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8073, Lemma: 0.9746, PosFeats: 0.9730, UD-UAS: 0.8974, UD-LAS: 0.9030, EUD-UAS: 0.7629, EUD-LAS: 0.7635, Misc: 0.9962, SS: 0.9185, SC: 0.9208, Avg: 0.9011, batch_loss: 1.2374, loss: 1.3627 ||: 100%|#########9| 1142/1147 [02:18<00:00, 8.57it/s] |
|
2024-06-29 19:36:05,382 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8073, Lemma: 0.9746, PosFeats: 0.9730, UD-UAS: 0.8974, UD-LAS: 0.9030, EUD-UAS: 0.7630, EUD-LAS: 0.7636, Misc: 0.9962, SS: 0.9185, SC: 0.9208, Avg: 0.9011, batch_loss: 1.0635, loss: 1.3625 ||: 100%|#########9| 1143/1147 [02:18<00:00, 8.63it/s] |
|
2024-06-29 19:36:05,527 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8072, Lemma: 0.9746, PosFeats: 0.9730, UD-UAS: 0.8974, UD-LAS: 0.9030, EUD-UAS: 0.7630, EUD-LAS: 0.7636, Misc: 0.9962, SS: 0.9185, SC: 0.9208, Avg: 0.9011, batch_loss: 1.5301, loss: 1.3626 ||: 100%|#########9| 1144/1147 [02:18<00:00, 8.04it/s] |
|
2024-06-29 19:36:05,640 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8073, Lemma: 0.9746, PosFeats: 0.9730, UD-UAS: 0.8974, UD-LAS: 0.9030, EUD-UAS: 0.7630, EUD-LAS: 0.7636, Misc: 0.9962, SS: 0.9185, SC: 0.9208, Avg: 0.9011, batch_loss: 1.3743, loss: 1.3626 ||: 100%|#########9| 1145/1147 [02:18<00:00, 8.27it/s] |
|
2024-06-29 19:36:05,746 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8074, Lemma: 0.9746, PosFeats: 0.9730, UD-UAS: 0.8974, UD-LAS: 0.9030, EUD-UAS: 0.7631, EUD-LAS: 0.7637, Misc: 0.9962, SS: 0.9185, SC: 0.9208, Avg: 0.9011, batch_loss: 0.7187, loss: 1.3621 ||: 100%|#########9| 1146/1147 [02:18<00:00, 8.58it/s] |
|
2024-06-29 19:36:05,844 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8073, Lemma: 0.9746, PosFeats: 0.9730, UD-UAS: 0.8974, UD-LAS: 0.9030, EUD-UAS: 0.7632, EUD-LAS: 0.7638, Misc: 0.9962, SS: 0.9185, SC: 0.9208, Avg: 0.9012, batch_loss: 1.4546, loss: 1.3621 ||: 100%|##########| 1147/1147 [02:18<00:00, 8.27it/s] |
|
2024-06-29 19:36:05,845 - INFO - allennlp.training.gradient_descent_trainer - Validating |
|
2024-06-29 19:36:05,846 - INFO - tqdm - 0%| | 0/287 [00:00<?, ?it/s] |
|
2024-06-29 19:36:15,940 - INFO - tqdm - NullAccuracy: 0.9950, NullF1: 0.8000, Lemma: 0.9792, PosFeats: 0.9734, UD-UAS: 0.9383, UD-LAS: 0.9473, EUD-UAS: 0.8658, EUD-LAS: 0.8700, Misc: 0.9962, SS: 0.9197, SC: 0.9249, Avg: 0.9350, batch_loss: 1.0108, loss: 1.3464 ||: 54%|#####3 | 154/287 [00:10<00:08, 15.88it/s] |
|
2024-06-29 19:36:24,121 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8038, Lemma: 0.9810, PosFeats: 0.9746, UD-UAS: 0.9417, UD-LAS: 0.9499, EUD-UAS: 0.8702, EUD-LAS: 0.8733, Misc: 0.9961, SS: 0.9221, SC: 0.9274, Avg: 0.9374, batch_loss: 2.7066, loss: 1.2898 ||: 100%|##########| 287/287 [00:18<00:00, 16.81it/s] |
|
2024-06-29 19:36:24,122 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8038, Lemma: 0.9810, PosFeats: 0.9746, UD-UAS: 0.9417, UD-LAS: 0.9499, EUD-UAS: 0.8702, EUD-LAS: 0.8733, Misc: 0.9961, SS: 0.9221, SC: 0.9274, Avg: 0.9374, batch_loss: 2.7066, loss: 1.2898 ||: 100%|##########| 287/287 [00:18<00:00, 15.70it/s] |
|
2024-06-29 19:36:24,122 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. |
|
2024-06-29 19:36:24,124 - INFO - allennlp.training.callbacks.console_logger - Training | Validation |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.901 | 0.937 |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.764 | 0.873 |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.763 | 0.870 |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.975 | 0.981 |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.996 | 0.996 |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.995 | 0.995 |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.807 | 0.804 |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.973 | 0.975 |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - SC | 0.921 | 0.927 |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - SS | 0.919 | 0.922 |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.903 | 0.950 |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.897 | 0.942 |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 11409.911 | N/A |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - loss | 1.362 | 1.290 |
|
2024-06-29 19:36:24,125 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 5001.953 | N/A |
|
2024-06-29 19:36:29,404 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:02:42.202102 |
|
2024-06-29 19:36:29,404 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:07:38 |
|
2024-06-29 19:36:29,404 - INFO - allennlp.training.gradient_descent_trainer - Epoch 7/9 |
|
2024-06-29 19:36:29,404 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.9G |
|
2024-06-29 19:36:29,405 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 11G |
|
2024-06-29 19:36:29,406 - INFO - allennlp.training.gradient_descent_trainer - Training |
|
2024-06-29 19:36:29,406 - INFO - tqdm - 0%| | 0/1147 [00:00<?, ?it/s] |
|
2024-06-29 19:36:39,425 - INFO - tqdm - NullAccuracy: 0.9950, NullF1: 0.8006, Lemma: 0.9768, PosFeats: 0.9765, UD-UAS: 0.9019, UD-LAS: 0.9081, EUD-UAS: 0.7693, EUD-LAS: 0.7706, Misc: 0.9971, SS: 0.9226, SC: 0.9285, Avg: 0.9057, batch_loss: 1.0939, loss: 1.2545 ||: 7%|7 | 83/1147 [00:10<02:05, 8.50it/s] |
|
2024-06-29 19:36:49,427 - INFO - tqdm - NullAccuracy: 0.9952, NullF1: 0.8090, Lemma: 0.9777, PosFeats: 0.9772, UD-UAS: 0.9037, UD-LAS: 0.9099, EUD-UAS: 0.7727, EUD-LAS: 0.7737, Misc: 0.9969, SS: 0.9238, SC: 0.9294, Avg: 0.9072, batch_loss: 1.1221, loss: 1.2150 ||: 14%|#4 | 166/1147 [00:20<01:58, 8.31it/s] |
|
2024-06-29 19:36:59,497 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8119, Lemma: 0.9777, PosFeats: 0.9772, UD-UAS: 0.9061, UD-LAS: 0.9123, EUD-UAS: 0.7767, EUD-LAS: 0.7777, Misc: 0.9970, SS: 0.9242, SC: 0.9296, Avg: 0.9087, batch_loss: 1.4607, loss: 1.2037 ||: 21%|## | 237/1147 [00:30<01:44, 8.70it/s] |
|
2024-06-29 19:37:09,571 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8114, Lemma: 0.9776, PosFeats: 0.9769, UD-UAS: 0.9065, UD-LAS: 0.9124, EUD-UAS: 0.7785, EUD-LAS: 0.7790, Misc: 0.9968, SS: 0.9248, SC: 0.9292, Avg: 0.9091, batch_loss: 1.6282, loss: 1.2096 ||: 28%|##7 | 321/1147 [00:40<01:40, 8.23it/s] |
|
2024-06-29 19:37:19,670 - INFO - tqdm - NullAccuracy: 0.9955, NullF1: 0.8176, Lemma: 0.9778, PosFeats: 0.9766, UD-UAS: 0.9074, UD-LAS: 0.9133, EUD-UAS: 0.7811, EUD-LAS: 0.7815, Misc: 0.9966, SS: 0.9243, SC: 0.9286, Avg: 0.9097, batch_loss: 1.0417, loss: 1.2077 ||: 35%|###5 | 405/1147 [00:50<01:27, 8.49it/s] |
|
2024-06-29 19:37:29,718 - INFO - tqdm - NullAccuracy: 0.9955, NullF1: 0.8190, Lemma: 0.9780, PosFeats: 0.9762, UD-UAS: 0.9071, UD-LAS: 0.9129, EUD-UAS: 0.7802, EUD-LAS: 0.7806, Misc: 0.9967, SS: 0.9244, SC: 0.9284, Avg: 0.9094, batch_loss: 1.2412, loss: 1.2113 ||: 43%|####2 | 489/1147 [01:00<01:14, 8.79it/s] |
|
2024-06-29 19:37:39,800 - INFO - tqdm - NullAccuracy: 0.9955, NullF1: 0.8202, Lemma: 0.9781, PosFeats: 0.9762, UD-UAS: 0.9074, UD-LAS: 0.9134, EUD-UAS: 0.7806, EUD-LAS: 0.7813, Misc: 0.9966, SS: 0.9246, SC: 0.9284, Avg: 0.9096, batch_loss: 0.9601, loss: 1.2111 ||: 50%|####9 | 571/1147 [01:10<01:17, 7.44it/s] |
|
2024-06-29 19:37:49,881 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8168, Lemma: 0.9781, PosFeats: 0.9760, UD-UAS: 0.9070, UD-LAS: 0.9128, EUD-UAS: 0.7795, EUD-LAS: 0.7803, Misc: 0.9967, SS: 0.9242, SC: 0.9282, Avg: 0.9092, batch_loss: 1.2752, loss: 1.2187 ||: 57%|#####6 | 653/1147 [01:20<00:59, 8.30it/s] |
|
2024-06-29 19:37:59,963 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8151, Lemma: 0.9783, PosFeats: 0.9762, UD-UAS: 0.9071, UD-LAS: 0.9131, EUD-UAS: 0.7797, EUD-LAS: 0.7807, Misc: 0.9967, SS: 0.9243, SC: 0.9279, Avg: 0.9093, batch_loss: 1.7001, loss: 1.2187 ||: 64%|######4 | 736/1147 [01:30<00:47, 8.61it/s] |
|
2024-06-29 19:38:10,066 - INFO - tqdm - NullAccuracy: 0.9955, NullF1: 0.8166, Lemma: 0.9782, PosFeats: 0.9763, UD-UAS: 0.9073, UD-LAS: 0.9131, EUD-UAS: 0.7805, EUD-LAS: 0.7813, Misc: 0.9968, SS: 0.9244, SC: 0.9280, Avg: 0.9095, batch_loss: 1.2517, loss: 1.2151 ||: 71%|#######1 | 819/1147 [01:40<00:36, 8.88it/s] |
|
2024-06-29 19:38:20,085 - INFO - tqdm - NullAccuracy: 0.9955, NullF1: 0.8164, Lemma: 0.9782, PosFeats: 0.9765, UD-UAS: 0.9076, UD-LAS: 0.9133, EUD-UAS: 0.7806, EUD-LAS: 0.7815, Misc: 0.9968, SS: 0.9245, SC: 0.9279, Avg: 0.9096, batch_loss: 1.2216, loss: 1.2149 ||: 79%|#######8 | 901/1147 [01:50<00:27, 8.94it/s] |
|
2024-06-29 19:38:30,132 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8157, Lemma: 0.9782, PosFeats: 0.9764, UD-UAS: 0.9071, UD-LAS: 0.9130, EUD-UAS: 0.7800, EUD-LAS: 0.7811, Misc: 0.9967, SS: 0.9243, SC: 0.9280, Avg: 0.9094, batch_loss: 1.0139, loss: 1.2197 ||: 86%|########5 | 983/1147 [02:00<00:19, 8.36it/s] |
|
2024-06-29 19:38:40,146 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8153, Lemma: 0.9783, PosFeats: 0.9764, UD-UAS: 0.9075, UD-LAS: 0.9132, EUD-UAS: 0.7807, EUD-LAS: 0.7818, Misc: 0.9967, SS: 0.9243, SC: 0.9280, Avg: 0.9096, batch_loss: 1.3527, loss: 1.2164 ||: 93%|#########2| 1066/1147 [02:10<00:09, 8.10it/s] |
|
2024-06-29 19:38:49,400 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8153, Lemma: 0.9783, PosFeats: 0.9764, UD-UAS: 0.9073, UD-LAS: 0.9131, EUD-UAS: 0.7804, EUD-LAS: 0.7815, Misc: 0.9968, SS: 0.9242, SC: 0.9278, Avg: 0.9095, batch_loss: 1.3320, loss: 1.2185 ||: 100%|#########9| 1142/1147 [02:19<00:00, 8.45it/s] |
|
2024-06-29 19:38:49,525 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8154, Lemma: 0.9783, PosFeats: 0.9764, UD-UAS: 0.9073, UD-LAS: 0.9130, EUD-UAS: 0.7804, EUD-LAS: 0.7815, Misc: 0.9968, SS: 0.9242, SC: 0.9278, Avg: 0.9095, batch_loss: 1.3673, loss: 1.2186 ||: 100%|#########9| 1143/1147 [02:20<00:00, 8.30it/s] |
|
2024-06-29 19:38:49,631 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8150, Lemma: 0.9783, PosFeats: 0.9764, UD-UAS: 0.9073, UD-LAS: 0.9131, EUD-UAS: 0.7803, EUD-LAS: 0.7815, Misc: 0.9968, SS: 0.9242, SC: 0.9278, Avg: 0.9095, batch_loss: 1.5132, loss: 1.2189 ||: 100%|#########9| 1144/1147 [02:20<00:00, 8.63it/s] |
|
2024-06-29 19:38:49,742 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8149, Lemma: 0.9782, PosFeats: 0.9764, UD-UAS: 0.9074, UD-LAS: 0.9131, EUD-UAS: 0.7803, EUD-LAS: 0.7814, Misc: 0.9968, SS: 0.9242, SC: 0.9278, Avg: 0.9095, batch_loss: 1.1053, loss: 1.2188 ||: 100%|#########9| 1145/1147 [02:20<00:00, 8.74it/s] |
|
2024-06-29 19:38:49,849 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8148, Lemma: 0.9782, PosFeats: 0.9764, UD-UAS: 0.9073, UD-LAS: 0.9131, EUD-UAS: 0.7803, EUD-LAS: 0.7814, Misc: 0.9968, SS: 0.9242, SC: 0.9278, Avg: 0.9095, batch_loss: 1.3207, loss: 1.2189 ||: 100%|#########9| 1146/1147 [02:20<00:00, 8.90it/s] |
|
2024-06-29 19:38:49,953 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8146, Lemma: 0.9782, PosFeats: 0.9764, UD-UAS: 0.9073, UD-LAS: 0.9130, EUD-UAS: 0.7803, EUD-LAS: 0.7814, Misc: 0.9968, SS: 0.9242, SC: 0.9278, Avg: 0.9095, batch_loss: 1.3824, loss: 1.2190 ||: 100%|##########| 1147/1147 [02:20<00:00, 9.11it/s] |
|
2024-06-29 19:38:49,953 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8146, Lemma: 0.9782, PosFeats: 0.9764, UD-UAS: 0.9073, UD-LAS: 0.9130, EUD-UAS: 0.7803, EUD-LAS: 0.7814, Misc: 0.9968, SS: 0.9242, SC: 0.9278, Avg: 0.9095, batch_loss: 1.3824, loss: 1.2190 ||: 100%|##########| 1147/1147 [02:20<00:00, 8.16it/s] |
|
2024-06-29 19:38:49,954 - INFO - allennlp.training.gradient_descent_trainer - Validating |
|
2024-06-29 19:38:49,955 - INFO - tqdm - 0%| | 0/287 [00:00<?, ?it/s] |
|
2024-06-29 19:39:00,060 - INFO - tqdm - NullAccuracy: 0.9951, NullF1: 0.8036, Lemma: 0.9801, PosFeats: 0.9746, UD-UAS: 0.9401, UD-LAS: 0.9488, EUD-UAS: 0.8580, EUD-LAS: 0.8643, Misc: 0.9967, SS: 0.9230, SC: 0.9272, Avg: 0.9348, batch_loss: 0.7028, loss: 1.2827 ||: 54%|#####4 | 155/287 [00:10<00:08, 15.87it/s] |
|
2024-06-29 19:39:08,168 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8047, Lemma: 0.9819, PosFeats: 0.9758, UD-UAS: 0.9434, UD-LAS: 0.9513, EUD-UAS: 0.8628, EUD-LAS: 0.8680, Misc: 0.9967, SS: 0.9250, SC: 0.9300, Avg: 0.9372, batch_loss: 2.6062, loss: 1.2296 ||: 100%|##########| 287/287 [00:18<00:00, 16.82it/s] |
|
2024-06-29 19:39:08,168 - INFO - tqdm - NullAccuracy: 0.9954, NullF1: 0.8047, Lemma: 0.9819, PosFeats: 0.9758, UD-UAS: 0.9434, UD-LAS: 0.9513, EUD-UAS: 0.8628, EUD-LAS: 0.8680, Misc: 0.9967, SS: 0.9250, SC: 0.9300, Avg: 0.9372, batch_loss: 2.6062, loss: 1.2296 ||: 100%|##########| 287/287 [00:18<00:00, 15.76it/s] |
|
2024-06-29 19:39:08,168 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - Training | Validation |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.909 | 0.937 |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.781 | 0.868 |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.780 | 0.863 |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.978 | 0.982 |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.997 | 0.997 |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.995 | 0.995 |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.815 | 0.805 |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.976 | 0.976 |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - SC | 0.928 | 0.930 |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - SS | 0.924 | 0.925 |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.913 | 0.951 |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.907 | 0.943 |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 11258.147 | N/A |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - loss | 1.219 | 1.230 |
|
2024-06-29 19:39:08,171 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 5001.953 | N/A |
|
2024-06-29 19:39:12,912 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:02:43.508106 |
|
2024-06-29 19:39:12,912 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:05:08 |
|
2024-06-29 19:39:12,912 - INFO - allennlp.training.gradient_descent_trainer - Epoch 8/9 |
|
2024-06-29 19:39:12,913 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.9G |
|
2024-06-29 19:39:12,913 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 11G |
|
2024-06-29 19:39:12,914 - INFO - allennlp.training.gradient_descent_trainer - Training |
|
2024-06-29 19:39:12,914 - INFO - tqdm - 0%| | 0/1147 [00:00<?, ?it/s] |
|
2024-06-29 19:39:23,014 - INFO - tqdm - NullAccuracy: 0.9955, NullF1: 0.8197, Lemma: 0.9802, PosFeats: 0.9785, UD-UAS: 0.9125, UD-LAS: 0.9181, EUD-UAS: 0.7944, EUD-LAS: 0.7953, Misc: 0.9966, SS: 0.9291, SC: 0.9310, Avg: 0.9151, batch_loss: 0.9805, loss: 1.1218 ||: 7%|7 | 84/1147 [00:10<02:01, 8.74it/s] |
|
2024-06-29 19:39:33,073 - INFO - tqdm - NullAccuracy: 0.9958, NullF1: 0.8333, Lemma: 0.9804, PosFeats: 0.9790, UD-UAS: 0.9156, UD-LAS: 0.9205, EUD-UAS: 0.8006, EUD-LAS: 0.7997, Misc: 0.9966, SS: 0.9304, SC: 0.9320, Avg: 0.9172, batch_loss: 0.9275, loss: 1.1092 ||: 15%|#4 | 169/1147 [00:20<01:53, 8.63it/s] |
|
2024-06-29 19:39:43,199 - INFO - tqdm - NullAccuracy: 0.9958, NullF1: 0.8386, Lemma: 0.9805, PosFeats: 0.9788, UD-UAS: 0.9136, UD-LAS: 0.9187, EUD-UAS: 0.7971, EUD-LAS: 0.7975, Misc: 0.9966, SS: 0.9297, SC: 0.9314, Avg: 0.9160, batch_loss: 1.2340, loss: 1.1239 ||: 22%|##1 | 252/1147 [00:30<01:44, 8.54it/s] |
|
2024-06-29 19:39:53,220 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8402, Lemma: 0.9807, PosFeats: 0.9788, UD-UAS: 0.9139, UD-LAS: 0.9194, EUD-UAS: 0.7961, EUD-LAS: 0.7971, Misc: 0.9967, SS: 0.9295, SC: 0.9312, Avg: 0.9159, batch_loss: 1.3311, loss: 1.1237 ||: 29%|##9 | 336/1147 [00:40<01:40, 8.04it/s] |
|
2024-06-29 19:40:03,338 - INFO - tqdm - NullAccuracy: 0.9960, NullF1: 0.8404, Lemma: 0.9807, PosFeats: 0.9789, UD-UAS: 0.9135, UD-LAS: 0.9189, EUD-UAS: 0.7953, EUD-LAS: 0.7963, Misc: 0.9968, SS: 0.9298, SC: 0.9321, Avg: 0.9158, batch_loss: 0.9099, loss: 1.1166 ||: 36%|###6 | 417/1147 [00:50<01:30, 8.09it/s] |
|
2024-06-29 19:40:13,432 - INFO - tqdm - NullAccuracy: 0.9960, NullF1: 0.8393, Lemma: 0.9804, PosFeats: 0.9790, UD-UAS: 0.9136, UD-LAS: 0.9190, EUD-UAS: 0.7949, EUD-LAS: 0.7958, Misc: 0.9968, SS: 0.9296, SC: 0.9322, Avg: 0.9157, batch_loss: 1.0461, loss: 1.1155 ||: 44%|####3 | 501/1147 [01:00<01:12, 8.87it/s] |
|
2024-06-29 19:40:23,445 - INFO - tqdm - NullAccuracy: 0.9960, NullF1: 0.8410, Lemma: 0.9805, PosFeats: 0.9793, UD-UAS: 0.9129, UD-LAS: 0.9187, EUD-UAS: 0.7938, EUD-LAS: 0.7953, Misc: 0.9969, SS: 0.9295, SC: 0.9323, Avg: 0.9155, batch_loss: 1.2139, loss: 1.1152 ||: 51%|##### | 584/1147 [01:10<01:04, 8.70it/s] |
|
2024-06-29 19:40:33,459 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8366, Lemma: 0.9805, PosFeats: 0.9792, UD-UAS: 0.9129, UD-LAS: 0.9188, EUD-UAS: 0.7941, EUD-LAS: 0.7957, Misc: 0.9969, SS: 0.9295, SC: 0.9325, Avg: 0.9156, batch_loss: 1.1480, loss: 1.1146 ||: 58%|#####8 | 667/1147 [01:20<00:59, 8.11it/s] |
|
2024-06-29 19:40:43,487 - INFO - tqdm - NullAccuracy: 0.9958, NullF1: 0.8326, Lemma: 0.9806, PosFeats: 0.9792, UD-UAS: 0.9132, UD-LAS: 0.9193, EUD-UAS: 0.7945, EUD-LAS: 0.7961, Misc: 0.9969, SS: 0.9297, SC: 0.9329, Avg: 0.9158, batch_loss: 1.0566, loss: 1.1100 ||: 65%|######5 | 750/1147 [01:30<00:50, 7.79it/s] |
|
2024-06-29 19:40:53,593 - INFO - tqdm - NullAccuracy: 0.9958, NullF1: 0.8332, Lemma: 0.9808, PosFeats: 0.9792, UD-UAS: 0.9133, UD-LAS: 0.9194, EUD-UAS: 0.7952, EUD-LAS: 0.7968, Misc: 0.9969, SS: 0.9299, SC: 0.9332, Avg: 0.9161, batch_loss: 1.3605, loss: 1.1070 ||: 73%|#######2 | 834/1147 [01:40<00:38, 8.19it/s] |
|
2024-06-29 19:41:03,685 - INFO - tqdm - NullAccuracy: 0.9958, NullF1: 0.8321, Lemma: 0.9808, PosFeats: 0.9791, UD-UAS: 0.9132, UD-LAS: 0.9192, EUD-UAS: 0.7953, EUD-LAS: 0.7967, Misc: 0.9969, SS: 0.9299, SC: 0.9332, Avg: 0.9160, batch_loss: 1.1754, loss: 1.1064 ||: 80%|#######9 | 916/1147 [01:50<00:32, 7.07it/s] |
|
2024-06-29 19:41:13,799 - INFO - tqdm - NullAccuracy: 0.9957, NullF1: 0.8291, Lemma: 0.9808, PosFeats: 0.9792, UD-UAS: 0.9132, UD-LAS: 0.9190, EUD-UAS: 0.7952, EUD-LAS: 0.7966, Misc: 0.9970, SS: 0.9300, SC: 0.9334, Avg: 0.9160, batch_loss: 1.3157, loss: 1.1042 ||: 87%|########7 | 998/1147 [02:00<00:17, 8.28it/s] |
|
2024-06-29 19:41:23,896 - INFO - tqdm - NullAccuracy: 0.9957, NullF1: 0.8287, Lemma: 0.9809, PosFeats: 0.9791, UD-UAS: 0.9133, UD-LAS: 0.9192, EUD-UAS: 0.7953, EUD-LAS: 0.7967, Misc: 0.9970, SS: 0.9303, SC: 0.9336, Avg: 0.9161, batch_loss: 1.1725, loss: 1.1040 ||: 94%|#########4| 1081/1147 [02:10<00:08, 8.21it/s] |
|
2024-06-29 19:41:31,266 - INFO - tqdm - NullAccuracy: 0.9957, NullF1: 0.8281, Lemma: 0.9810, PosFeats: 0.9792, UD-UAS: 0.9134, UD-LAS: 0.9193, EUD-UAS: 0.7956, EUD-LAS: 0.7970, Misc: 0.9970, SS: 0.9302, SC: 0.9337, Avg: 0.9163, batch_loss: 1.2624, loss: 1.1023 ||: 100%|#########9| 1142/1147 [02:18<00:00, 8.38it/s] |
|
2024-06-29 19:41:31,377 - INFO - tqdm - NullAccuracy: 0.9957, NullF1: 0.8281, Lemma: 0.9809, PosFeats: 0.9792, UD-UAS: 0.9134, UD-LAS: 0.9194, EUD-UAS: 0.7956, EUD-LAS: 0.7970, Misc: 0.9970, SS: 0.9303, SC: 0.9337, Avg: 0.9163, batch_loss: 1.0048, loss: 1.1022 ||: 100%|#########9| 1143/1147 [02:18<00:00, 8.56it/s] |
|
2024-06-29 19:41:31,503 - INFO - tqdm - NullAccuracy: 0.9957, NullF1: 0.8282, Lemma: 0.9810, PosFeats: 0.9792, UD-UAS: 0.9134, UD-LAS: 0.9193, EUD-UAS: 0.7956, EUD-LAS: 0.7970, Misc: 0.9970, SS: 0.9302, SC: 0.9337, Avg: 0.9163, batch_loss: 1.2619, loss: 1.1023 ||: 100%|#########9| 1144/1147 [02:18<00:00, 8.37it/s] |
|
2024-06-29 19:41:31,646 - INFO - tqdm - NullAccuracy: 0.9957, NullF1: 0.8282, Lemma: 0.9810, PosFeats: 0.9792, UD-UAS: 0.9134, UD-LAS: 0.9193, EUD-UAS: 0.7956, EUD-LAS: 0.7970, Misc: 0.9970, SS: 0.9302, SC: 0.9337, Avg: 0.9163, batch_loss: 1.0054, loss: 1.1022 ||: 100%|#########9| 1145/1147 [02:18<00:00, 7.91it/s] |
|
2024-06-29 19:41:31,779 - INFO - tqdm - NullAccuracy: 0.9957, NullF1: 0.8284, Lemma: 0.9809, PosFeats: 0.9793, UD-UAS: 0.9134, UD-LAS: 0.9193, EUD-UAS: 0.7956, EUD-LAS: 0.7969, Misc: 0.9970, SS: 0.9302, SC: 0.9337, Avg: 0.9163, batch_loss: 0.9726, loss: 1.1021 ||: 100%|#########9| 1146/1147 [02:18<00:00, 7.78it/s] |
|
2024-06-29 19:41:31,878 - INFO - tqdm - NullAccuracy: 0.9957, NullF1: 0.8285, Lemma: 0.9809, PosFeats: 0.9792, UD-UAS: 0.9134, UD-LAS: 0.9193, EUD-UAS: 0.7956, EUD-LAS: 0.7969, Misc: 0.9970, SS: 0.9302, SC: 0.9337, Avg: 0.9163, batch_loss: 1.1922, loss: 1.1022 ||: 100%|##########| 1147/1147 [02:18<00:00, 8.25it/s] |
|
2024-06-29 19:41:31,879 - INFO - allennlp.training.gradient_descent_trainer - Validating |
|
2024-06-29 19:41:31,880 - INFO - tqdm - 0%| | 0/287 [00:00<?, ?it/s] |
|
2024-06-29 19:41:41,987 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8142, Lemma: 0.9819, PosFeats: 0.9759, UD-UAS: 0.9432, UD-LAS: 0.9515, EUD-UAS: 0.8512, EUD-LAS: 0.8579, Misc: 0.9968, SS: 0.9257, SC: 0.9303, Avg: 0.9349, batch_loss: 1.3403, loss: 1.2645 ||: 54%|#####4 | 156/287 [00:10<00:08, 16.14it/s] |
|
2024-06-29 19:41:49,948 - INFO - tqdm - NullAccuracy: 0.9956, NullF1: 0.8178, Lemma: 0.9835, PosFeats: 0.9769, UD-UAS: 0.9457, UD-LAS: 0.9536, EUD-UAS: 0.8560, EUD-LAS: 0.8612, Misc: 0.9968, SS: 0.9276, SC: 0.9327, Avg: 0.9371, batch_loss: 2.6657, loss: 1.2141 ||: 100%|##########| 287/287 [00:18<00:00, 16.89it/s] |
|
2024-06-29 19:41:49,948 - INFO - tqdm - NullAccuracy: 0.9956, NullF1: 0.8178, Lemma: 0.9835, PosFeats: 0.9769, UD-UAS: 0.9457, UD-LAS: 0.9536, EUD-UAS: 0.8560, EUD-LAS: 0.8612, Misc: 0.9968, SS: 0.9276, SC: 0.9327, Avg: 0.9371, batch_loss: 2.6657, loss: 1.2141 ||: 100%|##########| 287/287 [00:18<00:00, 15.88it/s] |
|
2024-06-29 19:41:49,949 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. |
|
2024-06-29 19:41:49,951 - INFO - allennlp.training.callbacks.console_logger - Training | Validation |
|
2024-06-29 19:41:49,951 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.916 | 0.937 |
|
2024-06-29 19:41:49,951 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.797 | 0.861 |
|
2024-06-29 19:41:49,952 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.796 | 0.856 |
|
2024-06-29 19:41:49,952 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.981 | 0.983 |
|
2024-06-29 19:41:49,952 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.997 | 0.997 |
|
2024-06-29 19:41:49,952 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.996 | 0.996 |
|
2024-06-29 19:41:49,952 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.828 | 0.818 |
|
2024-06-29 19:41:49,952 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.979 | 0.977 |
|
2024-06-29 19:41:49,952 - INFO - allennlp.training.callbacks.console_logger - SC | 0.934 | 0.933 |
|
2024-06-29 19:41:49,952 - INFO - allennlp.training.callbacks.console_logger - SS | 0.930 | 0.928 |
|
2024-06-29 19:41:49,952 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.919 | 0.954 |
|
2024-06-29 19:41:49,952 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.913 | 0.946 |
|
2024-06-29 19:41:49,952 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 11302.523 | N/A |
|
2024-06-29 19:41:49,952 - INFO - allennlp.training.callbacks.console_logger - loss | 1.102 | 1.214 |
|
2024-06-29 19:41:49,952 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 5001.953 | N/A |
|
2024-06-29 19:41:54,667 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:02:41.754446 |
|
2024-06-29 19:41:54,667 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:02:35 |
|
2024-06-29 19:41:54,667 - INFO - allennlp.training.gradient_descent_trainer - Epoch 9/9 |
|
2024-06-29 19:41:54,667 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.9G |
|
2024-06-29 19:41:54,667 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 11G |
|
2024-06-29 19:41:54,669 - INFO - allennlp.training.gradient_descent_trainer - Training |
|
2024-06-29 19:41:54,669 - INFO - tqdm - 0%| | 0/1147 [00:00<?, ?it/s] |
|
2024-06-29 19:42:04,785 - INFO - tqdm - NullAccuracy: 0.9960, NullF1: 0.8455, Lemma: 0.9838, PosFeats: 0.9822, UD-UAS: 0.9153, UD-LAS: 0.9219, EUD-UAS: 0.7983, EUD-LAS: 0.7996, Misc: 0.9972, SS: 0.9327, SC: 0.9401, Avg: 0.9190, batch_loss: 0.7848, loss: 1.0079 ||: 7%|7 | 82/1147 [00:10<02:03, 8.60it/s] |
|
2024-06-29 19:42:14,888 - INFO - tqdm - NullAccuracy: 0.9958, NullF1: 0.8307, Lemma: 0.9834, PosFeats: 0.9812, UD-UAS: 0.9158, UD-LAS: 0.9221, EUD-UAS: 0.7977, EUD-LAS: 0.7990, Misc: 0.9972, SS: 0.9333, SC: 0.9407, Avg: 0.9189, batch_loss: 0.8913, loss: 1.0137 ||: 14%|#4 | 164/1147 [00:20<01:58, 8.29it/s] |
|
2024-06-29 19:42:24,990 - INFO - tqdm - NullAccuracy: 0.9960, NullF1: 0.8409, Lemma: 0.9837, PosFeats: 0.9817, UD-UAS: 0.9158, UD-LAS: 0.9221, EUD-UAS: 0.7978, EUD-LAS: 0.7996, Misc: 0.9973, SS: 0.9337, SC: 0.9399, Avg: 0.9191, batch_loss: 1.0167, loss: 1.0196 ||: 22%|##1 | 247/1147 [00:30<01:50, 8.17it/s] |
|
2024-06-29 19:42:35,045 - INFO - tqdm - NullAccuracy: 0.9960, NullF1: 0.8411, Lemma: 0.9835, PosFeats: 0.9812, UD-UAS: 0.9172, UD-LAS: 0.9230, EUD-UAS: 0.7994, EUD-LAS: 0.8012, Misc: 0.9974, SS: 0.9339, SC: 0.9400, Avg: 0.9196, batch_loss: 0.8981, loss: 1.0156 ||: 29%|##8 | 329/1147 [00:40<01:37, 8.37it/s] |
|
2024-06-29 19:42:45,116 - INFO - tqdm - NullAccuracy: 0.9961, NullF1: 0.8441, Lemma: 0.9835, PosFeats: 0.9814, UD-UAS: 0.9172, UD-LAS: 0.9227, EUD-UAS: 0.8001, EUD-LAS: 0.8021, Misc: 0.9976, SS: 0.9341, SC: 0.9403, Avg: 0.9199, batch_loss: 1.0670, loss: 1.0123 ||: 36%|###5 | 411/1147 [00:50<01:29, 8.24it/s] |
|
2024-06-29 19:42:55,169 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8396, Lemma: 0.9836, PosFeats: 0.9812, UD-UAS: 0.9174, UD-LAS: 0.9228, EUD-UAS: 0.8005, EUD-LAS: 0.8022, Misc: 0.9976, SS: 0.9340, SC: 0.9400, Avg: 0.9199, batch_loss: 1.3882, loss: 1.0119 ||: 43%|####3 | 495/1147 [01:00<01:18, 8.34it/s] |
|
2024-06-29 19:43:05,240 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8361, Lemma: 0.9833, PosFeats: 0.9812, UD-UAS: 0.9177, UD-LAS: 0.9230, EUD-UAS: 0.8014, EUD-LAS: 0.8029, Misc: 0.9976, SS: 0.9341, SC: 0.9399, Avg: 0.9201, batch_loss: 0.8409, loss: 1.0118 ||: 50%|##### | 577/1147 [01:10<01:10, 8.05it/s] |
|
2024-06-29 19:43:15,299 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8368, Lemma: 0.9834, PosFeats: 0.9810, UD-UAS: 0.9178, UD-LAS: 0.9233, EUD-UAS: 0.8015, EUD-LAS: 0.8031, Misc: 0.9976, SS: 0.9341, SC: 0.9399, Avg: 0.9202, batch_loss: 1.0741, loss: 1.0137 ||: 58%|#####7 | 661/1147 [01:20<00:57, 8.39it/s] |
|
2024-06-29 19:43:25,363 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8370, Lemma: 0.9833, PosFeats: 0.9812, UD-UAS: 0.9173, UD-LAS: 0.9229, EUD-UAS: 0.8011, EUD-LAS: 0.8028, Misc: 0.9976, SS: 0.9340, SC: 0.9400, Avg: 0.9200, batch_loss: 0.8668, loss: 1.0147 ||: 65%|######4 | 742/1147 [01:30<00:48, 8.33it/s] |
|
2024-06-29 19:43:35,447 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8348, Lemma: 0.9833, PosFeats: 0.9811, UD-UAS: 0.9175, UD-LAS: 0.9234, EUD-UAS: 0.8016, EUD-LAS: 0.8035, Misc: 0.9976, SS: 0.9343, SC: 0.9398, Avg: 0.9202, batch_loss: 0.9316, loss: 1.0138 ||: 72%|#######2 | 826/1147 [01:40<00:37, 8.51it/s] |
|
2024-06-29 19:43:45,513 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8348, Lemma: 0.9833, PosFeats: 0.9811, UD-UAS: 0.9181, UD-LAS: 0.9239, EUD-UAS: 0.8025, EUD-LAS: 0.8043, Misc: 0.9976, SS: 0.9345, SC: 0.9398, Avg: 0.9206, batch_loss: 1.3381, loss: 1.0087 ||: 79%|#######9 | 910/1147 [01:50<00:29, 7.95it/s] |
|
2024-06-29 19:43:55,624 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8362, Lemma: 0.9834, PosFeats: 0.9811, UD-UAS: 0.9186, UD-LAS: 0.9244, EUD-UAS: 0.8035, EUD-LAS: 0.8055, Misc: 0.9976, SS: 0.9344, SC: 0.9397, Avg: 0.9209, batch_loss: 1.5786, loss: 1.0056 ||: 87%|########6 | 995/1147 [02:00<00:18, 8.35it/s] |
|
2024-06-29 19:44:05,698 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8357, Lemma: 0.9835, PosFeats: 0.9811, UD-UAS: 0.9188, UD-LAS: 0.9245, EUD-UAS: 0.8040, EUD-LAS: 0.8060, Misc: 0.9976, SS: 0.9345, SC: 0.9396, Avg: 0.9211, batch_loss: 0.8823, loss: 1.0035 ||: 94%|#########4| 1079/1147 [02:11<00:07, 8.56it/s] |
|
2024-06-29 19:44:13,178 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8350, Lemma: 0.9834, PosFeats: 0.9812, UD-UAS: 0.9194, UD-LAS: 0.9251, EUD-UAS: 0.8052, EUD-LAS: 0.8070, Misc: 0.9976, SS: 0.9344, SC: 0.9396, Avg: 0.9214, batch_loss: 1.2278, loss: 1.0012 ||: 100%|#########9| 1142/1147 [02:18<00:00, 8.20it/s] |
|
2024-06-29 19:44:13,290 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8352, Lemma: 0.9834, PosFeats: 0.9812, UD-UAS: 0.9194, UD-LAS: 0.9251, EUD-UAS: 0.8052, EUD-LAS: 0.8069, Misc: 0.9976, SS: 0.9344, SC: 0.9396, Avg: 0.9214, batch_loss: 1.1826, loss: 1.0013 ||: 100%|#########9| 1143/1147 [02:18<00:00, 8.39it/s] |
|
2024-06-29 19:44:13,412 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8352, Lemma: 0.9834, PosFeats: 0.9812, UD-UAS: 0.9194, UD-LAS: 0.9251, EUD-UAS: 0.8052, EUD-LAS: 0.8070, Misc: 0.9976, SS: 0.9344, SC: 0.9396, Avg: 0.9214, batch_loss: 0.7944, loss: 1.0012 ||: 100%|#########9| 1144/1147 [02:18<00:00, 8.34it/s] |
|
2024-06-29 19:44:13,524 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8353, Lemma: 0.9834, PosFeats: 0.9812, UD-UAS: 0.9194, UD-LAS: 0.9251, EUD-UAS: 0.8052, EUD-LAS: 0.8070, Misc: 0.9976, SS: 0.9344, SC: 0.9396, Avg: 0.9214, batch_loss: 1.1721, loss: 1.0013 ||: 100%|#########9| 1145/1147 [02:18<00:00, 8.52it/s] |
|
2024-06-29 19:44:13,650 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8352, Lemma: 0.9834, PosFeats: 0.9812, UD-UAS: 0.9194, UD-LAS: 0.9250, EUD-UAS: 0.8052, EUD-LAS: 0.8070, Misc: 0.9976, SS: 0.9344, SC: 0.9396, Avg: 0.9214, batch_loss: 1.0943, loss: 1.0014 ||: 100%|#########9| 1146/1147 [02:18<00:00, 8.33it/s] |
|
2024-06-29 19:44:13,748 - INFO - tqdm - NullAccuracy: 0.9959, NullF1: 0.8353, Lemma: 0.9834, PosFeats: 0.9812, UD-UAS: 0.9194, UD-LAS: 0.9251, EUD-UAS: 0.8052, EUD-LAS: 0.8070, Misc: 0.9976, SS: 0.9344, SC: 0.9396, Avg: 0.9214, batch_loss: 0.9567, loss: 1.0014 ||: 100%|##########| 1147/1147 [02:19<00:00, 8.25it/s] |
|
2024-06-29 19:44:13,749 - INFO - allennlp.training.gradient_descent_trainer - Validating |
|
2024-06-29 19:44:13,750 - INFO - tqdm - 0%| | 0/287 [00:00<?, ?it/s] |
|
2024-06-29 19:44:23,841 - INFO - tqdm - NullAccuracy: 0.9953, NullF1: 0.8150, Lemma: 0.9822, PosFeats: 0.9764, UD-UAS: 0.9441, UD-LAS: 0.9521, EUD-UAS: 0.8602, EUD-LAS: 0.8667, Misc: 0.9970, SS: 0.9276, SC: 0.9317, Avg: 0.9376, batch_loss: 1.0500, loss: 1.2419 ||: 55%|#####5 | 158/287 [00:10<00:07, 16.87it/s] |
|
2024-06-29 19:44:31,553 - INFO - tqdm - NullAccuracy: 0.9956, NullF1: 0.8189, Lemma: 0.9838, PosFeats: 0.9776, UD-UAS: 0.9472, UD-LAS: 0.9548, EUD-UAS: 0.8649, EUD-LAS: 0.8704, Misc: 0.9969, SS: 0.9296, SC: 0.9345, Avg: 0.9400, batch_loss: 1.7458, loss: 1.1879 ||: 100%|#########9| 286/287 [00:17<00:00, 16.61it/s] |
|
2024-06-29 19:44:31,599 - INFO - tqdm - NullAccuracy: 0.9956, NullF1: 0.8188, Lemma: 0.9838, PosFeats: 0.9775, UD-UAS: 0.9471, UD-LAS: 0.9546, EUD-UAS: 0.8646, EUD-LAS: 0.8701, Misc: 0.9969, SS: 0.9295, SC: 0.9344, Avg: 0.9398, batch_loss: 2.6710, loss: 1.1931 ||: 100%|##########| 287/287 [00:17<00:00, 16.08it/s] |
|
2024-06-29 19:44:31,599 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. |
|
2024-06-29 19:44:31,602 - INFO - allennlp.training.callbacks.console_logger - Training | Validation |
|
2024-06-29 19:44:31,602 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.921 | 0.940 |
|
2024-06-29 19:44:31,602 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.807 | 0.870 |
|
2024-06-29 19:44:31,602 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.805 | 0.865 |
|
2024-06-29 19:44:31,602 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.983 | 0.984 |
|
2024-06-29 19:44:31,602 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.998 | 0.997 |
|
2024-06-29 19:44:31,602 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.996 | 0.996 |
|
2024-06-29 19:44:31,602 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.835 | 0.819 |
|
2024-06-29 19:44:31,603 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.981 | 0.978 |
|
2024-06-29 19:44:31,603 - INFO - allennlp.training.callbacks.console_logger - SC | 0.940 | 0.934 |
|
2024-06-29 19:44:31,603 - INFO - allennlp.training.callbacks.console_logger - SS | 0.934 | 0.929 |
|
2024-06-29 19:44:31,603 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.925 | 0.955 |
|
2024-06-29 19:44:31,603 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.919 | 0.947 |
|
2024-06-29 19:44:31,603 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 11258.240 | N/A |
|
2024-06-29 19:44:31,603 - INFO - allennlp.training.callbacks.console_logger - loss | 1.001 | 1.193 |
|
2024-06-29 19:44:31,603 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 5002.051 | N/A |
|
2024-06-29 19:44:36,559 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:02:41.891413 |
|
2024-06-29 19:44:36,561 - INFO - allennlp.common.util - Metrics: { |
|
"best_epoch": 9, |
|
"peak_worker_0_memory_MB": 5002.05078125, |
|
"peak_gpu_0_memory_MB": 11409.91064453125, |
|
"training_duration": "0:25:57.194477", |
|
"epoch": 9, |
|
"training_NullAccuracy": 0.9959010905042368, |
|
"training_NullF1": 0.8352854251861572, |
|
"training_Lemma": 0.9834424902494665, |
|
"training_PosFeats": 0.9811832906505086, |
|
"training_UD-UAS": 0.9193629114176333, |
|
"training_UD-LAS": 0.9250535106478481, |
|
"training_EUD-UAS": 0.8051802036896266, |
|
"training_EUD-LAS": 0.8069673531803114, |
|
"training_Misc": 0.9976119935947866, |
|
"training_SS": 0.9344274643054417, |
|
"training_SC": 0.9396247259093116, |
|
"training_Avg": 0.9214282159605484, |
|
"training_loss": 1.0013585822314102, |
|
"training_worker_0_memory_MB": 5002.05078125, |
|
"training_gpu_0_memory_MB": 11258.240234375, |
|
"validation_NullAccuracy": 0.9956414668576316, |
|
"validation_NullF1": 0.8188437223434448, |
|
"validation_Lemma": 0.9837974515087999, |
|
"validation_PosFeats": 0.9775191169552898, |
|
"validation_UD-UAS": 0.9471266043421227, |
|
"validation_UD-LAS": 0.9546475382174867, |
|
"validation_EUD-UAS": 0.8646401306741709, |
|
"validation_EUD-LAS": 0.8701164952135139, |
|
"validation_Misc": 0.996925971710188, |
|
"validation_SS": 0.9294833225760576, |
|
"validation_SC": 0.9343842644758289, |
|
"validation_Avg": 0.939848988408162, |
|
"validation_loss": 1.1931014731785976, |
|
"best_validation_NullAccuracy": 0.9956414668576316, |
|
"best_validation_NullF1": 0.8188437223434448, |
|
"best_validation_Lemma": 0.9837974515087999, |
|
"best_validation_PosFeats": 0.9775191169552898, |
|
"best_validation_UD-UAS": 0.9471266043421227, |
|
"best_validation_UD-LAS": 0.9546475382174867, |
|
"best_validation_EUD-UAS": 0.8646401306741709, |
|
"best_validation_EUD-LAS": 0.8701164952135139, |
|
"best_validation_Misc": 0.996925971710188, |
|
"best_validation_SS": 0.9294833225760576, |
|
"best_validation_SC": 0.9343842644758289, |
|
"best_validation_Avg": 0.939848988408162, |
|
"best_validation_loss": 1.1931014731785976 |
|
} |
|
2024-06-29 19:44:36,561 - INFO - allennlp.models.archival - archiving weights and vocabulary to serialization/xlm-roberta-base-ru/model.tar.gz |
|
|