|
2023-10-23 21:43:27,817 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:43:27,818 Model: "SequenceTagger( |
|
(embeddings): TransformerWordEmbeddings( |
|
(model): BertModel( |
|
(embeddings): BertEmbeddings( |
|
(word_embeddings): Embedding(64001, 768) |
|
(position_embeddings): Embedding(512, 768) |
|
(token_type_embeddings): Embedding(2, 768) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): BertEncoder( |
|
(layer): ModuleList( |
|
(0): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(1): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(2): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(3): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(4): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(5): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(6): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(7): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(8): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(9): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(10): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(11): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
) |
|
(pooler): BertPooler( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(activation): Tanh() |
|
) |
|
) |
|
) |
|
(locked_dropout): LockedDropout(p=0.5) |
|
(linear): Linear(in_features=768, out_features=21, bias=True) |
|
(loss_function): CrossEntropyLoss() |
|
)" |
|
2023-10-23 21:43:27,818 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:43:27,819 MultiCorpus: 3575 train + 1235 dev + 1266 test sentences |
|
- NER_HIPE_2022 Corpus: 3575 train + 1235 dev + 1266 test sentences - /home/ubuntu/.flair/datasets/ner_hipe_2022/v2.1/hipe2020/de/with_doc_seperator |
|
2023-10-23 21:43:27,819 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:43:27,819 Train: 3575 sentences |
|
2023-10-23 21:43:27,819 (train_with_dev=False, train_with_test=False) |
|
2023-10-23 21:43:27,819 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:43:27,819 Training Params: |
|
2023-10-23 21:43:27,819 - learning_rate: "3e-05" |
|
2023-10-23 21:43:27,819 - mini_batch_size: "8" |
|
2023-10-23 21:43:27,819 - max_epochs: "10" |
|
2023-10-23 21:43:27,819 - shuffle: "True" |
|
2023-10-23 21:43:27,819 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:43:27,819 Plugins: |
|
2023-10-23 21:43:27,819 - TensorboardLogger |
|
2023-10-23 21:43:27,819 - LinearScheduler | warmup_fraction: '0.1' |
|
2023-10-23 21:43:27,819 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:43:27,819 Final evaluation on model from best epoch (best-model.pt) |
|
2023-10-23 21:43:27,819 - metric: "('micro avg', 'f1-score')" |
|
2023-10-23 21:43:27,819 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:43:27,819 Computation: |
|
2023-10-23 21:43:27,819 - compute on device: cuda:0 |
|
2023-10-23 21:43:27,819 - embedding storage: none |
|
2023-10-23 21:43:27,819 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:43:27,819 Model training base path: "hmbench-hipe2020/de-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs8-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-3" |
|
2023-10-23 21:43:27,819 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:43:27,819 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:43:27,819 Logging anything other than scalars to TensorBoard is currently not supported. |
|
2023-10-23 21:43:31,549 epoch 1 - iter 44/447 - loss 2.59736907 - time (sec): 3.73 - samples/sec: 2232.32 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 21:43:35,682 epoch 1 - iter 88/447 - loss 1.66480304 - time (sec): 7.86 - samples/sec: 2185.19 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 21:43:39,656 epoch 1 - iter 132/447 - loss 1.26020851 - time (sec): 11.84 - samples/sec: 2198.48 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 21:43:43,583 epoch 1 - iter 176/447 - loss 1.03511087 - time (sec): 15.76 - samples/sec: 2203.99 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 21:43:47,775 epoch 1 - iter 220/447 - loss 0.89283125 - time (sec): 19.95 - samples/sec: 2194.86 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 21:43:51,681 epoch 1 - iter 264/447 - loss 0.80764086 - time (sec): 23.86 - samples/sec: 2180.36 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 21:43:55,776 epoch 1 - iter 308/447 - loss 0.73311634 - time (sec): 27.96 - samples/sec: 2167.82 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 21:43:59,480 epoch 1 - iter 352/447 - loss 0.67644386 - time (sec): 31.66 - samples/sec: 2169.32 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 21:44:03,410 epoch 1 - iter 396/447 - loss 0.62981661 - time (sec): 35.59 - samples/sec: 2163.10 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 21:44:07,550 epoch 1 - iter 440/447 - loss 0.59048312 - time (sec): 39.73 - samples/sec: 2145.47 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 21:44:08,172 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:44:08,172 EPOCH 1 done: loss 0.5849 - lr: 0.000029 |
|
2023-10-23 21:44:13,024 DEV : loss 0.15741746127605438 - f1-score (micro avg) 0.6304 |
|
2023-10-23 21:44:13,044 saving best model |
|
2023-10-23 21:44:13,611 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:44:17,779 epoch 2 - iter 44/447 - loss 0.18063276 - time (sec): 4.17 - samples/sec: 2257.14 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-23 21:44:21,569 epoch 2 - iter 88/447 - loss 0.18528584 - time (sec): 7.96 - samples/sec: 2187.93 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 21:44:25,757 epoch 2 - iter 132/447 - loss 0.16599237 - time (sec): 12.15 - samples/sec: 2182.35 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 21:44:29,711 epoch 2 - iter 176/447 - loss 0.15670000 - time (sec): 16.10 - samples/sec: 2169.31 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 21:44:33,587 epoch 2 - iter 220/447 - loss 0.15648904 - time (sec): 19.98 - samples/sec: 2181.11 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 21:44:37,531 epoch 2 - iter 264/447 - loss 0.15443719 - time (sec): 23.92 - samples/sec: 2157.18 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 21:44:41,316 epoch 2 - iter 308/447 - loss 0.14760432 - time (sec): 27.70 - samples/sec: 2166.92 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 21:44:45,037 epoch 2 - iter 352/447 - loss 0.14586645 - time (sec): 31.43 - samples/sec: 2162.64 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 21:44:49,356 epoch 2 - iter 396/447 - loss 0.14259641 - time (sec): 35.74 - samples/sec: 2167.87 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 21:44:53,194 epoch 2 - iter 440/447 - loss 0.14126909 - time (sec): 39.58 - samples/sec: 2155.17 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 21:44:53,795 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:44:53,795 EPOCH 2 done: loss 0.1402 - lr: 0.000027 |
|
2023-10-23 21:45:00,267 DEV : loss 0.13381491601467133 - f1-score (micro avg) 0.7117 |
|
2023-10-23 21:45:00,287 saving best model |
|
2023-10-23 21:45:00,985 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:45:05,601 epoch 3 - iter 44/447 - loss 0.06751128 - time (sec): 4.62 - samples/sec: 2259.03 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 21:45:09,604 epoch 3 - iter 88/447 - loss 0.07069500 - time (sec): 8.62 - samples/sec: 2206.18 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 21:45:13,531 epoch 3 - iter 132/447 - loss 0.07976465 - time (sec): 12.55 - samples/sec: 2175.25 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 21:45:17,346 epoch 3 - iter 176/447 - loss 0.07651757 - time (sec): 16.36 - samples/sec: 2160.96 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 21:45:21,434 epoch 3 - iter 220/447 - loss 0.07807169 - time (sec): 20.45 - samples/sec: 2136.55 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 21:45:25,359 epoch 3 - iter 264/447 - loss 0.07678230 - time (sec): 24.37 - samples/sec: 2141.63 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 21:45:29,201 epoch 3 - iter 308/447 - loss 0.07502733 - time (sec): 28.22 - samples/sec: 2169.75 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 21:45:32,828 epoch 3 - iter 352/447 - loss 0.07417559 - time (sec): 31.84 - samples/sec: 2155.29 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 21:45:36,858 epoch 3 - iter 396/447 - loss 0.07690418 - time (sec): 35.87 - samples/sec: 2139.50 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 21:45:40,794 epoch 3 - iter 440/447 - loss 0.07512869 - time (sec): 39.81 - samples/sec: 2144.65 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 21:45:41,344 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:45:41,344 EPOCH 3 done: loss 0.0747 - lr: 0.000023 |
|
2023-10-23 21:45:47,862 DEV : loss 0.1403728574514389 - f1-score (micro avg) 0.7576 |
|
2023-10-23 21:45:47,882 saving best model |
|
2023-10-23 21:45:48,534 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:45:52,383 epoch 4 - iter 44/447 - loss 0.04429873 - time (sec): 3.85 - samples/sec: 2190.16 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 21:45:56,211 epoch 4 - iter 88/447 - loss 0.05556305 - time (sec): 7.68 - samples/sec: 2182.82 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 21:46:00,413 epoch 4 - iter 132/447 - loss 0.04771936 - time (sec): 11.88 - samples/sec: 2185.70 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 21:46:04,432 epoch 4 - iter 176/447 - loss 0.04763457 - time (sec): 15.90 - samples/sec: 2148.76 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 21:46:08,771 epoch 4 - iter 220/447 - loss 0.04883475 - time (sec): 20.24 - samples/sec: 2164.09 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 21:46:12,638 epoch 4 - iter 264/447 - loss 0.05042629 - time (sec): 24.10 - samples/sec: 2149.11 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 21:46:16,490 epoch 4 - iter 308/447 - loss 0.04933331 - time (sec): 27.95 - samples/sec: 2138.45 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 21:46:20,184 epoch 4 - iter 352/447 - loss 0.04993052 - time (sec): 31.65 - samples/sec: 2134.04 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 21:46:24,364 epoch 4 - iter 396/447 - loss 0.05054757 - time (sec): 35.83 - samples/sec: 2125.87 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 21:46:28,318 epoch 4 - iter 440/447 - loss 0.04943137 - time (sec): 39.78 - samples/sec: 2133.38 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 21:46:29,180 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:46:29,180 EPOCH 4 done: loss 0.0495 - lr: 0.000020 |
|
2023-10-23 21:46:35,657 DEV : loss 0.15535356104373932 - f1-score (micro avg) 0.7538 |
|
2023-10-23 21:46:35,677 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:46:39,548 epoch 5 - iter 44/447 - loss 0.03078265 - time (sec): 3.87 - samples/sec: 2225.40 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 21:46:44,002 epoch 5 - iter 88/447 - loss 0.03386077 - time (sec): 8.32 - samples/sec: 2240.13 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 21:46:47,844 epoch 5 - iter 132/447 - loss 0.02800467 - time (sec): 12.17 - samples/sec: 2207.49 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 21:46:51,925 epoch 5 - iter 176/447 - loss 0.02859791 - time (sec): 16.25 - samples/sec: 2192.30 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 21:46:55,830 epoch 5 - iter 220/447 - loss 0.02933140 - time (sec): 20.15 - samples/sec: 2186.28 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 21:46:59,903 epoch 5 - iter 264/447 - loss 0.03168646 - time (sec): 24.22 - samples/sec: 2165.59 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 21:47:04,063 epoch 5 - iter 308/447 - loss 0.03078826 - time (sec): 28.38 - samples/sec: 2153.17 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 21:47:07,934 epoch 5 - iter 352/447 - loss 0.03164438 - time (sec): 32.26 - samples/sec: 2137.69 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 21:47:11,983 epoch 5 - iter 396/447 - loss 0.03204700 - time (sec): 36.30 - samples/sec: 2133.66 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 21:47:15,699 epoch 5 - iter 440/447 - loss 0.03119195 - time (sec): 40.02 - samples/sec: 2133.16 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 21:47:16,246 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:47:16,246 EPOCH 5 done: loss 0.0309 - lr: 0.000017 |
|
2023-10-23 21:47:22,748 DEV : loss 0.19321992993354797 - f1-score (micro avg) 0.7672 |
|
2023-10-23 21:47:22,769 saving best model |
|
2023-10-23 21:47:23,478 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:47:27,940 epoch 6 - iter 44/447 - loss 0.02741518 - time (sec): 4.46 - samples/sec: 2090.87 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 21:47:31,462 epoch 6 - iter 88/447 - loss 0.02648322 - time (sec): 7.98 - samples/sec: 2098.54 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 21:47:35,446 epoch 6 - iter 132/447 - loss 0.02696457 - time (sec): 11.97 - samples/sec: 2118.34 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 21:47:40,129 epoch 6 - iter 176/447 - loss 0.02361068 - time (sec): 16.65 - samples/sec: 2081.86 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 21:47:44,245 epoch 6 - iter 220/447 - loss 0.02276207 - time (sec): 20.77 - samples/sec: 2080.13 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 21:47:48,072 epoch 6 - iter 264/447 - loss 0.02276839 - time (sec): 24.59 - samples/sec: 2086.22 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 21:47:51,854 epoch 6 - iter 308/447 - loss 0.02374098 - time (sec): 28.37 - samples/sec: 2087.49 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 21:47:55,586 epoch 6 - iter 352/447 - loss 0.02378282 - time (sec): 32.11 - samples/sec: 2108.41 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 21:47:59,400 epoch 6 - iter 396/447 - loss 0.02305597 - time (sec): 35.92 - samples/sec: 2122.86 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 21:48:03,543 epoch 6 - iter 440/447 - loss 0.02262792 - time (sec): 40.06 - samples/sec: 2126.09 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 21:48:04,170 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:48:04,170 EPOCH 6 done: loss 0.0228 - lr: 0.000013 |
|
2023-10-23 21:48:10,648 DEV : loss 0.2212265431880951 - f1-score (micro avg) 0.7681 |
|
2023-10-23 21:48:10,668 saving best model |
|
2023-10-23 21:48:11,380 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:48:15,630 epoch 7 - iter 44/447 - loss 0.02011576 - time (sec): 4.25 - samples/sec: 2161.60 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 21:48:20,236 epoch 7 - iter 88/447 - loss 0.02058168 - time (sec): 8.86 - samples/sec: 2129.94 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 21:48:24,023 epoch 7 - iter 132/447 - loss 0.01673971 - time (sec): 12.64 - samples/sec: 2161.41 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 21:48:27,774 epoch 7 - iter 176/447 - loss 0.01674535 - time (sec): 16.39 - samples/sec: 2137.16 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 21:48:31,840 epoch 7 - iter 220/447 - loss 0.01634720 - time (sec): 20.46 - samples/sec: 2107.89 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 21:48:35,738 epoch 7 - iter 264/447 - loss 0.01512947 - time (sec): 24.36 - samples/sec: 2110.43 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 21:48:39,714 epoch 7 - iter 308/447 - loss 0.01474730 - time (sec): 28.33 - samples/sec: 2126.89 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 21:48:43,633 epoch 7 - iter 352/447 - loss 0.01413429 - time (sec): 32.25 - samples/sec: 2123.06 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 21:48:47,542 epoch 7 - iter 396/447 - loss 0.01551299 - time (sec): 36.16 - samples/sec: 2132.18 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 21:48:51,431 epoch 7 - iter 440/447 - loss 0.01500511 - time (sec): 40.05 - samples/sec: 2128.34 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 21:48:52,048 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:48:52,049 EPOCH 7 done: loss 0.0151 - lr: 0.000010 |
|
2023-10-23 21:48:58,550 DEV : loss 0.20411019027233124 - f1-score (micro avg) 0.7805 |
|
2023-10-23 21:48:58,570 saving best model |
|
2023-10-23 21:48:59,286 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:49:03,512 epoch 8 - iter 44/447 - loss 0.01270864 - time (sec): 4.23 - samples/sec: 2032.12 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 21:49:07,306 epoch 8 - iter 88/447 - loss 0.01253401 - time (sec): 8.02 - samples/sec: 2083.82 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 21:49:11,270 epoch 8 - iter 132/447 - loss 0.01166606 - time (sec): 11.98 - samples/sec: 2081.78 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 21:49:15,017 epoch 8 - iter 176/447 - loss 0.01169154 - time (sec): 15.73 - samples/sec: 2096.62 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 21:49:19,083 epoch 8 - iter 220/447 - loss 0.01146001 - time (sec): 19.80 - samples/sec: 2091.88 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 21:49:22,864 epoch 8 - iter 264/447 - loss 0.01101559 - time (sec): 23.58 - samples/sec: 2107.80 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 21:49:27,278 epoch 8 - iter 308/447 - loss 0.01131528 - time (sec): 27.99 - samples/sec: 2116.82 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 21:49:31,122 epoch 8 - iter 352/447 - loss 0.01058721 - time (sec): 31.84 - samples/sec: 2113.39 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 21:49:35,175 epoch 8 - iter 396/447 - loss 0.01001339 - time (sec): 35.89 - samples/sec: 2129.54 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 21:49:39,230 epoch 8 - iter 440/447 - loss 0.00967542 - time (sec): 39.94 - samples/sec: 2134.83 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 21:49:39,875 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:49:39,876 EPOCH 8 done: loss 0.0095 - lr: 0.000007 |
|
2023-10-23 21:49:46,389 DEV : loss 0.225086510181427 - f1-score (micro avg) 0.7789 |
|
2023-10-23 21:49:46,409 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:49:50,173 epoch 9 - iter 44/447 - loss 0.00403557 - time (sec): 3.76 - samples/sec: 2081.58 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 21:49:54,550 epoch 9 - iter 88/447 - loss 0.00765470 - time (sec): 8.14 - samples/sec: 2163.07 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 21:49:58,621 epoch 9 - iter 132/447 - loss 0.00920501 - time (sec): 12.21 - samples/sec: 2172.70 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 21:50:02,655 epoch 9 - iter 176/447 - loss 0.00917938 - time (sec): 16.24 - samples/sec: 2153.71 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 21:50:06,801 epoch 9 - iter 220/447 - loss 0.00894625 - time (sec): 20.39 - samples/sec: 2138.41 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 21:50:11,253 epoch 9 - iter 264/447 - loss 0.00790337 - time (sec): 24.84 - samples/sec: 2127.58 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 21:50:15,046 epoch 9 - iter 308/447 - loss 0.00736902 - time (sec): 28.64 - samples/sec: 2131.12 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 21:50:18,757 epoch 9 - iter 352/447 - loss 0.00748375 - time (sec): 32.35 - samples/sec: 2130.89 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 21:50:22,404 epoch 9 - iter 396/447 - loss 0.00686718 - time (sec): 35.99 - samples/sec: 2128.26 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 21:50:26,194 epoch 9 - iter 440/447 - loss 0.00697380 - time (sec): 39.78 - samples/sec: 2134.97 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 21:50:26,915 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:50:26,916 EPOCH 9 done: loss 0.0068 - lr: 0.000003 |
|
2023-10-23 21:50:33,435 DEV : loss 0.23983320593833923 - f1-score (micro avg) 0.7897 |
|
2023-10-23 21:50:33,456 saving best model |
|
2023-10-23 21:50:34,253 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:50:38,619 epoch 10 - iter 44/447 - loss 0.00392863 - time (sec): 4.37 - samples/sec: 2090.26 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 21:50:42,547 epoch 10 - iter 88/447 - loss 0.00267496 - time (sec): 8.29 - samples/sec: 2111.21 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 21:50:46,964 epoch 10 - iter 132/447 - loss 0.00256060 - time (sec): 12.71 - samples/sec: 2134.72 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 21:50:50,766 epoch 10 - iter 176/447 - loss 0.00265105 - time (sec): 16.51 - samples/sec: 2144.00 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 21:50:54,684 epoch 10 - iter 220/447 - loss 0.00294022 - time (sec): 20.43 - samples/sec: 2130.59 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 21:50:58,612 epoch 10 - iter 264/447 - loss 0.00407805 - time (sec): 24.36 - samples/sec: 2146.37 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 21:51:02,387 epoch 10 - iter 308/447 - loss 0.00366515 - time (sec): 28.13 - samples/sec: 2145.46 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 21:51:06,524 epoch 10 - iter 352/447 - loss 0.00414494 - time (sec): 32.27 - samples/sec: 2151.05 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 21:51:10,214 epoch 10 - iter 396/447 - loss 0.00427295 - time (sec): 35.96 - samples/sec: 2143.80 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-23 21:51:14,104 epoch 10 - iter 440/447 - loss 0.00403470 - time (sec): 39.85 - samples/sec: 2138.07 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-23 21:51:14,719 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:51:14,719 EPOCH 10 done: loss 0.0040 - lr: 0.000000 |
|
2023-10-23 21:51:20,939 DEV : loss 0.24832327663898468 - f1-score (micro avg) 0.7863 |
|
2023-10-23 21:51:21,516 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:51:21,517 Loading model from best epoch ... |
|
2023-10-23 21:51:23,587 SequenceTagger predicts: Dictionary with 21 tags: O, S-loc, B-loc, E-loc, I-loc, S-pers, B-pers, E-pers, I-pers, S-org, B-org, E-org, I-org, S-prod, B-prod, E-prod, I-prod, S-time, B-time, E-time, I-time |
|
2023-10-23 21:51:28,141 |
|
Results: |
|
- F-score (micro) 0.7529 |
|
- F-score (macro) 0.664 |
|
- Accuracy 0.6222 |
|
|
|
By class: |
|
precision recall f1-score support |
|
|
|
loc 0.8486 0.8557 0.8521 596 |
|
pers 0.6675 0.7658 0.7133 333 |
|
org 0.5254 0.4697 0.4960 132 |
|
prod 0.6977 0.4545 0.5505 66 |
|
time 0.7234 0.6939 0.7083 49 |
|
|
|
micro avg 0.7481 0.7577 0.7529 1176 |
|
macro avg 0.6925 0.6479 0.6640 1176 |
|
weighted avg 0.7474 0.7577 0.7499 1176 |
|
|
|
2023-10-23 21:51:28,141 ---------------------------------------------------------------------------------------------------- |
|
|