Update modeling_bert.py
modeling_bert.py  CHANGED  +1 -0
@@ -386,6 +386,7 @@ class BertSelfAttention(nn.Module):
         # Normalize the attention scores to probabilities.
         #attention_probs = nn.functional.softmax(attention_scores, dim=-1)
         attention_probs = softmax_1(attention_scores, dim=-1)
+        print(softmax_1)
 
         # This is actually dropping out entire tokens to attend to, which might
         # seem a bit unusual, but is taken from the original Transformer paper.
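The definition of softmax_1 is not part of this diff. As a point of reference only, here is a minimal PyTorch sketch assuming it follows the "softmax off-by-one" (quiet softmax) formulation, exp(x_i) / (1 + Σ_j exp(x_j)); the name and signature mirror the call site above, but this is an assumption, not this fork's actual implementation.

```python
import torch


def softmax_1(x: torch.Tensor, dim: int = -1) -> torch.Tensor:
    """Softmax with an extra 1 in the denominator ("quiet softmax").

    Assumed definition: equivalent to a regular softmax over x with one
    extra implicit logit fixed at 0, so the outputs can sum to less than
    1 and an attention head can assign near-zero total weight.
    """
    # Shift by the (non-negative) max along `dim` for numerical
    # stability; the implicit zero logit is shifted consistently,
    # so the result is mathematically unchanged.
    m = torch.clamp(x.max(dim=dim, keepdim=True).values, min=0.0)
    e = torch.exp(x - m)
    return e / (torch.exp(-m) + e.sum(dim=dim, keepdim=True))
```

At the call site in BertSelfAttention it would be invoked exactly as the diff shows: attention_probs = softmax_1(attention_scores, dim=-1).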