Upload modeling_opt.py
Browse files
- modeling_opt.py +4 -4
modeling_opt.py
CHANGED
@@ -201,10 +201,10 @@ class OPTAttention(nn.Module):
|
|
201 |
self.scaling = self.head_dim**-0.5
|
202 |
self.is_decoder = is_decoder
|
203 |
|
204 |
-
self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
|
205 |
-
self.v_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
|
206 |
-
self.q_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
|
207 |
-
self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
|
208 |
|
209 |
# YB: capture the input and output of the softmax
|
210 |
self.attn_scores = nn.Identity() # before attention mask
|
|
|
201 |
self.scaling = self.head_dim**-0.5
|
202 |
self.is_decoder = is_decoder
|
203 |
|
204 |
+
self.k_proj = nn.Linear(self.embed_dim, self.embed_dim, bias=bias)
|
205 |
+
self.v_proj = nn.Linear(self.embed_dim, self.embed_dim, bias=bias)
|
206 |
+
self.q_proj = nn.Linear(self.embed_dim, self.embed_dim, bias=bias)
|
207 |
+
self.out_proj = nn.Linear(self.embed_dim, self.embed_dim, bias=bias)
|
208 |
|
209 |
# YB: capture the input and output of the softmax
|
210 |
self.attn_scores = nn.Identity() # before attention mask
|