TypeError: forward() missing 1 required positional argument: 'attention_masks'
#16 · opened by lmlmvxi
import torch

class RegressionModel(torch.nn.Module):
    def __init__(self, base_model, config):
        super(RegressionModel, self).__init__()
        self.base_model = base_model
        self.output_layer = torch.nn.Linear(config.hidden_size, 1)

    def forward(self, input_ids, attention_masks=None):
        # If `base_model` does not support `attention_mask`, we can simply drop it
        if hasattr(self.base_model, 'forward'):
            if attention_masks is not None:
                # The model supports attention_mask, so pass it along
                outputs = self.base_model(input_ids=input_ids, attention_mask=attention_masks)
            else:
                # No attention mask was provided, so call the model without it
                outputs = self.base_model(input_ids=input_ids)
        else:
            outputs = self.base_model(input_ids=input_ids)  # attention_mask not supported at all

        # If the model exposes pooler_output, use it
        if hasattr(outputs, 'pooler_output'):
            pooled_output = outputs.pooler_output
        else:
            # Otherwise fall back to last_hidden_state
            last_hidden_state = outputs.last_hidden_state
            # Pool with the [CLS] token: take the first token of each sample
            pooled_output = last_hidden_state[:, 0]

        return self.output_layer(pooled_output).squeeze(-1)
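For context, here is a minimal sketch of how this wrapper might be called. The checkpoint name (bert-base-uncased) and the example sentence are placeholders, not part of the original post; the point is that both input_ids and the tokenizer's attention mask are passed in, since a forward() that declares attention_masks without a default will raise the TypeError in the title if the mask is omitted.

from transformers import AutoModel, AutoTokenizer

# Illustrative base model; substitute the checkpoint you are actually using
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
base_model = AutoModel.from_pretrained("bert-base-uncased")
model = RegressionModel(base_model, base_model.config)

# Tokenize a batch and pass both input_ids and the attention mask
enc = tokenizer(["an example sentence"], return_tensors="pt", padding=True)
score = model(enc["input_ids"], attention_masks=enc["attention_mask"])
print(score.shape)  # torch.Size([1]) -- one regression value per sample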
The solution is to build the attention mask explicitly from the padded input_ids and pass it to generate():
attention_mask = (inputs["input_ids"] != tokenizer.pad_token_id).long()
outputs = model.generate(
    inputs["input_ids"],
    attention_mask=attention_mask,  # Pass the attention mask here
    max_length=200,
    num_return_sequences=1,
    temperature=0.7,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id
)
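For completeness, a minimal end-to-end sketch of that fix is below. The checkpoint name (gpt2) and the prompt are only assumptions for illustration, and the tokenizer is assumed to have no pad token, so the EOS token is reused for padding.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative model; substitute the checkpoint you are actually using
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# GPT-2 has no pad token, so reuse the EOS token for padding
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

inputs = tokenizer(["Write a short poem about the sea."],
                   return_tensors="pt", padding=True)

# Build the mask explicitly: 1 for real tokens, 0 for padding
attention_mask = (inputs["input_ids"] != tokenizer.pad_token_id).long()

with torch.no_grad():
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=attention_mask,
        max_length=200,
        num_return_sequences=1,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))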