TypeError: forward() missing 1 required positional argument: 'attention_masks'

#16 by lmlmvxi - opened

import torch

class RegressionModel(torch.nn.Module):
    def __init__(self, base_model, config):
        super().__init__()
        self.base_model = base_model
        self.output_layer = torch.nn.Linear(config.hidden_size, 1)

    def forward(self, input_ids, attention_masks=None):
        # Pass the attention mask to the base model only when one is
        # provided; otherwise call it with input_ids alone.
        if attention_masks is not None:
            outputs = self.base_model(input_ids=input_ids, attention_mask=attention_masks)
        else:
            outputs = self.base_model(input_ids=input_ids)
    
        # If the model exposes a pooler_output, use it.
        if hasattr(outputs, 'pooler_output') and outputs.pooler_output is not None:
            pooled_output = outputs.pooler_output
        else:
            # Otherwise fall back to last_hidden_state and pool by taking
            # each sample's first token (the [CLS] token).
            last_hidden_state = outputs.last_hidden_state
            pooled_output = last_hidden_state[:, 0]

        return self.output_layer(pooled_output).squeeze(-1)
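For completeness, a minimal sketch of how this wrapper can be called, assuming a Hugging Face AutoModel and AutoTokenizer as the base (bert-base-uncased and the example texts are placeholders):

import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
base_model = AutoModel.from_pretrained("bert-base-uncased")
model = RegressionModel(base_model, base_model.config)

inputs = tokenizer(["an example sentence", "another one"], padding=True, return_tensors="pt")

# The keyword must match forward()'s parameter name, which is
# `attention_masks` here, while the tokenizer returns `attention_mask`.
scores = model(input_ids=inputs["input_ids"], attention_masks=inputs["attention_mask"])
print(scores.shape)  # torch.Size([2])

The TypeError in the title arises when forward() declares attention_masks without a default and the model is called with input_ids alone; giving the parameter a default of None, as above, makes the mask optional.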

The solution is to build the attention mask explicitly from the padded input_ids and pass it to generate():

attention_mask = (inputs["input_ids"] != tokenizer.pad_token_id).long()

outputs = model.generate(
    inputs["input_ids"],
    attention_mask=attention_mask,  # Pass the attention mask here
    max_length=200,
    num_return_sequences=1,
    temperature=0.7,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id
) 
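Since the tokenizer already returns an attention mask when padding is enabled, it can also be passed straight through instead of being rebuilt from pad_token_id. A minimal end-to-end sketch, assuming a causal LM loaded through transformers (gpt2 and the prompt are placeholders):

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 defines no pad token by default

model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Once upon a time", return_tensors="pt", padding=True)

outputs = model.generate(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],  # mask produced by the tokenizer
    max_length=200,
    num_return_sequences=1,
    temperature=0.7,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

This is also more robust than comparing against pad_token_id: when pad_token is set to eos_token, real end-of-sequence tokens in the input would be wrongly masked out by the comparison, while the tokenizer's own mask only zeroes genuine padding.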
