TypeError: forward() missing 1 required positional argument: 'attention_masks'

#16 by lmlmvxi - opened

import torch

class RegressionModel(torch.nn.Module):
    def __init__(self, base_model, config):
        super().__init__()
        self.base_model = base_model
        self.output_layer = torch.nn.Linear(config.hidden_size, 1)

    def forward(self, input_ids, attention_masks=None):
        # Pass the attention mask to the base model only when one is
        # provided; otherwise call it with input_ids alone.
        if attention_masks is not None:
            outputs = self.base_model(input_ids=input_ids, attention_mask=attention_masks)
        else:
            outputs = self.base_model(input_ids=input_ids)
    
        # If the model exposes a pooler_output, use it.
        if hasattr(outputs, 'pooler_output') and outputs.pooler_output is not None:
            pooled_output = outputs.pooler_output
        else:
            # Otherwise fall back to last_hidden_state and pool by taking
            # each sample's first token (the [CLS] token).
            last_hidden_state = outputs.last_hidden_state
            pooled_output = last_hidden_state[:, 0]

        return self.output_layer(pooled_output).squeeze(-1)
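For completeness, a minimal sketch of how this wrapper can be called, assuming a Hugging Face AutoModel and AutoTokenizer as the base (bert-base-uncased and the example texts are placeholders):

import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
base_model = AutoModel.from_pretrained("bert-base-uncased")
model = RegressionModel(base_model, base_model.config)

inputs = tokenizer(["an example sentence", "another one"], padding=True, return_tensors="pt")

# The keyword must match forward()'s parameter name, which is
# `attention_masks` here, while the tokenizer returns `attention_mask`.
scores = model(input_ids=inputs["input_ids"], attention_masks=inputs["attention_mask"])
print(scores.shape)  # torch.Size([2])

The TypeError in the title arises when forward() declares attention_masks without a default and the model is called with input_ids alone; giving the parameter a default of None, as above, makes the mask optional.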

The solution is to build the attention mask explicitly from the padded input_ids and pass it to generate():

attention_mask = (inputs["input_ids"] != tokenizer.pad_token_id).long()

outputs = model.generate(
    inputs["input_ids"],
    attention_mask=attention_mask,  # Pass the attention mask here
    max_length=200,
    num_return_sequences=1,
    temperature=0.7,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id
) 
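Since the tokenizer already returns an attention mask when padding is enabled, it can also be passed straight through instead of being rebuilt from pad_token_id. A minimal end-to-end sketch, assuming a causal LM loaded through transformers (gpt2 and the prompt are placeholders):

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 defines no pad token by default

model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Once upon a time", return_tensors="pt", padding=True)

outputs = model.generate(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],  # mask produced by the tokenizer
    max_length=200,
    num_return_sequences=1,
    temperature=0.7,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

This is also more robust than comparing against pad_token_id: when pad_token is set to eos_token, real end-of-sequence tokens in the input would be wrongly masked out by the comparison, while the tokenizer's own mask only zeroes genuine padding.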
