import numpy as np
from huggingface_hub import Repository
from transformers import (
    AutoModelForSequenceClassification,
    Pipeline,
    TFAutoModelForSequenceClassification,
    pipeline,
)
from transformers.pipelines import PIPELINE_REGISTRY


def softmax(outputs):
    # Numerically stable softmax over the last axis: subtracting the max
    # before exponentiating keeps large logits from overflowing.
    maxes = np.max(outputs, axis=-1, keepdims=True)
    shifted_exp = np.exp(outputs - maxes)
    return shifted_exp / shifted_exp.sum(axis=-1, keepdims=True)
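
# Illustrative check (ours, not from the original): each row of the result
# sums to 1, e.g. softmax(np.array([1.0, 2.0, 3.0])).sum() -> 1.0.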


class PairClassificationPipeline(Pipeline):
    def _sanitize_parameters(self, **kwargs):
        # Split the caller's kwargs between the three stages; only preprocess
        # takes an extra parameter (the optional second sentence).
        preprocess_kwargs = {}
        if "second_text" in kwargs:
            preprocess_kwargs["second_text"] = kwargs["second_text"]
        return preprocess_kwargs, {}, {}

    def preprocess(self, text, second_text=None):
        # Tokenize both sentences together so the model receives them as a pair.
        return self.tokenizer(
            text, text_pair=second_text, return_tensors=self.framework
        )

    def _forward(self, model_inputs):
        # Run the model on the tokenized inputs.
        return self.model(**model_inputs)

    def postprocess(self, model_outputs):
        # Turn the logits of the single sequence in the batch into probabilities.
        logits = model_outputs.logits[0].numpy()
        probabilities = softmax(logits)

        # Report the best class with its label, its score, and the raw logits.
        best_class = np.argmax(probabilities)
        label = self.model.config.id2label[best_class]
        score = probabilities[best_class].item()
        logits = logits.tolist()
        return {"label": label, "score": score, "logits": logits}
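
# The base Pipeline drives the three hooks in order for each input:
# preprocess -> _forward -> postprocess (batching and device placement are
# handled by the parent class).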


if __name__ == "__main__":
    # Register the custom task so pipeline("pair-classification") resolves to
    # our class, with a default model class for each framework.
    PIPELINE_REGISTRY.register_pipeline(
        "pair-classification",
        pipeline_class=PairClassificationPipeline,
        pt_model=AutoModelForSequenceClassification,
        tf_model=TFAutoModelForSequenceClassification,
    )

    # Instantiate the registered pipeline with a BERT model fine-tuned on MRPC.
    classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
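
    # Illustrative call (the example sentences are ours, not from the
    # original): "second_text" is routed by _sanitize_parameters into
    # preprocess, where it becomes the tokenizer's text_pair.
    result = classifier("I like you.", second_text="I love you too.")
    print(result)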
|

    # Clone the Hub repo locally, save the custom pipeline (code included)
    # into it, and push everything back up.
    repo = Repository(
        "test-dynamic-pipeline", clone_from="paulhindemith/test-dynamic-pipeline"
    )
    classifier.save_pretrained("test-dynamic-pipeline")
    repo.push_to_hub()
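
    # Hedged sketch of the load-back step, assuming the Hub's standard
    # custom-code workflow: trust_remote_code=True lets pipeline() fetch and
    # run the PairClassificationPipeline code stored in the repo, so the
    # restored object behaves like `classifier` above.
    restored = pipeline(
        model="paulhindemith/test-dynamic-pipeline", trust_remote_code=True
    )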