|
from transformers import CLIPTokenizer |
|
|
|
class SDXLTokenizer:
    """Wrapper around the two HuggingFace CLIP tokenizers used by SDXL.

    Tokenizes a prompt with both tokenizers and returns the joined output.

    Args:
        file_path_or_name (str): Name or path of the model whose ``tokenizer``
            and ``tokenizer_2`` subfolders are loaded. Defaults to
            'stabilityai/stable-diffusion-xl-base-1.0'.
        **kwargs: Additional keyword arguments forwarded unchanged to
            ``CLIPTokenizer.from_pretrained`` for both tokenizers.
    """

    def __init__(self, file_path_or_name='stabilityai/stable-diffusion-xl-base-1.0', **kwargs):
        # ``from_pretrained`` below forwards its **kwargs to this constructor,
        # so they must be accepted here; otherwise any extra keyword raises
        # TypeError before a tokenizer is ever loaded.
        self.tokenizer = CLIPTokenizer.from_pretrained(file_path_or_name, subfolder='tokenizer', **kwargs)
        self.tokenizer_2 = CLIPTokenizer.from_pretrained(file_path_or_name, subfolder='tokenizer_2', **kwargs)

    @classmethod
    def from_pretrained(cls, file_path_or_name='stabilityai/stable-diffusion-xl-base-1.0', **kwargs):
        """Create a new instance of SDXLTokenizer from a pretrained model.

        Args:
            file_path_or_name (str): Name or path of the model's tokenizers to load.
            **kwargs: Additional keyword arguments passed to the constructor.

        Returns:
            SDXLTokenizer: A new instance of ``cls``.
        """
        return cls(file_path_or_name=file_path_or_name, **kwargs)

    @staticmethod
    def _tokenize_with(tokenizer, prompt, padding, truncation, return_tensors, max_length):
        """Run a single tokenizer, defaulting ``max_length`` to its own ``model_max_length``."""
        if max_length is None:
            max_length = tokenizer.model_max_length
        return tokenizer(
            prompt,
            padding=padding,
            max_length=max_length,
            truncation=truncation,
            return_tensors=return_tensors,
        )

    def __call__(self, prompt, padding, truncation, return_tensors, max_length=None):
        """Tokenize ``prompt`` with both tokenizers and pair up their outputs.

        Args:
            prompt: Text passed as the first positional argument to each tokenizer.
            padding: Forwarded unchanged to both tokenizers.
            truncation: Forwarded unchanged to both tokenizers.
            return_tensors: Forwarded unchanged to both tokenizers.
            max_length: Maximum length passed to both tokenizers. When ``None``,
                each tokenizer uses its own ``model_max_length``.

        Returns:
            The first tokenizer's output object, with every entry replaced by a
            two-element list ``[tokenizer value, tokenizer_2 value]``.

        Raises:
            KeyError: If the second tokenizer's output lacks a key that the
                first tokenizer's output has.
        """
        tokenized_output = self._tokenize_with(
            self.tokenizer, prompt, padding, truncation, return_tensors, max_length)
        tokenized_output_2 = self._tokenize_with(
            self.tokenizer_2, prompt, padding, truncation, return_tensors, max_length)

        # Only values of existing keys are reassigned (the key set never
        # changes), so updating the mapping while iterating over it is safe.
        for key in tokenized_output:
            tokenized_output[key] = [tokenized_output[key], tokenized_output_2[key]]
        return tokenized_output
|
|