# Captured from a "Spaces" file viewer page; the page reported "Runtime error"
# and a file size of 10,750 bytes. The per-line commit hashes and the 1-232
# line-number gutter that followed here were viewer chrome, not program text.
import gradio as gr
import jax
import numpy as np
import jax
import jax.numpy as jnp
from PIL import Image
from diffusers import (
FlaxAutoencoderKL,
FlaxDPMSolverMultistepScheduler,
FlaxUNet2DConditionModel,
)
from transformers import ByT5Tokenizer, FlaxT5ForConditionalGeneration
def get_inference_lambda(seed):
    """Build a ``prompt -> PIL image`` callable for the CHARRED diffusion model.

    Everything that does not depend on the prompt is set up once here: the
    ByT5 tokenizer and text encoder ("google/byt5-base"), the embedding of the
    empty negative prompt, the DPM-Solver multistep scheduler, the U-Net
    ("character-aware-diffusion/charred") and the VAE
    ("flax/stable-diffusion-2-1", subfolder "vae").

    Args:
        seed: Integer passed to ``jax.random.PRNGKey`` for the initial latent
            noise. It is captured by the returned callable, so every prompt
            starts from the same noise.

    Returns:
        A callable taking a prompt string and returning a ``PIL.Image.Image``
        (RGB) produced by 20 guided denoising steps followed by VAE decoding.
    """
    tokenizer = ByT5Tokenizer()
    language_model = FlaxT5ForConditionalGeneration.from_pretrained(
        "google/byt5-base",
        dtype=jnp.float32,
    )
    text_encoder = language_model.encode
    text_encoder_params = language_model.params

    # Padded token length shared by the negative prompt and every user prompt,
    # so both embeddings can be concatenated along the batch axis.
    max_length = 1024

    # The empty negative prompt never changes: embed it once up front.
    tokenized_negative_prompt = tokenizer(
        "", padding="max_length", max_length=max_length, return_tensors="np"
    ).input_ids
    negative_prompt_text_encoder_hidden_states = text_encoder(
        tokenized_negative_prompt,
        params=text_encoder_params,
        train=False,
    )[0]

    scheduler = FlaxDPMSolverMultistepScheduler.from_config(
        config={
            "_diffusers_version": "0.16.0",
            "beta_end": 0.012,
            "beta_schedule": "scaled_linear",
            "beta_start": 0.00085,
            "clip_sample": False,
            "num_train_timesteps": 1000,
            "prediction_type": "v_prediction",
            "set_alpha_to_one": False,
            "skip_prk_steps": True,
            "steps_offset": 1,
            "trained_betas": None,
        }
    )
    timesteps = 20
    guidance_scale = jnp.array([7.5], dtype=jnp.bfloat16)

    unet, unet_params = FlaxUNet2DConditionModel.from_pretrained(
        "character-aware-diffusion/charred",
        dtype=jnp.bfloat16,
    )
    vae, vae_params = FlaxAutoencoderKL.from_pretrained(
        "flax/stable-diffusion-2-1",
        subfolder="vae",
        dtype=jnp.bfloat16,
    )
    vae_scale_factor = 2 ** (len(vae.config.block_out_channels) - 1)
    image_width = image_height = 256

    # Latent layout is (batch, channels, height, width). The image is square,
    # so listing height before width yields the same shape as before.
    latent_shape = (
        # batch size, taken from the negative prompt embedding
        negative_prompt_text_encoder_hidden_states.shape[0],
        unet.in_channels,
        image_height // vae_scale_factor,
        image_width // vae_scale_factor,
    )

    def _tokenize_prompt(prompt: str):
        """Tokenize ``prompt``, padded/truncated to ``max_length`` token ids."""
        return tokenizer(
            text=prompt,
            max_length=max_length,
            padding="max_length",
            truncation=True,
            return_tensors="jax",
        ).input_ids

    def _convert_image(image):
        """Create a PIL RGB image from a JAX array converted to numpy."""
        return Image.fromarray(np.asarray(image), mode="RGB")

    def _get_context(tokenized_prompt: jnp.ndarray):
        """Encode the prompt and stack it after the negative prompt embedding."""
        text_encoder_hidden_states = text_encoder(
            tokenized_prompt,
            params=text_encoder_params,
            train=False,
        )[0]
        # context = empty negative prompt embedding + prompt embedding
        return jnp.concatenate(
            [negative_prompt_text_encoder_hidden_states, text_encoder_hidden_states]
        )

    def _predict_image(context: jnp.ndarray):
        """Run the denoising loop for ``context`` and decode to a uint8 image."""

        def _timestep(step, step_args):
            """One ``fori_loop`` iteration: guided U-Net prediction + scheduler step."""
            latents, scheduler_state = step_args
            t = jnp.array(scheduler_state.timesteps, dtype=jnp.int32)[step]
            # For classifier-free guidance, we need to do two forward passes.
            # Here we concatenate the unconditional and text embeddings into a
            # single batch to avoid doing two forward passes.
            latent_input = jnp.concatenate([latents] * 2)
            timestep = jnp.broadcast_to(t, latent_input.shape[0])
            scaled_latent_input = scheduler.scale_model_input(
                scheduler_state, latent_input, t
            )
            # predict the noise residual
            unet_prediction_sample = unet.apply(
                {"params": unet_params},
                jnp.array(scaled_latent_input),
                jnp.array(timestep, dtype=jnp.int32),
                context,
            ).sample
            # perform guidance: the first half of the batch is the
            # unconditional prediction, the second half the text-conditioned one
            unet_prediction_sample_uncond, unet_prediction_text = jnp.split(
                unet_prediction_sample, 2, axis=0
            )
            guided_unet_prediction_sample = (
                unet_prediction_sample_uncond
                + guidance_scale
                * (unet_prediction_text - unet_prediction_sample_uncond)
            )
            # compute the previous noisy sample x_t -> x_t-1
            latents, scheduler_state = scheduler.step(
                scheduler_state, guided_unet_prediction_sample, t, latents
            ).to_tuple()
            return latents, scheduler_state

        # initialize scheduler state
        initial_scheduler_state = scheduler.set_timesteps(
            scheduler.create_state(), num_inference_steps=timesteps, shape=latent_shape
        )
        # initialize latents: seeded Gaussian noise scaled for the scheduler
        initial_latents = (
            jax.random.normal(
                jax.random.PRNGKey(seed), shape=latent_shape, dtype=jnp.bfloat16
            )
            * initial_scheduler_state.init_noise_sigma
        )
        final_latents, _ = jax.lax.fori_loop(
            0, timesteps, _timestep, (initial_latents, initial_scheduler_state)
        )
        vae_output = vae.apply(
            {"params": vae_params},
            1 / vae.config.scaling_factor * final_latents,
            method=vae.decode,
        ).sample
        # Map [-1, 1] to [0, 255], move channels last and return the first
        # (only) batch element as an 8-bit RGB array.
        return (
            ((vae_output / 2 + 0.5).transpose(0, 2, 3, 1).clip(0, 1) * 255)
            .round()
            .astype(jnp.uint8)[0]
        )

    # Image generation is jitted without an explicit device; text encoding is
    # jitted and pinned to the first CPU device.
    jax_jit_compiled_accel_predict_image = jax.jit(_predict_image)
    jax_jit_compiled_cpu_get_context = jax.jit(
        _get_context, device=jax.devices(backend="cpu")[0]
    )

    return lambda prompt: _convert_image(
        jax_jit_compiled_accel_predict_image(
            jax_jit_compiled_cpu_get_context(_tokenize_prompt(prompt))
        )
    )
# Build the inference callable once at start-up; 87 is the seed for the
# initial latent noise.
generate_image_for_prompt = get_inference_lambda(87)

with gr.Blocks(theme="gradio/soft") as demo:
    gr.Markdown("# Character-Aware Stable Diffusion (CHARRED)")

    with gr.Tab("Journal"):
        # The markdown text is kept flush-left so the string content is
        # exactly the text below, with no extra leading whitespace.
        gr.Markdown(
            """
## On How Four Crazy Fellows Embarked on Training a JAX U-Net from Scratch in Five Days and Almost Died in the End
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris vitae varius libero. Nullam laoreet eget sapien quis tristique. Cras odio odio, consequat sed cursus quis, dignissim hendrerit ligula. Curabitur non lorem tellus. Nam bibendum malesuada mi sed faucibus. Sed euismod enim metus, sit amet venenatis elit elementum vel. Duis nec rhoncus tellus, rhoncus auctor justo. Proin id gravida dolor. Sed nulla lectus, finibus non fringilla ac, fermentum in sapien. Cras lobortis est augue, vel posuere justo pretium vitae. Aliquam lorem dolor, condimentum et finibus rutrum, rhoncus eget nunc.
In varius eu nulla non tempor. Maecenas laoreet scelerisque ipsum, eu placerat enim luctus sed. In malesuada, nibh finibus finibus sollicitudin, lacus massa pulvinar sem, vel venenatis nibh sem eget lorem. Cras at augue magna. Nullam elementum porta turpis, et tristique sapien placerat vel. Etiam eu lorem malesuada, ornare leo a, commodo erat. Mauris a velit vulputate, placerat lectus vel, varius lorem. Sed volutpat porttitor venenatisLorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris vitae varius libero. Nullam laoreet eget sapien quis tristique. Cras odio odio, consequat sed cursus quis, dignissim hendrerit ligula. Curabitur non lorem tellus. Nam bibendum malesuada mi sed faucibus. Sed euismod enim metus, sit amet venenatis elit elementum vel. Duis nec rhoncus tellus, rhoncus auctor justo. Proin id gravida dolor. Sed nulla lectus, finibus non fringilla ac, fermentum in sapien. Cras lobortis est augue, vel posuere justo pretium vitae. Aliquam lorem dolor, condimentum et finibus rutrum, rhoncus eget nunc.
Sed pellentesque gravida consectetur. Mauris molestie nunc quis lacinia egestas. Curabitur aliquam varius quam, nec venenatis leo efficitur a. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Ut fermentum gravida mauris, at blandit diam suscipit dapibus. Maecenas ac condimentum justo. Pellentesque aliquet risus vitae massa molestie iaculis. Quisque at libero tincidunt dui ornare vulputate. Sed tristique dolor lacinia pellentesque maximus. Donec bibendum tempus orci, eu gravida metus vehicula sit amet. Donec quis sodales neque, id consequat elit.
Sed molestie diam a massa sodales porta. Sed et ex vitae felis blandit consectetur porttitor in lectus. Interdum et malesuada fames ac ante ipsum primis in faucibus. Praesent est mi, lacinia ut egestas sed, dapibus sed augue. Sed scelerisque est a ex porta suscipit. Curabitur eleifend massa vitae suscipit finibus. Cras lobortis pellentesque est. Pellentesque semper justo nibh, vitae convallis lectus ultrices sed. Nunc auctor dignissim pretium. Praesent orci justo, posuere a diam at, tincidunt viverra leo. Quisque sit amet dignissim erat, id varius massa. Phasellus fringilla vestibulum elit, id eleifend erat hendrerit ut.
Duis scelerisque sit amet est at iaculis. Suspendisse sed ipsum vitae massa placerat semper. Pellentesque vitae sapien tristique, congue ligula sed, dapibus nunc. Suspendisse sed maximus neque, a lobortis risus. Nam lorem nisi, commodo a neque ut, volutpat porttitor ipsum. Quisque in tortor blandit, ultrices leo eget, venenatis nisl. Vestibulum ultricies sapien at sapien tincidunt vehicula vel in lacus. Sed ultricies mattis quam ac aliquet. Nulla a ullamcorper urna. Duis lacus ligula, auctor in orci sed, hendrerit maximus lectus. Nam a enim at nibh aliquam rhoncus. Pellentesque nulla justo, varius eget molestie sit amet, ultricies id tortor.
"""
        )

    with gr.Tab("☢️ DEMO ☢️"):
        gr.Markdown(
            "## This is a demo of the CHARRED character-aware stable diffusion model for you to enjoy at your own leisure, risk and peril"
        )
        prompt_input_charr = gr.Textbox(label="Prompt")
        charred_output = gr.Image(label="Output Image")
        submit_btn = gr.Button(value="Submit")
        # Clicking "Submit" sends the prompt text to the model and shows the
        # returned PIL image in the output component.
        submit_btn.click(
            fn=generate_image_for_prompt,
            inputs=[prompt_input_charr],
            outputs=[charred_output],
        )

# `concurrency_count` is omitted on purpose: Gradio 3 already defaults it to 1
# (the value previously passed), and Gradio 4+ rejects the argument outright.
demo.queue()
demo.launch(debug=True, show_error=True)
|