# Spaces: Running on Zero
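Below is the full `app.py` for a ZeroGPU Space that demonstrates Google's StyleAligned transfer with Stable Diffusion XL: a reference image is inverted with DDIM to recover its latent trajectory, and its style is then shared with a new prompt through StyleAligned's shared-attention mechanism. The file is shown in three parts: model setup, the `run` function, and the Gradio UI.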
```python
import gradio as gr
import numpy as np
import spaces
import torch
from diffusers import StableDiffusionXLPipeline, DDIMScheduler
from diffusers.utils import load_image

import inversion    # DDIM inversion utilities from the StyleAligned repo
import sa_handler   # StyleAligned shared-attention handler

# init models
scheduler = DDIMScheduler(
    beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear",
    clip_sample=False, set_alpha_to_one=False)
pipeline = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16, variant="fp16",
    use_safetensors=True,
    scheduler=scheduler,
)
# offload submodules to CPU between uses to save VRAM; the offload hooks move
# each submodule to the GPU lazily, so no explicit .to("cuda") is needed here
pipeline.enable_model_cpu_offload()
pipeline.enable_vae_slicing()  # decode the VAE one image at a time to reduce peak memory
```
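The `run` function does the actual work. It inverts the reference image with DDIM, then generates a two-image StyleAligned batch in which the first slot reconstructs the reference and the second renders the new prompt in the same style. Because the Space runs on ZeroGPU, the function is decorated with `@spaces.GPU` so a GPU is attached only for the duration of the call.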
```python
@spaces.GPU
def run(ref_path, ref_style, ref_prompt, prompt1, progress=gr.Progress(track_tqdm=True)):
    """Generate an image in the style of a reference image using the StyleAligned method.

    This function performs DDIM inversion on a reference image to capture its latent
    representation and then applies the StyleAligned diffusion technique to transfer
    the style to a new prompt.

    Args:
        ref_path: File path to the reference image.
        ref_style: Textual description of the reference style (e.g., 'medieval painting').
        ref_prompt: Description of the content in the reference image.
        prompt1: The prompt describing the new image to be generated with the same style.
        progress: Internal Gradio progress tracker (automatically handled).

    Returns:
        A list of generated images with the style of the reference image applied to the new prompt.
    """
    # DDIM inversion of the reference image
    src_style = ref_style
    src_prompt = f"{ref_prompt}, {src_style}."
    num_inference_steps = 50
    x0 = np.array(load_image(ref_path).resize((1024, 1024)))
    try:
        zts = inversion.ddim_inversion(pipeline, x0, src_prompt, num_inference_steps, 2)
    except Exception:
        # the first attempt occasionally fails; retry once
        zts = inversion.ddim_inversion(pipeline, x0, src_prompt, num_inference_steps, 2)

    # run StyleAligned: the first prompt reconstructs the reference,
    # the second generates the new image with shared attention
    prompts = [
        src_prompt,
        prompt1,
    ]

    # parameters that control fidelity to the reference
    shared_score_shift = np.log(2)  # higher value induces higher fidelity, set 0 for no shift
    shared_score_scale = 1.0        # higher value induces higher fidelity, set 1 for no rescale
    # for very famous images, consider suppressing attention to the reference,
    # e.g. shared_score_shift = np.log(1); shared_score_scale = 0.5

    for i in range(1, len(prompts)):
        prompts[i] = f'{prompts[i]}, {src_style}.'

    handler = sa_handler.Handler(pipeline)
    sa_args = sa_handler.StyleAlignedArgs(
        share_group_norm=True, share_layer_norm=True, share_attention=True,
        adain_queries=True, adain_keys=True, adain_values=False,
        shared_score_shift=shared_score_shift, shared_score_scale=shared_score_scale,
    )
    handler.register(sa_args)

    zT, inversion_callback = inversion.make_inversion_callback(zts, offset=5)

    generator = torch.Generator(device='cuda')
    generator.manual_seed(10)  # fixed seed for reproducibility
    latents = torch.randn(len(prompts), 4, 128, 128, device='cuda',
                          generator=generator, dtype=pipeline.unet.dtype)
    latents[0] = zT  # reuse the inverted latent for the reference slot

    images_a = pipeline(prompts, latents=latents,
                        callback_on_step_end=inversion_callback,
                        num_inference_steps=num_inference_steps,
                        guidance_scale=10.0).images
    handler.remove()
    return images_a
```
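The remaining code builds the Gradio interface: reference image, style, and prompt on the left; the output gallery, the new prompt, and the submit button on the right; plus a pre-filled example with caching disabled.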
```python
css = """
#col-container{
    margin: 0 auto;
    max-width: 820px;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML("""
        <h2 style="text-align: center;">Google's StyleAligned Transfer</h2>
        """)
        with gr.Row():
            with gr.Column():
                with gr.Group():
                    ref_path = gr.Image(type="filepath")
                    ref_style = gr.Textbox(label="Reference style")
                    ref_prompt = gr.Textbox(label="Reference prompt")
            with gr.Column():
                with gr.Group():
                    results = gr.Gallery()
                    prompt1 = gr.Textbox(label="Prompt 1")
                    run_button = gr.Button("Submit")
        gr.Examples(
            examples=[
                [
                    "./example_image/medieval-bed.jpeg",
                    "medieval painting",
                    "Man laying on bed",
                    "A man working on a laptop",
                ]
            ],
            fn=run,
            inputs=[ref_path, ref_style, ref_prompt, prompt1],
            outputs=[results],
            cache_examples=False,
        )

    run_button.click(
        fn=run,
        inputs=[ref_path, ref_style, ref_prompt, prompt1],
        outputs=[results],
    )

demo.queue().launch(ssr_mode=False, mcp_server=True)
```
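Once deployed, the Space can also be called programmatically. Here is a minimal sketch using `gradio_client`; the Space id is a placeholder, and the endpoint name `/run` assumes Gradio's default of deriving it from the function name.

```python
from gradio_client import Client, handle_file

# placeholder Space id; replace with the repo id of the deployed Space
client = Client("your-username/style-aligned-transfer")

images = client.predict(
    handle_file("./example_image/medieval-bed.jpeg"),  # ref_path
    "medieval painting",                               # ref_style
    "Man laying on bed",                               # ref_prompt
    "A man working on a laptop",                       # prompt1
    api_name="/run",  # assumes Gradio's default endpoint naming
)
print(images)  # gallery output: the reference reconstruction and the new image
```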