| |
|
| | from huggingface_hub import hf_hub_download |
| | import torch |
| | import PIL |
| |
|
class CosXLEdit:
    """
    Instruction-driven image editor built on the CosXL Edit checkpoint.

    Edit Cos Stable Diffusion XL 1.0 Base is tuned to use a Cosine-Continuous EDM VPred schedule, and then upgraded to perform instructed image editing.
    Reference: https://huggingface.co/stabilityai/cosxl
    """

    def __init__(self, device="cuda"):
        """
        Load the CosXL Edit weights and configure the editing pipeline.

        The checkpoint is first fetched from the Hugging Face Hub; if that
        attempt fails, a checkpoint at a fixed local path is loaded instead.

        Attributes:
            pipe (CosStableDiffusionXLInstructPix2PixPipeline): The InstructPix2Pix pipeline for image transformation.

        Args:
            device (str, optional): Device on which the pipeline runs. Defaults to "cuda".
        """
        import warnings

        from diffusers import EDMEulerScheduler
        from .cosxl.custom_pipeline import CosStableDiffusionXLInstructPix2PixPipeline
        from .cosxl.utils import set_timesteps_patched

        # The method is replaced on the class itself, so the patch applies to
        # every EDMEulerScheduler created in this process, not only this one.
        EDMEulerScheduler.set_timesteps = set_timesteps_patched

        try:
            edit_file = hf_hub_download(repo_id="TIGER-Lab/cosxl", filename="cosxl_edit.safetensors")
            self.pipe = CosStableDiffusionXLInstructPix2PixPipeline.from_single_file(
                edit_file, num_in_channels=8
            )
        except Exception as err:
            # Best-effort fallback. `Exception` (rather than a bare `except`)
            # lets KeyboardInterrupt / SystemExit propagate, and the warning
            # keeps the original failure visible instead of swallowing it.
            edit_file_path = "./black_box_image_edit/cosxl/cosxl_edit.safetensors"
            warnings.warn(
                f"Loading cosxl_edit.safetensors from the Hub failed ({err!r}); "
                f"falling back to {edit_file_path}"
            )
            self.pipe = CosStableDiffusionXLInstructPix2PixPipeline.from_single_file(
                edit_file_path, num_in_channels=8
            )

        self.pipe.scheduler = EDMEulerScheduler(
            sigma_min=0.002,
            sigma_max=120.0,
            sigma_data=1.0,
            prediction_type="v_prediction",
        )
        # NOTE(review): the pipeline is moved to `device` and model CPU offload
        # is enabled right after; the offload call is not given `device`.
        # Confirm this combination is intended for non-default devices.
        self.pipe.to(device)
        self.pipe.enable_vae_tiling()
        self.pipe.enable_model_cpu_offload()

    def infer_one_image(
        self,
        src_image: "PIL.Image.Image" = None,
        src_prompt: str = None,
        target_prompt: str = None,
        instruct_prompt: str = None,
        seed: int = 42,
        negative_prompt="",
        resolution: int = 1024,
        guidance_scale: float = 7,
        num_inference_steps: int = 20,
    ):
        """
        Modifies the source image based on the provided instruction prompt.

        The image is converted to RGB, resized to a square of side
        `resolution` for the pipeline, and the result is resized to the
        source image's width and height.

        Args:
            src_image (PIL.Image.Image): Source image. Required.
            src_prompt (str, optional): Not used by this method.
            target_prompt (str, optional): Not used by this method.
            instruct_prompt (str): Caption for editing the image.
            seed (int, optional): Seed for random generator. Defaults to 42.
            negative_prompt (str, optional): Negative prompt forwarded to the pipeline. Defaults to "".
            resolution (int, optional): Side length of the square working image. Defaults to 1024.
            guidance_scale (float, optional): Guidance scale forwarded to the pipeline. Defaults to 7.
            num_inference_steps (int, optional): Number of inference steps forwarded to the pipeline. Defaults to 20.

        Returns:
            PIL.Image.Image: The transformed image.

        Raises:
            ValueError: If `src_image` is None.
        """
        if src_image is None:
            raise ValueError("src_image is required")

        src_image = src_image.convert('RGB')
        # torch.manual_seed seeds the global RNG and returns the default
        # generator; kept as-is so results for a given seed do not change.
        generator = torch.manual_seed(seed)

        preprocessed_image = src_image.resize((resolution, resolution))
        result = self.pipe(
            prompt=instruct_prompt,
            image=preprocessed_image,
            height=resolution,
            width=resolution,
            negative_prompt=negative_prompt,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            generator=generator,
        )
        edited = result.images[0]

        # Hand the result back at the source image's size.
        return edited.resize((src_image.width, src_image.height))
| |
|