main
gradio_app.py  CHANGED  (+24 -21)
@@ -100,7 +100,6 @@ def load_target_model(selected_model):
         t5xxl = flux_utils.load_t5xxl(T5XXL_PATH, torch.bfloat16, "cpu", disable_mmap=False)
         t5xxl.eval()
         ae = flux_utils.load_ae(AE_PATH, torch.bfloat16, "cpu", disable_mmap=False)
-        logger.info("Models loaded successfully.")

         # Load LoRA weights
         multiplier = 1.0
@@ -111,6 +110,8 @@ def load_target_model(selected_model):
         logger.info(f"Loaded LoRA weights from {LORA_WEIGHTS_PATH}: {info}")
         lora_model.eval()

+        logger.info("Models loaded successfully.")
+
     except Exception as e:
         logger.error(f"Error loading models: {e}")
         raise
@@ -129,19 +130,19 @@ class ResizeWithPadding:

         width, height = img.size

-        # Convert to RGB to remove transparency, fill with white background if necessary
-        if img.mode in ('RGBA', 'LA') or (img.mode == 'P' and 'transparency' in img.info):
-            background = Image.new("RGB", img.size, (fill, fill, fill))
-            background.paste(img, mask=img.split()[-1])  # Use alpha channel as mask
-            img = background
-
-        if width == height:
-            img = img.resize((self.size, self.size), Image.LANCZOS)
-        else:
-            max_dim = max(width, height)
-            new_img = Image.new("RGB", (max_dim, max_dim), (self.fill, self.fill, self.fill))
-            new_img.paste(img, ((max_dim - width) // 2, (max_dim - height) // 2))
-            img = new_img.resize((self.size, self.size), Image.LANCZOS)
+        # # Convert to RGB to remove transparency, fill with white background if necessary
+        # if img.mode in ('RGBA', 'LA') or (img.mode == 'P' and 'transparency' in img.info):
+        #     background = Image.new("RGB", img.size, (fill, fill, fill))
+        #     background.paste(img, mask=img.split()[-1])  # Use alpha channel as mask
+        #     img = background
+
+        # if width == height:
+        #     img = img.resize((self.size, self.size), Image.LANCZOS)
+        # else:
+        max_dim = max(width, height)
+        new_img = Image.new("RGB", (max_dim, max_dim), (self.fill, self.fill, self.fill))
+        new_img.paste(img, ((max_dim - width) // 2, (max_dim - height) // 2))
+        img = new_img.resize((self.size, self.size), Image.LANCZOS)
         return img

     # The function to generate image from a prompt and conditional image
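After this hunk, ResizeWithPadding always pads the input to a centered square and then resizes, regardless of aspect ratio. A self-contained sketch of the resulting behavior (the constructor is assumed from the self.size / self.fill references; it is not shown in the commit):

    from PIL import Image

    class ResizeWithPadding:
        def __init__(self, size, fill=255):  # hypothetical signature, inferred from usage
            self.size = size
            self.fill = fill

        def __call__(self, img):
            width, height = img.size
            max_dim = max(width, height)
            # Paste the image centered on a square canvas of the fill color
            new_img = Image.new("RGB", (max_dim, max_dim), (self.fill, self.fill, self.fill))
            new_img.paste(img, ((max_dim - width) // 2, (max_dim - height) // 2))
            # Scale the padded square to the target size
            return new_img.resize((self.size, self.size), Image.LANCZOS)

Note that the RGBA flattening and the square fast path are now commented out, so transparent images are pasted without an alpha mask and square inputs also take the (no-op) padding path.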
@@ -197,9 +198,11 @@ def infer(prompt, sample_image, frame_num, seed=0):
     logger.debug("Image encoded to latents.")

     conditions = {}
-    conditions[prompt] = latents.to("cpu")
+    # conditions[prompt] = latents.to("cpu")
+    conditions[prompt] = latents
+

-    ae.to("cpu")
+    # ae.to("cpu")
     clip_l.to(device)
     t5xxl.to(device)

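This hunk keeps the encoded latents on the compute device instead of staging them to CPU, and no longer offloads the autoencoder before the text encoders move in. A minimal sketch of the change in the offload pattern (the shape and prompt are placeholders):

    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"
    latents = torch.randn(1, 16, 64, 64, device=device)  # stand-in for the encoded latents

    conditions = {}
    # before: conditions[prompt] = latents.to("cpu")  # stage to CPU to free VRAM
    conditions["a prompt"] = latents  # after: keep on device, skip the round-trip copy

The trade-off is higher peak VRAM in exchange for avoiding host-device copies between pipeline stages; the same pattern repeats in the two hunks below.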
@@ -236,8 +239,8 @@ def infer(prompt, sample_image, frame_num, seed=0):
     args = lambda: None
     args.frame_num = frame_num

-    clip_l.to("cpu")
-    t5xxl.to("cpu")
+    # clip_l.to("cpu")
+    # t5xxl.to("cpu")

     model.to(device)

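As context, args = lambda: None is a trick for getting a throwaway attribute container (function objects accept arbitrary attributes); types.SimpleNamespace is the idiomatic equivalent:

    from types import SimpleNamespace

    args = SimpleNamespace()   # same effect as args = lambda: None, but explicit
    args.frame_num = 9
    print(args.frame_num)      # 9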
@@ -251,12 +254,12 @@ def infer(prompt, sample_image, frame_num, seed=0):
     # Decode the final image
     x = x.float()
     x = flux_utils.unpack_latents(x, packed_latent_height, packed_latent_width)
-    model.to("cpu")
+    # model.to("cpu")
     ae.to(device)
     with accelerator.autocast(), torch.no_grad():
         x = ae.decode(x)
     logger.debug("Latents decoded into image.")
-    ae.to("cpu")
+    # ae.to("cpu")

     # Convert the tensor to an image
     x = x.clamp(-1, 1)
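The clamp to [-1, 1] in the context lines precedes the tensor-to-PIL conversion, which is not part of this commit; a common version of that conversion looks like this (a sketch with a stand-in tensor):

    import torch
    from PIL import Image

    x = torch.rand(1, 3, 512, 512) * 2 - 1                 # stand-in for ae.decode output, NCHW in [-1, 1]
    x = x.clamp(-1, 1)
    x = ((x + 1) / 2 * 255).to(torch.uint8)                # map [-1, 1] to [0, 255]
    img = Image.fromarray(x[0].permute(1, 2, 0).numpy())   # CHW -> HWC, then to PIL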
@@ -285,7 +288,7 @@ with gr.Blocks() as demo:
     sample_image = gr.Image(label="Upload a Conditional Image", type="pil")

     # Frame number selection
-    frame_num = gr.Radio([4, 9], label="Select Frame Number", value=
+    frame_num = gr.Radio([4, 9], label="Select Frame Number", value=9)

     # Seed
     seed = gr.Slider(0, np.iinfo(np.int32).max, step=1, label="Seed", value=0)
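For reference, the three controls shown in this hunk assemble into a minimal runnable Blocks skeleton (the rest of the app's UI and event wiring is omitted):

    import gradio as gr
    import numpy as np

    with gr.Blocks() as demo:
        sample_image = gr.Image(label="Upload a Conditional Image", type="pil")
        frame_num = gr.Radio([4, 9], label="Select Frame Number", value=9)  # new default: 9
        seed = gr.Slider(0, np.iinfo(np.int32).max, step=1, label="Seed", value=0)

    demo.launch()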