Satoshi2077 committed (verified)
Commit 9da140c · Parent(s): 0572872

Create app.py

Files changed (1):
  app.py +233 -0
app.py (added, 233 lines):
import os
import urllib.request
from functools import lru_cache
from random import randint
from typing import Any, Callable, Dict, List, Tuple

import clip
import cv2
import gradio as gr
import numpy as np
import PIL.Image
import torch
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry

CHECKPOINT_PATH = os.path.join(os.path.expanduser("~"), ".cache", "SAM")
CHECKPOINT_NAME = "sam_vit_h_4b8939.pth"
CHECKPOINT_URL = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth"
MODEL_TYPE = "default"
MAX_WIDTH = MAX_HEIGHT = 1024
TOP_K_OBJ = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

@lru_cache
def load_mask_generator() -> SamAutomaticMaskGenerator:
    # Download the SAM ViT-H checkpoint to ~/.cache/SAM on first use.
    if not os.path.exists(CHECKPOINT_PATH):
        os.makedirs(CHECKPOINT_PATH)
    checkpoint = os.path.join(CHECKPOINT_PATH, CHECKPOINT_NAME)
    if not os.path.exists(checkpoint):
        urllib.request.urlretrieve(CHECKPOINT_URL, checkpoint)
    sam = sam_model_registry[MODEL_TYPE](checkpoint=checkpoint).to(device)
    mask_generator = SamAutomaticMaskGenerator(sam)
    return mask_generator

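# Note: each mask dict produced by SamAutomaticMaskGenerator.generate() carries,
# among other fields, "segmentation" (a boolean HxW array), "bbox" (x, y, w, h),
# "area", "predicted_iou" and "stability_score"; the helpers below rely on
# exactly these keys.
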
@lru_cache
def load_clip(
    name: str = "ViT-B/32",
) -> Tuple[torch.nn.Module, Callable[[PIL.Image.Image], torch.Tensor]]:
    model, preprocess = clip.load(name, device=device)
    return model.to(device), preprocess

def adjust_image_size(image: np.ndarray) -> np.ndarray:
    height, width = image.shape[:2]
    if height > width:
        if height > MAX_HEIGHT:
            height, width = MAX_HEIGHT, int(MAX_HEIGHT / height * width)
    else:
        if width > MAX_WIDTH:
            height, width = int(MAX_WIDTH / width * height), MAX_WIDTH
    image = cv2.resize(image, (width, height))
    return image

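# Example: a 2048x1536 (width x height) landscape input is resized to 1024x768,
# and a 1536x2048 portrait input becomes 768x1024; an image already within the
# 1024x1024 limits keeps its original dimensions.
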
@torch.no_grad()
def get_score(crop: PIL.Image.Image, texts: List[str]) -> torch.Tensor:
    model, preprocess = load_clip()
    preprocessed = preprocess(crop).unsqueeze(0).to(device)
    tokens = clip.tokenize(texts).to(device)
    logits_per_image, _ = model(preprocessed, tokens)
    similarity = logits_per_image.softmax(-1).cpu()
    return similarity[0, 0]

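# Example: with query "dog", get_texts() below yields ["a picture of dog",
# "a picture of background"], and get_score() returns the softmax probability
# that CLIP assigns to the first prompt for the given crop; a crop of a dog
# should score close to 1.0 and a background crop close to 0.0.
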
def crop_image(image: np.ndarray, mask: Dict[str, Any]) -> PIL.Image.Image:
    x, y, w, h = mask["bbox"]
    masked = image * np.expand_dims(mask["segmentation"], -1)
    crop = masked[y : y + h, x : x + w]
    if h > w:
        top, bottom, left, right = 0, 0, (h - w) // 2, (h - w) // 2
    else:
        top, bottom, left, right = (w - h) // 2, (w - h) // 2, 0, 0
    # padding
    crop = cv2.copyMakeBorder(
        crop,
        top,
        bottom,
        left,
        right,
        cv2.BORDER_CONSTANT,
        value=(0, 0, 0),
    )
    crop = PIL.Image.fromarray(crop)
    return crop

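# Design note (interpretation): crop_image() blacks out everything outside the
# mask and zero-pads the bounding-box crop to a square, so CLIP's fixed-size
# preprocessing scores only the segmented region without stretching it.
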
def get_texts(query: str) -> List[str]:
    return [f"a picture of {query}", "a picture of background"]

def filter_masks(
    image: np.ndarray,
    masks: List[Dict[str, Any]],
    predicted_iou_threshold: float,
    stability_score_threshold: float,
    query: str,
    clip_threshold: float,
) -> List[Dict[str, Any]]:
    filtered_masks: List[Dict[str, Any]] = []

    # consider only the TOP_K_OBJ largest masks (by area)
    for mask in sorted(masks, key=lambda mask: mask["area"])[-TOP_K_OBJ:]:
        if (
            mask["predicted_iou"] < predicted_iou_threshold
            or mask["stability_score"] < stability_score_threshold
            or image.shape[:2] != mask["segmentation"].shape[:2]
            or (
                query
                and get_score(crop_image(image, mask), get_texts(query)) < clip_threshold
            )
        ):
            continue

        filtered_masks.append(mask)

    return filtered_masks

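# In other words, a mask survives only if predicted_iou >= predicted_iou_threshold,
# stability_score >= stability_score_threshold, its segmentation matches the image
# size, and, when a query is provided, its CLIP score is >= clip_threshold.
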
117
+ def draw_masks(
118
+ image: np.ndarray, masks: List[np.ndarray], alpha: float = 0.7
119
+ ) -> np.ndarray:
120
+ for mask in masks:
121
+ color = [randint(127, 255) for _ in range(3)]
122
+
123
+ # draw mask overlay
124
+ colored_mask = np.expand_dims(mask["segmentation"], 0).repeat(3, axis=0)
125
+ colored_mask = np.moveaxis(colored_mask, 0, -1)
126
+ masked = np.ma.MaskedArray(image, mask=colored_mask, fill_value=color)
127
+ image_overlay = masked.filled()
128
+ image = cv2.addWeighted(image, 1 - alpha, image_overlay, alpha, 0)
129
+
130
+ # draw contour
131
+ contours, _ = cv2.findContours(
132
+ np.uint8(mask["segmentation"]), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
133
+ )
134
+ cv2.drawContours(image, contours, -1, (0, 0, 255), 2)
135
+ return image
136
+
137
+
def segment(
    predicted_iou_threshold: float,
    stability_score_threshold: float,
    clip_threshold: float,
    image_path: str,
    query: str,
) -> PIL.Image.Image:
    mask_generator = load_mask_generator()
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # reduce the size to save gpu memory
    image = adjust_image_size(image)
    masks = mask_generator.generate(image)
    masks = filter_masks(
        image,
        masks,
        predicted_iou_threshold,
        stability_score_threshold,
        query,
        clip_threshold,
    )
    image = draw_masks(image, masks)
    image = PIL.Image.fromarray(image)
    return image

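# Example (sketch, not exercised here): the pipeline can also be called directly,
# e.g.
#   result = segment(0.9, 0.8, 0.85, "examples/dog.jpg", "dog")
#   result.save("segmented_dog.png")
# where "examples/dog.jpg" is one of the sample images referenced below.
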
demo = gr.Interface(
    fn=segment,
    inputs=[
        gr.Slider(0, 1, value=0.9, label="predicted_iou_threshold"),
        gr.Slider(0, 1, value=0.8, label="stability_score_threshold"),
        gr.Slider(0, 1, value=0.85, label="clip_threshold"),
        gr.Image(type="filepath"),
        "text",
    ],
    outputs="image",
    allow_flagging="never",
    title="Segment Anything with CLIP",
    examples=[
        [
            0.9,
            0.8,
            0.99,
            os.path.join(os.path.dirname(__file__), "examples/dog.jpg"),
            "dog",
        ],
        [
            0.9,
            0.8,
            0.75,
            os.path.join(os.path.dirname(__file__), "examples/city.jpg"),
            "building",
        ],
        [
            0.9,
            0.8,
            0.998,
            os.path.join(os.path.dirname(__file__), "examples/food.jpg"),
            "strawberry",
        ],
        [
            0.9,
            0.8,
            0.75,
            os.path.join(os.path.dirname(__file__), "examples/horse.jpg"),
            "horse",
        ],
        [
            0.9,
            0.8,
            0.99,
            os.path.join(os.path.dirname(__file__), "examples/bears.jpg"),
            "bear",
        ],
        [
            0.9,
            0.8,
            0.99,
            os.path.join(os.path.dirname(__file__), "examples/cats.jpg"),
            "cat",
        ],
        [
            0.9,
            0.8,
            0.99,
            os.path.join(os.path.dirname(__file__), "examples/fish.jpg"),
            "fish",
        ],
    ],
)

if __name__ == "__main__":
    demo.launch(share=True)
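# Note: the Gradio examples above assume an `examples/` directory next to app.py
# containing dog.jpg, city.jpg, food.jpg, horse.jpg, bears.jpg, cats.jpg and
# fish.jpg. Running the app assumes torch, opencv-python, gradio, the OpenAI
# `clip` package and `segment_anything` are installed; the large SAM ViT-H
# checkpoint is downloaded automatically to ~/.cache/SAM on first run.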