Image Classification
English
breast
cancer
odelia
mueller-franzes committed on
Commit
255fb0d
·
verified ·
1 Parent(s): c85293d

Upload epoch=17-step=1836.ckpt

Browse files
Files changed (6) hide show
  1. README.md +72 -3
  2. epoch=17-step=1836.ckpt +3 -0
  3. model_config.json +26 -0
  4. models.py +155 -0
  5. predict_attention.py +171 -0
  6. state_dict.pt +3 -0
README.md CHANGED
@@ -1,3 +1,72 @@
1
- ---
2
- license: cc-by-nc-4.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-4.0
3
+ datasets:
4
+ - ODELIA-AI/ODELIA-Challenge-2025
5
+ language:
6
+ - en
7
+ metrics:
8
+ - roc_auc
9
+ pipeline_tag: image-classification
10
+ tags:
11
+ - breast
12
+ - cancer
13
+ - odelia
14
+ extra_gated_prompt: >-
15
+ ### 🛡️ Model Usage Agreement
16
+
17
+ By accessing or using this model (the “Model”), you acknowledge and agree to the following terms and conditions:
18
+
19
+ #### 1. Research-Only Use
20
+
21
+ The Model is provided strictly for non-commercial, academic, and research purposes. It must not be used for clinical decision-making, diagnosis, treatment, or any other application involving real patients or clinical care.
22
+
23
+ #### 2. No Clinical or Commercial Deployment
24
+
25
+ The Model is **not approved for clinical use** or any commercial application. Any deployment in healthcare settings or use for patient-related decision support is expressly prohibited.
26
+
27
+ #### 3. Redistribution and Modification
28
+
29
+ You may not copy, distribute, sublicense, or otherwise share the Model or any derivative works without prior written permission from the model authors or the ODELIA consortium.
30
+
31
+ #### 4. Privacy and Ethics Compliance
32
+
33
+ You must not attempt to identify, re-identify, or deanonymize any individual whose data may have contributed to the training or evaluation of the Model.
34
+
35
+ #### 5. Attribution Requirement
36
+
37
+ Any publication, presentation, or derivative work that uses or references this Model must include clear attribution to the **ODELIA consortium**, along with any citations specified in the accompanying documentation.
38
+
39
+ #### 6. Responsibility and Verification
40
+
41
+ You are solely responsible for verifying and validating the Model’s outputs and ensuring they are appropriate for your research context. The Model and its outputs are provided “as is,” without warranties of any kind.
42
+
43
+ #### 7. Inclusion of Third-Party Components
44
+
45
+ This Model incorporates or is derived from **DINOv3**, developed by **Meta Platforms**.
46
+ Use of the Model is therefore also subject to the **DINOv3 License Agreement**.
47
+ By using this Model, you agree to comply with both:
48
+
49
+ * This Model Usage Agreement, **and**
50
+ * The [DINOv3 License Terms](https://github.com/facebookresearch/dinov3).
51
+ ---
52
+
53
+ # ODELIA Classification Baseline Model
54
+ For a comprehensive description of the model and its intended use, please refer to our paper: [Read the paper](https://arxiv.org/abs/2506.00474)
55
+
56
+
57
+ ## Get Probabilities and Attention
58
+
59
+ To use this model, first download the required files from this repository:
60
+
61
+ ```python
62
+ from huggingface_hub import hf_hub_download
63
+
64
+ # Download model files to local directory
65
+ hf_hub_download(repo_id="ODELIA-AI/MST", filename="models.py", local_dir="./")
66
+ hf_hub_download(repo_id="ODELIA-AI/MST", filename="predict_attention.py", local_dir="./")
67
+ ```
68
+
69
+ Then execute `python predict_attention.py --path_img path/to/Sub_1.nii.gz` to get probabilities and attention maps.
70
+
71
+
72
+
epoch=17-step=1836.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a42b9c83fa4ec1c9a7b0060df288ea6fd3c20a9c9f7002ee26be5fb27f320c71
3
+ size 277159866
model_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint_source": "epoch=17-step=1836.ckpt",
3
+ "created_at": "2025-10-26T16:51:08.236236Z",
4
+ "hparams": {
5
+ "backbone_type": "dinov3",
6
+ "in_ch": 1,
7
+ "loss": "<class 'odelia.models.utils.losses.MulitCELoss'>",
8
+ "loss_kwargs": {
9
+ "class_labels_num": [
10
+ 3
11
+ ]
12
+ },
13
+ "lr_scheduler": null,
14
+ "lr_scheduler_kwargs": {},
15
+ "model_size": "s",
16
+ "optimizer": "<class 'torch.optim.adamw.AdamW'>",
17
+ "optimizer_kwargs": {
18
+ "lr": 1e-05
19
+ },
20
+ "out_ch": 3,
21
+ "save_hyperparameters": true,
22
+ "slice_fusion_type": "transformer",
23
+ "spatial_dims": 3
24
+ },
25
+ "model_class": "odelia.models.mst.MSTRegression"
26
+ }
models.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from einops import rearrange
2
+ import torch.nn as nn
3
+ import torch
4
+ import math
5
+ from transformers import AutoModel
6
+ from x_transformers import Encoder
7
+
8
+
9
class _MST(nn.Module):
    """Multi-Slice Transformer.

    Applies a 2D DINO backbone to every slice of a 3D volume and fuses the
    per-slice embeddings into a single representation that is mapped to
    `out_ch` logits. forward() expects input of shape [B, C, D, H, W]
    (slices stacked along D).
    """

    def __init__(
        self,
        out_ch=1,
        backbone_type="dinov3",
        model_size = "s", # 34, 50, ... or 's', 'b', 'l'
        slice_fusion_type = "transformer", # transformer, linear, average, none
    ):
        super().__init__()
        self.backbone_type = backbone_type
        self.slice_fusion_type = slice_fusion_type

        # 2D feature extractor, shared by all slices.
        if backbone_type == "dinov2":
            model_size = {'s':'small', 'b':'base', 'l':'large'}.get(model_size)
            self.backbone = AutoModel.from_pretrained(f"facebook/dinov2-with-registers-{model_size}")
            emb_ch = self.backbone.config.hidden_size
        elif backbone_type == "dinov3":
            self.backbone = AutoModel.from_pretrained(f"facebook/dinov3-vit{model_size}16-pretrain-lvd1689m")
            emb_ch = self.backbone.config.hidden_size
        else:
            raise ValueError("Unknown backbone_type")


        self.emb_ch = emb_ch
        if slice_fusion_type == "transformer":
            # Single-layer transformer encoder mixing slice embeddings;
            # a learnable CLS token collects the fused representation.
            self.slice_fusion = Encoder(
                dim = emb_ch,
                heads = 12 if emb_ch%12 == 0 else 8, # head count must divide emb_ch
                ff_mult = 1,
                attn_dropout=0.0,
                pre_norm = True,
                depth = 1,
                attn_flash = True,
                ff_no_bias = True,
                rotary_pos_emb=True,
            )
            self.cls_token = nn.Parameter(torch.randn(1, 1, emb_ch))
        elif slice_fusion_type == 'average':
            pass
        elif slice_fusion_type == "none":
            pass
        else:
            # NOTE(review): 'linear' is advertised in the parameter comment and
            # handled in forward(), but falls into this branch and raises -
            # confirm whether a linear fusion module was meant to be built here.
            raise ValueError("Unknown slice_fusion_type")

        self.linear = nn.Linear(emb_ch, out_ch)



    def forward(self, x, output_attentions=False):
        """Return logits [B, out_ch] for volume x of shape [B, C, D, H, W].

        When output_attentions is True, returns a tuple
        (logits, backbone attentions, slice-fusion attention layers).
        """
        B, *_ = x.shape

        # Mask (Slices with constant padded values): a slice whose mean equals
        # its corner voxel is assumed to be constant padding - heuristic.
        x_pad = torch.isclose(x.mean(dim=(-1,-2)), x[:, :, :, 0, 0]) # [B, C, D]
        x_pad = rearrange(x_pad, 'b c d -> b (c d)')

        # Flatten batch/channel/depth so each 2D slice is one backbone sample.
        x = rearrange(x, 'b c d h w -> (b c d) h w')
        x = x[:, None]
        x = x.repeat(1, 3, 1, 1) # Gray to RGB

        # -------------- Backbone --------------
        backbone_out = self.backbone(x, output_attentions=output_attentions)
        x = backbone_out.pooler_output
        x = rearrange(x, '(b d) e -> b d e', b=B)

        # -------------- Slice Fusion --------------
        if self.slice_fusion_type == 'none':
            return x
        elif self.slice_fusion_type == 'transformer':
            # The CLS token is appended LAST; padded slices are masked out.
            cls_pad = torch.zeros(B, 1, dtype=torch.bool, device=x.device)
            pad = torch.concat([x_pad, cls_pad], dim=1) # [B, D+1]
            x = torch.concat([x, self.cls_token.repeat(B, 1, 1)], dim=1) # [B, D+1, E], cls token last
            if output_attentions:
                x, slice_hiddens = self.slice_fusion(x, mask=~pad, return_hiddens=True) # [B, D+1, E]
            else:
                x = self.slice_fusion(x, mask=~pad) # [B, D+1, E]
        elif self.slice_fusion_type == 'linear':
            x = rearrange(x, 'b d e -> b e d')
            x = self.slice_fusion(x) # -> [B, E, 1]
            x = rearrange(x, 'b e d -> b d e') # -> [B, 1, E]
        elif self.slice_fusion_type == 'average':
            x = x.mean(dim=1, keepdim=True) # [B, D, E] -> [B, 1, E]

        # -------------- Logits --------------
        # The last token is the CLS token (transformer) or the single fused token.
        x = self.linear(x[:, -1])
        if output_attentions:
            slice_attn_layers = [
                interm.post_softmax_attn
                for interm in getattr(slice_hiddens, 'attn_intermediates', [])
                if interm is not None and getattr(interm, 'post_softmax_attn', None) is not None
            ]
            return x, backbone_out.attentions, slice_attn_layers
        return x

    def forward_attention(self, x) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Return (logits, plane attention, slice attention).

        plane attention: [B, C*D, side, side] patch-level maps per slice, each
        weighted by that slice's fusion attention.
        slice attention: [B, D] CLS-to-slice weights, averaged over channels.
        """
        B, C, D, _, _ = x.shape
        # Disable fast attention (fused kernels do not expose attention weights)
        attn_impl = self.backbone.config._attn_implementation
        self.backbone.set_attn_implementation("eager")
        flash_modules = []
        for module in self.slice_fusion.modules():
            if hasattr(module, 'flash'):
                flash_modules.append((module, module.flash))
                module.flash = False

        out, backbone_attn, slice_attn_layers = self.forward(x, output_attentions=True)

        # Restore previous attention implementation
        for module, previous in flash_modules:
            module.flash = previous
        # NOTE(review): the getter above is unguarded while this restore is
        # guarded - confirm which transformers versions need the hasattr check.
        if hasattr(self.backbone, "set_attn_implementation"):
            self.backbone.set_attn_implementation(attn_impl)

        # Process attentions
        slice_attn = torch.stack(slice_attn_layers)[-1] # last fusion layer
        slice_attn = slice_attn.mean(dim=1) # average over heads
        slice_attn = slice_attn[:, -1, :-1] # CLS query -> slice keys
        slice_attn = slice_attn.view(B, C, D).mean(dim=1)

        plane_attn_layers = [att for att in backbone_attn if att is not None]
        plane_attn = torch.stack(plane_attn_layers)[-1] # last backbone layer
        plane_attn = plane_attn.mean(dim=1) # average over heads
        # Attention of the backbone CLS token to patch tokens (skip registers).
        num_reg_tokens = getattr(self.backbone.config, 'num_register_tokens', 0)
        plane_attn = plane_attn[:, 0, 1 + num_reg_tokens:]
        plane_attn = plane_attn.view(B, C * D, -1)

        # Weight every slice by its slice attention
        plane_attn = plane_attn * slice_attn.unsqueeze(-1)

        num_patches = plane_attn.shape[-1]
        side = int(math.sqrt(num_patches))
        if side * side != num_patches:
            raise RuntimeError("number of patches is not a perfect square")
        plane_attn = plane_attn.reshape(B, C * D, side, side)

        return out, plane_attn, slice_attn
144
+
145
+
146
class MSTRegression(nn.Module):
    """Thin inference wrapper around `_MST` matching the training-time interface.

    Extra constructor arguments (`in_ch`, `spatial_dims`, `optimizer_kwargs`,
    `**kwargs`) are accepted but ignored so the model can be instantiated
    directly from the saved hyperparameters in `model_config.json`.
    """

    def __init__(self, in_ch=1, out_ch=1, spatial_dims=3, backbone_type="dinov3", model_size="s", slice_fusion_type="transformer", optimizer_kwargs=None, **kwargs):
        # optimizer_kwargs defaults to None instead of a mutable dict literal
        # (shared mutable-default pitfall); it is unused at inference time.
        super().__init__()
        self.mst = _MST(out_ch=out_ch, backbone_type=backbone_type, model_size=model_size, slice_fusion_type=slice_fusion_type)

    def forward(self, x):
        """Return logits for input volume x of shape [B, C, D, H, W]."""
        return self.mst(x)

    def forward_attention(self, x):
        """Return (logits, plane attention, slice attention) for x."""
        return self.mst.forward_attention(x)
predict_attention.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import json
3
+ from pathlib import Path
4
+ from huggingface_hub import hf_hub_download
5
+ import torch
6
+ import numpy as np
7
+ import torch.nn.functional as F
8
+ import torchio as tio
9
+ from torchvision.utils import save_image
10
+ from matplotlib.pyplot import get_cmap
11
+
12
+ from models import MSTRegression
13
+
14
+
15
+
16
def minmax_norm(x):
    """Normalize a tensor to [0, 1] over all its elements.

    A constant input (max == min) is mapped to all zeros instead of
    producing NaN from a 0/0 division.
    """
    lo, hi = x.min(), x.max()
    rng = hi - lo
    if rng == 0:
        return x - lo  # constant tensor -> zeros, avoids NaN
    return (x - lo) / rng
19
+
20
def tensor2image(tensor, batch=0):
    """Reshape a 5D volume tensor [B, C, D, H, W] into a stack of 2D
    single-channel images [C*D, 1, H, W] for the selected batch element.

    Tensors with fewer than five dimensions are returned unchanged.
    """
    if tensor.ndim < 5:
        return tensor
    h, w = tensor.shape[-2:]
    volume = torch.swapaxes(tensor[batch], 0, 1)  # [C, D, H, W] -> [D, C, H, W]
    return volume.reshape(-1, h, w)[:, None]
23
+
24
def tensor_cam2image(tensor, cam, batch=0, alpha=0.5, color_map=get_cmap('jet')):
    """Overlay a (grad) cam heatmap on an image tensor as 2D RGB images.

    Both inputs are flattened to image stacks via tensor2image; the cam is
    colorized with *color_map* (which expects values in [0, 1]) and alpha-
    blended over the (gray-replicated) image.
    """
    img = tensor2image(tensor, batch)  # [B, C, H, W]
    if img.shape[1] != 3:
        # Replicate the gray channel so the blend happens in RGB.
        img = torch.cat((img, img, img), dim=1)
    heat = tensor2image(cam, batch)[:, 0].cpu().numpy()  # [B, H, W]
    heat = color_map(heat)  # [B, H, W, 4] RGBA
    heat = torch.moveaxis(torch.tensor(heat), -1, 1)[:, :3]  # [B, 3, H, W]
    return (1 - alpha) * img + alpha * heat
34
+
35
+
36
+
37
def crop_breast_height(image, margin_top=10) -> tio.Crop:
    """Build a tio.Crop that reduces the height axis to 256 voxels, placed so
    the bright (breast) region stays inside the crop.

    Assumes an input height of 512 (see get_bilateral_transform's
    target_shape of (512, 512, 32)).
    """
    # Voxels above the 90th intensity percentile count as foreground.
    cutoff = int(np.quantile(image.data.float(), 0.9))
    row_counts = (image.data > cutoff)[0].sum(axis=(0, 2))  # foreground per height row
    highest_fg = int(torch.argwhere(row_counts).max())
    # Amount cropped from the top, clamped to [0, 256]; the remainder of the
    # 256 voxels to remove comes off the bottom.
    top = min(max(512 - highest_fg - margin_top, 0), 256)
    return tio.Crop((0, 0, 256 - top, top, 0, 0))
45
+
46
+
47
def get_bilateral_transform(img:tio.ScalarImage, ref_img=None, target_spacing = (0.7, 0.7, 3), target_shape = (512, 512, 32)):
    """Bring *img* into a canonical bilateral view and build the inverse.

    The reference image (by default *img* itself) is reoriented to canonical
    axes, resampled to *target_spacing*, padded/cropped to *target_shape*,
    and its height is cropped to the breast region. Returns the transformed
    image together with a transform that maps results back to *img*'s space.
    """
    # -------- Settings --------------
    ref_img = img if ref_img is None else ref_img

    # Spacing
    ref_img = tio.ToCanonical()(ref_img)
    ref_img = tio.Resample(target_spacing)(ref_img)
    resample = tio.Resample(ref_img)

    # Crop: the height crop is computed once on the reference so the same
    # crop is applied deterministically to the input image below.
    ref_img = tio.CropOrPad(target_shape, padding_mode='minimum')(ref_img)
    crop_height = crop_breast_height(ref_img)

    # Process input image
    trans = tio.Compose([
        resample,
        tio.CropOrPad(target_shape, padding_mode='minimum'),
        crop_height,
    ])

    # Inverse: undo the steps in reverse order (crop -> pad/crop -> resample).
    trans_inv = tio.Compose([
        crop_height.inverse(),
        tio.CropOrPad(img.spatial_shape, padding_mode='minimum'),
        tio.Resample(img),
    ])
    return trans(img), trans_inv
73
+
74
def get_unilateral_transform(img: tio.ScalarImage, target_shape=(224, 224, 32)):
    """Preprocess a single breast side for the network and return its inverse.

    Returns (transformed image, inverse transform). The inverse undoes the
    crop/pad and the flips, but not the intensity normalization.
    """
    # Exclude the extreme min/max intensities from the z-norm statistics.
    mask = lambda x: (x > x.min()) & (x < x.max())
    forward = tio.Compose([
        tio.Flip((1, 0)),
        tio.CropOrPad(target_shape),
        tio.ZNormalization(masking_method=mask),
    ])
    backward = tio.Compose([
        tio.CropOrPad(img.spatial_shape),
        tio.Flip((1, 0)),
    ])
    return forward(img), backward
85
+
86
+
87
def run_prediction(img: tio.ScalarImage, model: MSTRegression):
    """Predict class probabilities and attention maps for both breasts.

    The bilateral image is split into left/right halves, each half is
    preprocessed and scored independently, and the attention maps are mapped
    back into the coordinate space of the original input image.

    Returns:
        probs:  dict side -> probability tensor (softmax over the logits).
        weight: tio.ScalarImage attention map, min-max normalized to [0, 1],
                aligned with the original input image.
    """
    img_bil, bil_trans_rev = get_bilateral_transform(img)
    # After the bilateral transform the width is 512, so each half is 256.
    split_side = {
        'right': tio.Crop((256, 0, 0, 0, 0, 0)),
        'left': tio.Crop((0, 256, 0, 0, 0, 0)),
    }

    weights, probs = {}, {}
    for side, crop in split_side.items():
        img_side = crop(img_bil)
        img_side, uni_trans_inv = get_unilateral_transform(img_side)
        # tio stores data as [C, W, H, D]; the network wants slices along dim 1.
        img_side = img_side.data.swapaxes(1,-1)
        img_side = img_side.unsqueeze(0) # Add batch dim -> [1, C, D, H, W]

        with torch.no_grad():
            device = next(model.parameters()).device
            logits, weight, weight_slice = model.forward_attention(img_side.to(device))

        # Upsample the patch-level attention to the input resolution.
        weight = F.interpolate(weight.unsqueeze(1), size=img_side.shape[2:], mode='trilinear', align_corners=False).cpu()
        pred_prob = F.softmax(logits, dim=-1).cpu()
        probs[side] = pred_prob.squeeze(0)

        # Undo the unilateral preprocessing so the two halves can be merged.
        weight = weight.squeeze(0).swapaxes(1,-1) # ->[C, W, H, D]
        weight = uni_trans_inv(weight)
        weights[side] = weight

    weight = torch.concat([weights['left'], weights['right']], dim=1) # C, W, H, D
    weight = tio.ScalarImage(tensor=weight, affine=img_bil.affine)
    weight = bil_trans_rev(weight) # back to the original image space
    weight.set_data(minmax_norm(weight.data))
    return probs, weight
119
+
120
def load_model(repo_id= "ODELIA-AI/MST") -> MSTRegression:
    """Download config and weights from the Hugging Face Hub and build the model.

    Reads the saved hyperparameters from `model_config.json`, instantiates
    MSTRegression from them, and loads `state_dict.pt` strictly.
    """
    # Download config and state dict
    config_path = hf_hub_download(repo_id=repo_id, repo_type="model", filename="model_config.json")
    with open(config_path, "r", encoding="utf-8") as fp:
        config = json.load(fp)

    hparams = config.get("hparams", {})
    model = MSTRegression(**hparams)

    state_dict_path = hf_hub_download(repo_id=repo_id, repo_type="model", filename="state_dict.pt")
    # weights_only=True restricts unpickling to tensors/containers, preventing
    # arbitrary code execution from a tampered downloaded checkpoint.
    state_dict = torch.load(state_dict_path, map_location="cpu", weights_only=True)
    model.load_state_dict(state_dict, strict=True)
    return model
133
+
134
+
135
if __name__ == "__main__":
    #------------ Get Arguments ----------------
    parser = argparse.ArgumentParser()
    parser.add_argument('--path_img', default='/home/homesOnMaster/gfranzes/Documents/datasets/ODELIA/UKA/data/UKA_2/Sub_1.nii.gz', type=str)
    args = parser.parse_args()


    #------------ Settings/Defaults ----------------
    path_out_dir = Path().cwd()/'results/test_attention'
    path_out_dir.mkdir(parents=True, exist_ok=True)


    # ------------ Load Data ----------------
    path_img = Path(args.path_img)
    img = tio.ScalarImage(path_img)


    # ------------ Initialize Model ------------
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = load_model()
    model.to(device)
    model.eval()


    # ------------ Predict ----------------
    probs, weight = run_prediction(img, model)

    # Save the raw input and attention map as NIfTI for inspection.
    # (plain string literals: the previous f-strings had no placeholders)
    img.save(path_out_dir/"input.nii.gz")
    weight.save(path_out_dir/"attention.nii.gz")
    # Reorder tio data [C, W, H, D] -> [1, C, D, H, W] for tiling slices.
    weight = weight.data.swapaxes(1,-1).unsqueeze(0)
    img = img.data.swapaxes(1,-1).unsqueeze(0)
    save_image(tensor_cam2image(minmax_norm(img), minmax_norm(weight), alpha=0.5),
               path_out_dir/"overlay.png", normalize=False)

    for side in ['left', 'right']:
        print(f"{side} breast predicted probabilities: {probs[side]}")
171
+
state_dict.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c25602fec82d90912ed6f2623639937a2fb44931cfcfb382aecd16d5647c8327
3
+ size 92379550