eloise54 committed
Commit 46f0bee · 1 Parent(s): 2908d13

update documentation

Files changed (3)
  1. PCAM-pipeline.ipynb +55 -56
  2. README.md +12 -0
  3. app.py +27 -0
PCAM-pipeline.ipynb CHANGED
@@ -13,6 +13,8 @@
13
  "\n",
14
  "## πŸ“Š Dataset Overview\n",
15
  "\n",
16
  "The **PatchCamelyon (PCam)** benchmark is a challenging image classification dataset designed for breast cancer detection tasks.\n",
17
  "\n",
18
  "- πŸ“¦ **Total images**: 327,680 color patches \n",
@@ -20,6 +22,16 @@
20
  "- πŸ§ͺ **Source**: Histopathologic scans of lymph node sections \n",
21
  "- 🏷️ **Labels**: Binary β€” A positive (1) label indicates that the center 32x32px region of a patch contains at least one pixel of tumor tissue. Tumor tissue in the outer region of the patch does not influence the label.\n",
22
  "\n",
23
  "\n",
24
  "## 🧠 Solution to Implement\n",
25
  "\n",
@@ -901,7 +913,7 @@
901
  },
902
  {
903
  "cell_type": "code",
904
- "execution_count": 22,
905
  "id": "a7e5a085-7671-4b20-8337-cf0d2fae7abf",
906
  "metadata": {},
907
  "outputs": [
@@ -934,8 +946,8 @@
934
  }
935
  ],
936
  "source": [
937
- "from torchvision.models import densenet169, DenseNet169_Weights\n",
938
- "model = densenet169(weights=DenseNet169_Weights.DEFAULT)\n",
939
  "\n",
940
  "for params in model.parameters():\n",
941
  " params.requires_grad = False\n",
@@ -1026,13 +1038,13 @@
1026
  },
1027
  {
1028
  "cell_type": "code",
1029
- "execution_count": 24,
1030
  "id": "a8bdb826-a1e8-4639-8f3a-f99207676e5f",
1031
  "metadata": {},
1032
  "outputs": [],
1033
  "source": [
1034
- "from torchvision.models import densenet169, DenseNet169_Weights, densenet121, DenseNet121_Weights\n",
1035
- "model = densenet169(weights=DenseNet169_Weights.DEFAULT)\n",
1036
  "\n",
1037
  "for params in model.parameters():\n",
1038
  " params.requires_grad = False\n",
@@ -1284,7 +1296,7 @@
1284
  },
1285
  {
1286
  "cell_type": "code",
1287
- "execution_count": 52,
1288
  "id": "bec9dc53-f45c-4456-b925-1da02d812c29",
1289
  "metadata": {},
1290
  "outputs": [
@@ -1305,7 +1317,7 @@
1305
  "sk_learn_metrics_logits = [roc_auc_score]\n",
1306
  "sk_learn_metrics_pred = [f1_score, accuracy_score]\n",
1307
  "epoch_num = 2\n",
1308
- "finetune_epoch_num = 3\n",
1309
  " \n",
1310
  "for i in range(epoch_num, epoch_num + finetune_epoch_num):\n",
1311
  " start_time = time.time()\n",
@@ -1370,7 +1382,7 @@
1370
  },
1371
  {
1372
  "cell_type": "code",
1373
- "execution_count": 58,
1374
  "id": "e33b4ddf-025e-40e0-a6ea-8d630c54ae42",
1375
  "metadata": {},
1376
  "outputs": [
@@ -1384,58 +1396,44 @@
1384
  }
1385
  ],
1386
  "source": [
1387
- "i=4\n",
1388
- "models_paths = [exp_dir+\"/model_\" + str(i) + \".pt\"]\n",
1389
  "\n",
1390
- "# First create tta_num augmented dataloaders\n",
1391
- "tta_num = 1\n",
1392
- "logits = []\n",
1393
- "for j in range(0, tta_num):\n",
1394
- " test_set_augment = PcamDatasetKaggle(root=dataset_dir, split=\"test\", transform = deepcopy(transform_data_augment)) #For TTA\n",
1395
- " test_dataloader_augment = DataLoader(test_set_augment, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=6, persistent_workers = True)\n",
1396
- " for modelp in models_paths:\n",
1397
- " pcam_model = torch.load(modelp, weights_only = False)\n",
1398
- " pcam_model = pcam_model.to(device)\n",
1399
- " test_y, test_logits = run_inference(pcam_model, test_dataloader, device)\n",
1400
- " logits.append(test_logits)\n",
1401
- " test_y_augm, test_logits_aum = run_inference(pcam_model, test_dataloader_augment, device)\n",
1402
- " logits.append(test_logits_aum)\n"
1403
- ]
1404
- },
1405
- {
1406
- "cell_type": "code",
1407
- "execution_count": 59,
1408
- "id": "27845efb-1408-4b53-8ae7-3fcb8d04bde8",
1409
- "metadata": {},
1410
- "outputs": [],
1411
- "source": [
1412
- "# Average logits\n",
1413
- "logits_stacked = torch.stack(logits)\n",
1414
- "mean_logits = torch.mean(logits_stacked, dim = 0, keepdims=True)"
1415
- ]
1416
- },
1417
- {
1418
- "cell_type": "code",
1419
- "execution_count": 60,
1420
- "id": "39de3d8e-b42d-4595-bc5a-082bc8c6156e",
1421
- "metadata": {},
1422
- "outputs": [],
1423
- "source": [
1424
- "#Create submission file with final predictions\n",
1425
- "image_ids = [img.replace('.tif', '') for img in test_set.imgs.tolist()]\n",
1426
- "test_preds = torch.sigmoid(mean_logits)\n",
1427
  "\n",
1428
- "submission_df = pd.DataFrame({\n",
1429
- " 'id': image_ids,\n",
1430
- " 'label': test_preds.squeeze().detach().cpu().numpy()\n",
1431
- "})\n",
1432
  "\n",
1433
- "submission_df.to_csv(exp_dir+'/submission.csv', index=False)"
1434
  ]
1435
  },
1436
  {
1437
  "cell_type": "code",
1438
- "execution_count": 61,
1439
  "id": "bc61c425-04e2-4f43-98fd-3b93991b8049",
1440
  "metadata": {},
1441
  "outputs": [
@@ -1449,7 +1447,7 @@
1449
  }
1450
  ],
1451
  "source": [
1452
- "sub_path = exp_dir + '/submission.csv'\n",
1453
  "model_path = models_paths[0]\n",
1454
  "!kaggle competitions submit -c histopathologic-cancer-detection -f {sub_path} -m {model_path}"
1455
  ]
@@ -1464,11 +1462,12 @@
1464
  },
1465
  {
1466
  "cell_type": "code",
1467
- "execution_count": 98,
1468
  "id": "a64a0209-4ca7-4566-8bef-dac798f7f3bc",
1469
  "metadata": {},
1470
  "outputs": [],
1471
  "source": [
1472
  "models_paths = [exp_dir+\"/model_\" + str(i) + \".pt\"]\n",
1473
  "pcam_model = torch.load(models_paths[0], weights_only = False)\n",
1474
  "pcam_model = pcam_model.to(device)\n",
 
13
  "\n",
14
  "## πŸ“Š Dataset Overview\n",
15
  "\n",
16
+ "https://github.com/basveeling/pcam\n",
17
+ "\n",
18
  "The **PatchCamelyon (PCam)** benchmark is a challenging image classification dataset designed for breast cancer detection tasks.\n",
19
  "\n",
20
  "- πŸ“¦ **Total images**: 327,680 color patches \n",
 
22
  "- πŸ§ͺ **Source**: Histopathologic scans of lymph node sections \n",
23
  "- 🏷️ **Labels**: Binary β€” A positive (1) label indicates that the center 32x32px region of a patch contains at least one pixel of tumor tissue. Tumor tissue in the outer region of the patch does not influence the label.\n",
24
  "\n",
25
+ "```\n",
26
+ "B. S. Veeling, J. Linmans, J. Winkens, T. Cohen, M. Welling. \"Rotation Equivariant CNNs for Digital Pathology\". arXiv:1806.03962\n",
27
+ "```\n",
28
+ "\n",
29
+ "```\n",
30
+ "Ehteshami Bejnordi et al. Diagnostic Assessment of Deep Learning Algorithms for Detection of Lymph Node Metastases in Women With Breast Cancer. JAMA: The Journal of the American Medical Association, 318(22), 2199–2210. doi:jama.2017.14585\n",
31
+ "```\n",
32
+ "\n",
33
+ "Under CC0 License\n",
34
+ "\n",
35
  "\n",
36
  "## 🧠 Solution to Implement\n",
37
  "\n",
 
913
  },
914
  {
915
  "cell_type": "code",
916
+ "execution_count": null,
917
  "id": "a7e5a085-7671-4b20-8337-cf0d2fae7abf",
918
  "metadata": {},
919
  "outputs": [
 
946
  }
947
  ],
948
  "source": [
949
+ "from torchvision.models import densenet201, DenseNet201_Weights\n",
950
+ "model = densenet201(weights=DenseNet201_Weights.DEFAULT)\n",
951
  "\n",
952
  "for params in model.parameters():\n",
953
  " params.requires_grad = False\n",
 
1038
  },
1039
  {
1040
  "cell_type": "code",
1041
+ "execution_count": null,
1042
  "id": "a8bdb826-a1e8-4639-8f3a-f99207676e5f",
1043
  "metadata": {},
1044
  "outputs": [],
1045
  "source": [
1046
+ "from torchvision.models import densenet201, DenseNet201_Weights, densenet121, DenseNet121_Weights\n",
1047
+ "model = densenet201(weights=DenseNet201_Weights.DEFAULT)\n",
1048
  "\n",
1049
  "for params in model.parameters():\n",
1050
  " params.requires_grad = False\n",
 
1296
  },
1297
  {
1298
  "cell_type": "code",
1299
+ "execution_count": null,
1300
  "id": "bec9dc53-f45c-4456-b925-1da02d812c29",
1301
  "metadata": {},
1302
  "outputs": [
 
1317
  "sk_learn_metrics_logits = [roc_auc_score]\n",
1318
  "sk_learn_metrics_pred = [f1_score, accuracy_score]\n",
1319
  "epoch_num = 2\n",
1320
+ "finetune_epoch_num = 5\n",
1321
  " \n",
1322
  "for i in range(epoch_num, epoch_num + finetune_epoch_num):\n",
1323
  " start_time = time.time()\n",
 
1382
  },
1383
  {
1384
  "cell_type": "code",
1385
+ "execution_count": null,
1386
  "id": "e33b4ddf-025e-40e0-a6ea-8d630c54ae42",
1387
  "metadata": {},
1388
  "outputs": [
 
1396
  }
1397
  ],
1398
  "source": [
1399
+ "for i in range(0, epoch_num + finetune_epoch_num):\n",
1400
+ " models_paths = [exp_dir+\"/model_\" + str(i) + \".pt\"]\n",
1401
+ " pcam_model = torch.load(models_paths[0], weights_only = False)\n",
1402
+ " pcam_model = pcam_model.to(device)\n",
1403
+ "\n",
1404
+ " # First create tta_num augmented dataloaders\n",
1405
+ " tta_num = 1\n",
1406
+ " logits = []\n",
1407
+ " for j in range(0, tta_num):\n",
1408
+ " test_set_augment = PcamDatasetKaggle(root=dataset_dir, split=\"test\", transform = deepcopy(transform_data_augment)) #For TTA\n",
1409
+ " test_dataloader_augment = DataLoader(test_set_augment, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=6, persistent_workers = True)\n",
1410
+ " for modelp in models_paths:\n",
1411
+ " pcam_model = torch.load(modelp, weights_only = False)\n",
1412
+ " pcam_model = pcam_model.to(device)\n",
1413
+ " test_y, test_logits = run_inference(pcam_model, test_dataloader, device)\n",
1414
+ " logits.append(test_logits)\n",
1415
+ " test_y_augm, test_logits_aum = run_inference(pcam_model, test_dataloader_augment, device)\n",
1416
+ " logits.append(test_logits_aum)\n",
1417
+ " \n",
1418
+ " # Average logits\n",
1419
+ " logits_stacked = torch.stack(logits)\n",
1420
+ " mean_logits = torch.mean(logits_stacked, dim = 0, keepdims=True)\n",
1421
  "\n",
1422
+ " #Create submission file with final predictions\n",
1423
+ " image_ids = [img.replace('.tif', '') for img in test_set.imgs.tolist()]\n",
1424
+ " test_preds = torch.sigmoid(mean_logits)\n",
1425
  "\n",
1426
+ " submission_df = pd.DataFrame({\n",
1427
+ " 'id': image_ids,\n",
1428
+ " 'label': test_preds.squeeze().detach().cpu().numpy()\n",
1429
+ " })\n",
1430
  "\n",
1431
+ " submission_df.to_csv(exp_dir+'/submission_'+str(i)+'.csv', index=False)"
1432
  ]
1433
  },
1434
  {
1435
  "cell_type": "code",
1436
+ "execution_count": null,
1437
  "id": "bc61c425-04e2-4f43-98fd-3b93991b8049",
1438
  "metadata": {},
1439
  "outputs": [
 
1447
  }
1448
  ],
1449
  "source": [
1450
+ "sub_path = exp_dir + '/submission_6.csv'\n",
1451
  "model_path = models_paths[0]\n",
1452
  "!kaggle competitions submit -c histopathologic-cancer-detection -f {sub_path} -m {model_path}"
1453
  ]
 
1462
  },
1463
  {
1464
  "cell_type": "code",
1465
+ "execution_count": null,
1466
  "id": "a64a0209-4ca7-4566-8bef-dac798f7f3bc",
1467
  "metadata": {},
1468
  "outputs": [],
1469
  "source": [
1470
+ "i = 6\n",
1471
  "models_paths = [exp_dir+\"/model_\" + str(i) + \".pt\"]\n",
1472
  "pcam_model = torch.load(models_paths[0], weights_only = False)\n",
1473
  "pcam_model = pcam_model.to(device)\n",
README.md CHANGED
@@ -40,6 +40,8 @@ Or execute it on kaggle:
40
 
41
  ## 📊 Dataset Overview
42
 
43
  The **PatchCamelyon (PCam)** benchmark is a challenging image classification dataset designed for breast cancer detection tasks.
44
 
45
  - 📦 **Total images**: 327,680 color patches
@@ -47,6 +49,16 @@ The **PatchCamelyon (PCam)** benchmark is a challenging image classification dat
47
  - 🧪 **Source**: Histopathologic scans of lymph node sections
48
  - 🏷️ **Labels**: Binary — A positive (1) label indicates that the center 32x32px region of a patch contains at least one pixel of tumor tissue. Tumor tissue in the outer region of the patch does not influence the label.
49
 
50
  ## Results
51
 
52
  The submission on kaggle with the model trained on this notebook is
 
40
 
41
  ## 📊 Dataset Overview
42
 
43
+ https://github.com/basveeling/pcam
44
+
45
  The **PatchCamelyon (PCam)** benchmark is a challenging image classification dataset designed for breast cancer detection tasks.
46
 
47
  - 📦 **Total images**: 327,680 color patches
 
49
  - 🧪 **Source**: Histopathologic scans of lymph node sections
50
  - 🏷️ **Labels**: Binary — A positive (1) label indicates that the center 32x32px region of a patch contains at least one pixel of tumor tissue. Tumor tissue in the outer region of the patch does not influence the label.
51
 
52
+ ```
53
+ B. S. Veeling, J. Linmans, J. Winkens, T. Cohen, M. Welling. "Rotation Equivariant CNNs for Digital Pathology". arXiv:1806.03962
54
+ ```
55
+
56
+ ```
57
+ Ehteshami Bejnordi et al. Diagnostic Assessment of Deep Learning Algorithms for Detection of Lymph Node Metastases in Women With Breast Cancer. JAMA: The Journal of the American Medical Association, 318(22), 2199–2210. doi:jama.2017.14585
58
+ ```
59
+
60
+ Under CC0 License
61
+
62
  ## Results
63
 
64
  The submission on kaggle with the model trained on this notebook is
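The labeling rule quoted in the README hunks above (a patch is positive iff the central 32x32px region contains any tumor pixel) can be illustrated with a short sketch. The binary tumor mask here is hypothetical, since PCam ships the labels precomputed; the snippet only mirrors the stated rule:

```python
import numpy as np

def center_crop_label(mask: np.ndarray, crop: int = 32) -> int:
    """Illustrate the PCam rule on a hypothetical (96, 96) binary tumor mask:
    the patch is positive (1) iff the central crop x crop region contains at
    least one tumor pixel; tumor tissue outside that region is ignored."""
    h, w = mask.shape
    top, left = (h - crop) // 2, (w - crop) // 2
    center = mask[top:top + crop, left:left + crop]
    return int(center.any())
```
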
app.py CHANGED
@@ -116,6 +116,30 @@ def prev_sample(index: int, dataset_choice: str):
116
  # ---------------------------------
117
  # 5. UI elements
118
  # ---------------------------------
119
  with gr.Blocks() as demo:
120
  gr.Markdown("## 🧬 PCAM Tumor Classifier")
121
  gr.Markdown("Use **Next** or **Previous** to browse samples and see model predictions vs ground truth.")
@@ -139,6 +163,9 @@ with gr.Blocks() as demo:
139
  error_label = gr.Text(label="Prediction error")
140
  confidence = gr.Text(label="Probability")
141
 
 
142
  # Connect navigation
143
  prev_btn.click(fn=prev_sample, inputs=[state, dropdown], outputs=[image_output, pred_label, confidence, true_label, state, error_label, index, dataset_choice])
144
  next_btn.click(fn=next_sample, inputs=[state, dropdown], outputs=[image_output, pred_label, confidence, true_label, state, error_label, index, dataset_choice])
 
116
  # ---------------------------------
117
  # 5. UI elements
118
  # ---------------------------------
119
+
120
+ dataset_information = """
121
+ ## 📊 Dataset Overview
122
+
123
+ https://github.com/basveeling/pcam
124
+
125
+ The **PatchCamelyon (PCam)** benchmark is a challenging image classification dataset designed for breast cancer detection tasks.
126
+
127
+ - 📦 **Total images**: 327,680 color patches
128
+ - 🖼️ **Image size**: 96 × 96 pixels
129
+ - 🧪 **Source**: Histopathologic scans of lymph node sections
130
+ - 🏷️ **Labels**: Binary — A positive (1) label indicates that the center 32x32px region of a patch contains at least one pixel of tumor tissue. Tumor tissue in the outer region of the patch does not influence the label.
131
+
132
+ ```
133
+ B. S. Veeling, J. Linmans, J. Winkens, T. Cohen, M. Welling. "Rotation Equivariant CNNs for Digital Pathology". arXiv:1806.03962
134
+ ```
135
+
136
+ ```
137
+ Ehteshami Bejnordi et al. Diagnostic Assessment of Deep Learning Algorithms for Detection of Lymph Node Metastases in Women With Breast Cancer. JAMA: The Journal of the American Medical Association, 318(22), 2199–2210. doi:jama.2017.14585
138
+ ```
139
+
140
+ Under CC0 License
141
+ """
142
+
143
  with gr.Blocks() as demo:
144
  gr.Markdown("## 🧬 PCAM Tumor Classifier")
145
  gr.Markdown("Use **Next** or **Previous** to browse samples and see model predictions vs ground truth.")
 
163
  error_label = gr.Text(label="Prediction error")
164
  confidence = gr.Text(label="Probability")
165
 
166
+ with gr.Row():
167
+ gr.Markdown(dataset_information)
168
+
169
  # Connect navigation
170
  prev_btn.click(fn=prev_sample, inputs=[state, dropdown], outputs=[image_output, pred_label, confidence, true_label, state, error_label, index, dataset_choice])
171
  next_btn.click(fn=next_sample, inputs=[state, dropdown], outputs=[image_output, pred_label, confidence, true_label, state, error_label, index, dataset_choice])
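
The UI fields above report a per-patch probability alongside the ground-truth label. A minimal sketch of how such a score could be produced from the saved checkpoint, assuming a single-logit model and an evaluation transform named transform_eval (both assumptions; app.py's actual prediction helpers are outside this diff):

```python
import torch

def predict_probability(model, image, transform_eval, device="cpu"):
    """Hypothetical helper: score one 96x96 patch with the trained PCam model.
    `transform_eval` and the single-logit output are assumptions, not code
    taken from this commit."""
    model.eval()
    x = transform_eval(image).unsqueeze(0).to(device)  # shape (1, C, H, W)
    with torch.no_grad():
        logit = model(x)
    return torch.sigmoid(logit).item()
```
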