eloise54 committed
Commit 46f0bee · 1 Parent(s): 2908d13

update documentation

Files changed (3)
  1. PCAM-pipeline.ipynb +55 -56
  2. README.md +12 -0
  3. app.py +27 -0
PCAM-pipeline.ipynb CHANGED
@@ -13,6 +13,8 @@
13
  "\n",
14
  "## πŸ“Š Dataset Overview\n",
15
  "\n",
16
  "The **PatchCamelyon (PCam)** benchmark is a challenging image classification dataset designed for breast cancer detection tasks.\n",
17
  "\n",
18
  "- πŸ“¦ **Total images**: 327,680 color patches \n",
@@ -20,6 +22,16 @@
20
  "- πŸ§ͺ **Source**: Histopathologic scans of lymph node sections \n",
21
  "- 🏷️ **Labels**: Binary β€” A positive (1) label indicates that the center 32x32px region of a patch contains at least one pixel of tumor tissue. Tumor tissue in the outer region of the patch does not influence the label.\n",
22
  "\n",
23
  "\n",
24
  "## 🧠 Solution to Implement\n",
25
  "\n",
@@ -901,7 +913,7 @@
901
  },
902
  {
903
  "cell_type": "code",
904
- "execution_count": 22,
905
  "id": "a7e5a085-7671-4b20-8337-cf0d2fae7abf",
906
  "metadata": {},
907
  "outputs": [
@@ -934,8 +946,8 @@
934
  }
935
  ],
936
  "source": [
937
- "from torchvision.models import densenet169, DenseNet169_Weights\n",
938
- "model = densenet169(weights=DenseNet169_Weights.DEFAULT)\n",
939
  "\n",
940
  "for params in model.parameters():\n",
941
  " params.requires_grad = False\n",
@@ -1026,13 +1038,13 @@
1026
  },
1027
  {
1028
  "cell_type": "code",
1029
- "execution_count": 24,
1030
  "id": "a8bdb826-a1e8-4639-8f3a-f99207676e5f",
1031
  "metadata": {},
1032
  "outputs": [],
1033
  "source": [
1034
- "from torchvision.models import densenet169, DenseNet169_Weights, densenet121, DenseNet121_Weights\n",
1035
- "model = densenet169(weights=DenseNet169_Weights.DEFAULT)\n",
1036
  "\n",
1037
  "for params in model.parameters():\n",
1038
  " params.requires_grad = False\n",
@@ -1284,7 +1296,7 @@
1284
  },
1285
  {
1286
  "cell_type": "code",
1287
- "execution_count": 52,
1288
  "id": "bec9dc53-f45c-4456-b925-1da02d812c29",
1289
  "metadata": {},
1290
  "outputs": [
@@ -1305,7 +1317,7 @@
1305
  "sk_learn_metrics_logits = [roc_auc_score]\n",
1306
  "sk_learn_metrics_pred = [f1_score, accuracy_score]\n",
1307
  "epoch_num = 2\n",
1308
- "finetune_epoch_num = 3\n",
1309
  " \n",
1310
  "for i in range(epoch_num, epoch_num + finetune_epoch_num):\n",
1311
  " start_time = time.time()\n",
@@ -1370,7 +1382,7 @@
1370
  },
1371
  {
1372
  "cell_type": "code",
1373
- "execution_count": 58,
1374
  "id": "e33b4ddf-025e-40e0-a6ea-8d630c54ae42",
1375
  "metadata": {},
1376
  "outputs": [
@@ -1384,58 +1396,44 @@
1384
  }
1385
  ],
1386
  "source": [
1387
- "i=4\n",
1388
- "models_paths = [exp_dir+\"/model_\" + str(i) + \".pt\"]\n",
1389
  "\n",
1390
- "# First create tta_num augmented dataloaders\n",
1391
- "tta_num = 1\n",
1392
- "logits = []\n",
1393
- "for j in range(0, tta_num):\n",
1394
- " test_set_augment = PcamDatasetKaggle(root=dataset_dir, split=\"test\", transform = deepcopy(transform_data_augment)) #For TTA\n",
1395
- " test_dataloader_augment = DataLoader(test_set_augment, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=6, persistent_workers = True)\n",
1396
- " for modelp in models_paths:\n",
1397
- " pcam_model = torch.load(modelp, weights_only = False)\n",
1398
- " pcam_model = pcam_model.to(device)\n",
1399
- " test_y, test_logits = run_inference(pcam_model, test_dataloader, device)\n",
1400
- " logits.append(test_logits)\n",
1401
- " test_y_augm, test_logits_aum = run_inference(pcam_model, test_dataloader_augment, device)\n",
1402
- " logits.append(test_logits_aum)\n"
1403
- ]
1404
- },
1405
- {
1406
- "cell_type": "code",
1407
- "execution_count": 59,
1408
- "id": "27845efb-1408-4b53-8ae7-3fcb8d04bde8",
1409
- "metadata": {},
1410
- "outputs": [],
1411
- "source": [
1412
- "# Average logits\n",
1413
- "logits_stacked = torch.stack(logits)\n",
1414
- "mean_logits = torch.mean(logits_stacked, dim = 0, keepdims=True)"
1415
- ]
1416
- },
1417
- {
1418
- "cell_type": "code",
1419
- "execution_count": 60,
1420
- "id": "39de3d8e-b42d-4595-bc5a-082bc8c6156e",
1421
- "metadata": {},
1422
- "outputs": [],
1423
- "source": [
1424
- "#Create submission file with final predictions\n",
1425
- "image_ids = [img.replace('.tif', '') for img in test_set.imgs.tolist()]\n",
1426
- "test_preds = torch.sigmoid(mean_logits)\n",
1427
  "\n",
1428
- "submission_df = pd.DataFrame({\n",
1429
- " 'id': image_ids,\n",
1430
- " 'label': test_preds.squeeze().detach().cpu().numpy()\n",
1431
- "})\n",
1432
  "\n",
1433
- "submission_df.to_csv(exp_dir+'/submission.csv', index=False)"
1434
  ]
1435
  },
1436
  {
1437
  "cell_type": "code",
1438
- "execution_count": 61,
1439
  "id": "bc61c425-04e2-4f43-98fd-3b93991b8049",
1440
  "metadata": {},
1441
  "outputs": [
@@ -1449,7 +1447,7 @@
1449
  }
1450
  ],
1451
  "source": [
1452
- "sub_path = exp_dir + '/submission.csv'\n",
1453
  "model_path = models_paths[0]\n",
1454
  "!kaggle competitions submit -c histopathologic-cancer-detection -f {sub_path} -m {model_path}"
1455
  ]
@@ -1464,11 +1462,12 @@
1464
  },
1465
  {
1466
  "cell_type": "code",
1467
- "execution_count": 98,
1468
  "id": "a64a0209-4ca7-4566-8bef-dac798f7f3bc",
1469
  "metadata": {},
1470
  "outputs": [],
1471
  "source": [
1472
  "models_paths = [exp_dir+\"/model_\" + str(i) + \".pt\"]\n",
1473
  "pcam_model = torch.load(models_paths[0], weights_only = False)\n",
1474
  "pcam_model = pcam_model.to(device)\n",
 
13
  "\n",
14
  "## πŸ“Š Dataset Overview\n",
15
  "\n",
16
+ "https://github.com/basveeling/pcam\n",
17
+ "\n",
18
  "The **PatchCamelyon (PCam)** benchmark is a challenging image classification dataset designed for breast cancer detection tasks.\n",
19
  "\n",
20
  "- πŸ“¦ **Total images**: 327,680 color patches \n",
 
22
  "- πŸ§ͺ **Source**: Histopathologic scans of lymph node sections \n",
23
  "- 🏷️ **Labels**: Binary β€” A positive (1) label indicates that the center 32x32px region of a patch contains at least one pixel of tumor tissue. Tumor tissue in the outer region of the patch does not influence the label.\n",
24
  "\n",
25
+ "```\n",
26
+ "B. S. Veeling, J. Linmans, J. Winkens, T. Cohen, M. Welling. \"Rotation Equivariant CNNs for Digital Pathology\". arXiv:1806.03962\n",
27
+ "```\n",
28
+ "\n",
29
+ "```\n",
30
+ "Ehteshami Bejnordi et al. Diagnostic Assessment of Deep Learning Algorithms for Detection of Lymph Node Metastases in Women With Breast Cancer. JAMA: The Journal of the American Medical Association, 318(22), 2199–2210. doi:jama.2017.14585\n",
31
+ "```\n",
32
+ "\n",
33
+ "Under CC0 License\n",
34
+ "\n",
35
  "\n",
36
  "## 🧠 Solution to Implement\n",
37
  "\n",
 
913
  },
914
  {
915
  "cell_type": "code",
916
+ "execution_count": null,
917
  "id": "a7e5a085-7671-4b20-8337-cf0d2fae7abf",
918
  "metadata": {},
919
  "outputs": [
 
946
  }
947
  ],
948
  "source": [
949
+ "from torchvision.models import densenet201, DenseNet201_Weights\n",
950
+ "model = densenet201(weights=DenseNet201_Weights.DEFAULT)\n",
951
  "\n",
952
  "for params in model.parameters():\n",
953
  " params.requires_grad = False\n",
 
1038
  },
1039
  {
1040
  "cell_type": "code",
1041
+ "execution_count": null,
1042
  "id": "a8bdb826-a1e8-4639-8f3a-f99207676e5f",
1043
  "metadata": {},
1044
  "outputs": [],
1045
  "source": [
1046
+ "from torchvision.models import densenet201, DenseNet201_Weights, densenet121, DenseNet121_Weights\n",
1047
+ "model = densenet201(weights=DenseNet201_Weights.DEFAULT)\n",
1048
  "\n",
1049
  "for params in model.parameters():\n",
1050
  " params.requires_grad = False\n",
 
1296
  },
1297
  {
1298
  "cell_type": "code",
1299
+ "execution_count": null,
1300
  "id": "bec9dc53-f45c-4456-b925-1da02d812c29",
1301
  "metadata": {},
1302
  "outputs": [
 
1317
  "sk_learn_metrics_logits = [roc_auc_score]\n",
1318
  "sk_learn_metrics_pred = [f1_score, accuracy_score]\n",
1319
  "epoch_num = 2\n",
1320
+ "finetune_epoch_num = 5\n",
1321
  " \n",
1322
  "for i in range(epoch_num, epoch_num + finetune_epoch_num):\n",
1323
  " start_time = time.time()\n",
 
1382
  },
1383
  {
1384
  "cell_type": "code",
1385
+ "execution_count": null,
1386
  "id": "e33b4ddf-025e-40e0-a6ea-8d630c54ae42",
1387
  "metadata": {},
1388
  "outputs": [
 
1396
  }
1397
  ],
1398
  "source": [
1399
+ "for i in range(0, epoch_num + finetune_epoch_num):\n",
1400
+ " models_paths = [exp_dir+\"/model_\" + str(i) + \".pt\"]\n",
1401
+ " pcam_model = torch.load(models_paths[0], weights_only = False)\n",
1402
+ " pcam_model = pcam_model.to(device)\n",
1403
+ "\n",
1404
+ " # First create tta_num augmented dataloaders\n",
1405
+ " tta_num = 1\n",
1406
+ " logits = []\n",
1407
+ " for j in range(0, tta_num):\n",
1408
+ " test_set_augment = PcamDatasetKaggle(root=dataset_dir, split=\"test\", transform = deepcopy(transform_data_augment)) #For TTA\n",
1409
+ " test_dataloader_augment = DataLoader(test_set_augment, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=6, persistent_workers = True)\n",
1410
+ " for modelp in models_paths:\n",
1411
+ " pcam_model = torch.load(modelp, weights_only = False)\n",
1412
+ " pcam_model = pcam_model.to(device)\n",
1413
+ " test_y, test_logits = run_inference(pcam_model, test_dataloader, device)\n",
1414
+ " logits.append(test_logits)\n",
1415
+ " test_y_augm, test_logits_aum = run_inference(pcam_model, test_dataloader_augment, device)\n",
1416
+ " logits.append(test_logits_aum)\n",
1417
+ " \n",
1418
+ " # Average logits\n",
1419
+ " logits_stacked = torch.stack(logits)\n",
1420
+ " mean_logits = torch.mean(logits_stacked, dim = 0, keepdims=True)\n",
1421
  "\n",
1422
+ " #Create submission file with final predictions\n",
1423
+ " image_ids = [img.replace('.tif', '') for img in test_set.imgs.tolist()]\n",
1424
+ " test_preds = torch.sigmoid(mean_logits)\n",
1425
  "\n",
1426
+ " submission_df = pd.DataFrame({\n",
1427
+ " 'id': image_ids,\n",
1428
+ " 'label': test_preds.squeeze().detach().cpu().numpy()\n",
1429
+ " })\n",
1430
  "\n",
1431
+ " submission_df.to_csv(exp_dir+'/submission_'+str(i)+'.csv', index=False)"
1432
  ]
1433
  },
1434
  {
1435
  "cell_type": "code",
1436
+ "execution_count": null,
1437
  "id": "bc61c425-04e2-4f43-98fd-3b93991b8049",
1438
  "metadata": {},
1439
  "outputs": [
 
1447
  }
1448
  ],
1449
  "source": [
1450
+ "sub_path = exp_dir + '/submission_6.csv'\n",
1451
  "model_path = models_paths[0]\n",
1452
  "!kaggle competitions submit -c histopathologic-cancer-detection -f {sub_path} -m {model_path}"
1453
  ]
 
1462
  },
1463
  {
1464
  "cell_type": "code",
1465
+ "execution_count": null,
1466
  "id": "a64a0209-4ca7-4566-8bef-dac798f7f3bc",
1467
  "metadata": {},
1468
  "outputs": [],
1469
  "source": [
1470
+ "i = 6\n",
1471
  "models_paths = [exp_dir+\"/model_\" + str(i) + \".pt\"]\n",
1472
  "pcam_model = torch.load(models_paths[0], weights_only = False)\n",
1473
  "pcam_model = pcam_model.to(device)\n",
README.md CHANGED
@@ -40,6 +40,8 @@ Or execute it on kaggle:
40
 
41
  ## 📊 Dataset Overview
42
 
43
  The **PatchCamelyon (PCam)** benchmark is a challenging image classification dataset designed for breast cancer detection tasks.
44
 
45
  - 📦 **Total images**: 327,680 color patches
@@ -47,6 +49,16 @@ The **PatchCamelyon (PCam)** benchmark is a challenging image classification dat
47
  - 🧪 **Source**: Histopathologic scans of lymph node sections
48
  - 🏷️ **Labels**: Binary — A positive (1) label indicates that the center 32x32px region of a patch contains at least one pixel of tumor tissue. Tumor tissue in the outer region of the patch does not influence the label.
49
 
50
  ## Results
51
 
52
  The submission on kaggle with the model trained on this notebook is
 
40
 
41
  ## 📊 Dataset Overview
42
 
43
+ https://github.com/basveeling/pcam
44
+
45
  The **PatchCamelyon (PCam)** benchmark is a challenging image classification dataset designed for breast cancer detection tasks.
46
 
47
  - 📦 **Total images**: 327,680 color patches
 
49
  - 🧪 **Source**: Histopathologic scans of lymph node sections
50
  - 🏷️ **Labels**: Binary — A positive (1) label indicates that the center 32x32px region of a patch contains at least one pixel of tumor tissue. Tumor tissue in the outer region of the patch does not influence the label.
51
 
52
+ ```
53
+ B. S. Veeling, J. Linmans, J. Winkens, T. Cohen, M. Welling. "Rotation Equivariant CNNs for Digital Pathology". arXiv:1806.03962
54
+ ```
55
+
56
+ ```
57
+ Ehteshami Bejnordi et al. Diagnostic Assessment of Deep Learning Algorithms for Detection of Lymph Node Metastases in Women With Breast Cancer. JAMA: The Journal of the American Medical Association, 318(22), 2199–2210. doi:jama.2017.14585
58
+ ```
59
+
60
+ Under CC0 License
61
+
62
  ## Results
63
 
64
  The submission on kaggle with the model trained on this notebook is
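The labeling rule quoted in the README hunks above (a patch is positive iff the central 32x32px region contains any tumor pixel) can be illustrated with a short sketch. The binary tumor mask here is hypothetical, since PCam ships the labels precomputed; the snippet only mirrors the stated rule:

```python
import numpy as np

def center_crop_label(mask: np.ndarray, crop: int = 32) -> int:
    """Illustrate the PCam rule on a hypothetical (96, 96) binary tumor mask:
    the patch is positive (1) iff the central crop x crop region contains at
    least one tumor pixel; tumor tissue outside that region is ignored."""
    h, w = mask.shape
    top, left = (h - crop) // 2, (w - crop) // 2
    center = mask[top:top + crop, left:left + crop]
    return int(center.any())
```
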
app.py CHANGED
@@ -116,6 +116,30 @@ def prev_sample(index: int, dataset_choice: str):
116
  # ---------------------------------
117
  # 5. UI elements
118
  # ---------------------------------
119
  with gr.Blocks() as demo:
120
  gr.Markdown("## 🧬 PCAM Tumor Classifier")
121
  gr.Markdown("Use **Next** or **Previous** to browse samples and see model predictions vs ground truth.")
@@ -139,6 +163,9 @@ with gr.Blocks() as demo:
139
  error_label = gr.Text(label="Prediction error")
140
  confidence = gr.Text(label="Probability")
141
 
 
142
  # Connect navigation
143
  prev_btn.click(fn=prev_sample, inputs=[state, dropdown], outputs=[image_output, pred_label, confidence, true_label, state, error_label, index, dataset_choice])
144
  next_btn.click(fn=next_sample, inputs=[state, dropdown], outputs=[image_output, pred_label, confidence, true_label, state, error_label, index, dataset_choice])
 
116
  # ---------------------------------
117
  # 5. UI elements
118
  # ---------------------------------
119
+
120
+ dataset_information = """
121
+ ## 📊 Dataset Overview
122
+
123
+ https://github.com/basveeling/pcam
124
+
125
+ The **PatchCamelyon (PCam)** benchmark is a challenging image classification dataset designed for breast cancer detection tasks.
126
+
127
+ - 📦 **Total images**: 327,680 color patches
128
+ - 🖼️ **Image size**: 96 × 96 pixels
129
+ - 🧪 **Source**: Histopathologic scans of lymph node sections
130
+ - 🏷️ **Labels**: Binary — A positive (1) label indicates that the center 32x32px region of a patch contains at least one pixel of tumor tissue. Tumor tissue in the outer region of the patch does not influence the label.
131
+
132
+ ```
133
+ B. S. Veeling, J. Linmans, J. Winkens, T. Cohen, M. Welling. "Rotation Equivariant CNNs for Digital Pathology". arXiv:1806.03962
134
+ ```
135
+
136
+ ```
137
+ Ehteshami Bejnordi et al. Diagnostic Assessment of Deep Learning Algorithms for Detection of Lymph Node Metastases in Women With Breast Cancer. JAMA: The Journal of the American Medical Association, 318(22), 2199–2210. doi:jama.2017.14585
138
+ ```
139
+
140
+ Under CC0 License
141
+ """
142
+
143
  with gr.Blocks() as demo:
144
  gr.Markdown("## 🧬 PCAM Tumor Classifier")
145
  gr.Markdown("Use **Next** or **Previous** to browse samples and see model predictions vs ground truth.")
 
163
  error_label = gr.Text(label="Prediction error")
164
  confidence = gr.Text(label="Probability")
165
 
166
+ with gr.Row():
167
+ gr.Markdown(dataset_information)
168
+
169
  # Connect navigation
170
  prev_btn.click(fn=prev_sample, inputs=[state, dropdown], outputs=[image_output, pred_label, confidence, true_label, state, error_label, index, dataset_choice])
171
  next_btn.click(fn=next_sample, inputs=[state, dropdown], outputs=[image_output, pred_label, confidence, true_label, state, error_label, index, dataset_choice])
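
The UI fields above report a per-patch probability alongside the ground-truth label. A minimal sketch of how such a score could be produced from the saved checkpoint, assuming a single-logit model and an evaluation transform named transform_eval (both assumptions; app.py's actual prediction helpers are outside this diff):

```python
import torch

def predict_probability(model, image, transform_eval, device="cpu"):
    """Hypothetical helper: score one 96x96 patch with the trained PCam model.
    `transform_eval` and the single-logit output are assumptions, not code
    taken from this commit."""
    model.eval()
    x = transform_eval(image).unsqueeze(0).to(device)  # shape (1, C, H, W)
    with torch.no_grad():
        logit = model(x)
    return torch.sigmoid(logit).item()
```
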