initial commit

atamazian · Jul 8, 2022 · f309ff7 · f309ff7
commit f309ff7
Show file tree

Hide file tree

Showing 17 changed files with 507 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,129 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Araik Tamazian
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,24 @@
+# kaggle-rsna-ihd
+In the Kaggle RSNA Intracranial Hemorrhage Detection competition, the challenge was to build an algorithm to detect acute intracranial hemorrhage and its subtypes.
+
+
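+kaggle-rsna-ihd is a Python library that provides PyTorch Lightning data-loading utilities (a dataset and a data module) for the competition's images and labels.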
+
+## Installation
+
+Use the package manager [pip](https://pip.pypa.io/en/stable/) to install kaggle-rsna-ihd.
+
+```bash
+pip install git+https://github.com/atamazian/kaggle_rsna_ihd.git
+```
+
+## Usage
+See example notebook.
+
+## Contributing
+Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
+
+Please make sure to update tests as appropriate.
+
+## License
+See LICENSE.
diff --git a/demo/__init__.py b/demo/__init__.py
@@ -0,0 +1,3 @@
+import os
+
+# Directory that contains this module, i.e. the ``demo`` package directory.
+DEMO_DIR = os.path.dirname(__file__)
diff --git a/demo/train.csv b/demo/train.csv
@@ -0,0 +1,19 @@
+ID,Label
+ID_000000000_epidural,0
+ID_000000000_intraparenchymal,0
+ID_000000000_intraventricular,0
+ID_000000000_subarachnoid,0
+ID_000000000_subdural,0
+ID_000000000_any,0
+ID_000000001_epidural,0
+ID_000000001_intraparenchymal,0
+ID_000000001_intraventricular,0
+ID_000000001_subarachnoid,0
+ID_000000001_subdural,0
+ID_000000001_any,0
+ID_000000002_epidural,0
+ID_000000002_intraparenchymal,0
+ID_000000002_intraventricular,0
+ID_000000002_subarachnoid,0
+ID_000000002_subdural,0
+ID_000000002_any,0
diff --git a/demo/train_images/ID_000000000.png b/demo/train_images/ID_000000000.png
diff --git a/demo/train_images/ID_000000001.png b/demo/train_images/ID_000000001.png
diff --git a/demo/train_images/ID_000000002.png b/demo/train_images/ID_000000002.png
diff --git a/kaggle_rsna_ihd/__init__.py b/kaggle_rsna_ihd/__init__.py
@@ -0,0 +1 @@
+__version__ = '0.1.0'
diff --git a/kaggle_rsna_ihd/datasets.py b/kaggle_rsna_ihd/datasets.py
@@ -0,0 +1,144 @@
+import logging
+import multiprocessing as mproc
+import os
+from math import ceil
+
+import pandas as pd
+from pytorch_lightning import LightningDataModule
+from torch.utils.data import DataLoader, Dataset
+from monai import transforms as T
+
+# Default training pipeline: the validation steps plus a random flip.
+# It is the default ``train_transform`` of ``IHDDataModule``.
+TRAIN_TRANSFORM = T.Compose(
+    [
+        # Add a channel axis; presumably the loaded PNG slices are 2-D (confirm).
+        T.AddChannel(),
+        # Keep the central 200x200 spatial region.
+        T.CenterSpatialCrop((200, 200)),
+        # Augmentation: flip along spatial axis 0 with probability 0.5.
+        T.RandFlip(prob=0.5, spatial_axis=0),
+        # Rescale intensities with MONAI's defaults (presumably to [0, 1]; confirm).
+        T.ScaleIntensity(),
+        # Convert to MONAI's default output type (presumably a torch tensor; confirm).
+        T.EnsureType(),
+    ]
+)
+
+# Default validation pipeline: deterministic, with no random augmentation.
+# It is the default ``valid_transform`` of ``IHDDataModule``.
+VALID_TRANSFORM = T.Compose(
+    [
+        # Add a channel axis; presumably the loaded PNG slices are 2-D (confirm).
+        T.AddChannel(),
+        # Keep the central 200x200 spatial region.
+        T.CenterSpatialCrop((200, 200)),
+        # Rescale intensities with MONAI's defaults (presumably to [0, 1]; confirm).
+        T.ScaleIntensity(),
+        # Convert to MONAI's default output type (presumably a torch tensor; confirm).
+        T.EnsureType(),
+    ]
+)
+
+
+class IHDDataset(Dataset):
+    def __init__(
+        self,
+        path_csv: str,
+        path_img_dir: str,
+        transforms=None,
+        mode: str = "train",
+        split: float = 0.8,
+    ):
+        self.path_img_dir = path_img_dir
+        self.transforms = transforms
+        self.mode = mode
+
+        self.data = pd.read_csv(path_csv)
+        self.data["image_id"] = self.data["ID"].apply(
+            lambda x: "_".join(x.split("_")[:-1]) + ".png"
+        )
+        self.data["type"] = self.data["ID"].apply(lambda x: x.split("_")[2])
+        self.data = (
+            self.data[["Label", "image_id", "type"]]
+            .drop_duplicates()
+            .pivot(index="image_id", columns="type", values="Label")
+            .reset_index()
+        )
+        label_cols = [
+            "epidural",
+            "intraparenchymal",
+            "intraventricular",
+            "subarachnoid",
+            "subdural",
+            "any",
+        ]
+
+        # shuffle data
+        self.data = self.data.sample(frac=1, random_state=42).reset_index(drop=True)
+
+        # split dataset
+        assert 0.0 <= split <= 1.0
+        frac = int(ceil(split * len(self.data)))
+        self.data = self.data[:frac] if mode == "train" else self.data[frac:]
+        self.img_names = list(self.data["image_id"])
+        self.labels = list(self.data[label_cols].values)
+
+    def __getitem__(self, idx: int) -> tuple:
+        img_path = os.path.join(self.path_img_dir, self.img_names[idx])
+        assert os.path.isfile(img_path)
+        label = self.labels[idx]
+        img = T.LoadImage(image_only=True)(img_path)
+
+        if self.transforms:
+            img = self.transforms(img)
+        return img, label
+
+    def __len__(self) -> int:
+        return len(self.data)
+
+
+class IHDDataModule(LightningDataModule):
+    def __init__(
+        self,
+        path_csv: str,
+        path_img_dir: str,
+        train_transform=TRAIN_TRANSFORM,
+        valid_transform=VALID_TRANSFORM,
+        batch_size: int = 128,
+        split: float = 0.8,
+    ):
+        super().__init__()
+        self.path_csv = path_csv
+        self.path_img_dir = path_img_dir
+        self.train_transform = train_transform
+        self.valid_transform = valid_transform
+        self.batch_size = batch_size
+        self.split = split
+
+    def prepare_data(self):
+        pass
+
+    def setup(self, stage=None):
+        self.train_dataset = IHDDataset(
+            self.path_csv,
+            self.path_img_dir,
+            split=self.split,
+            mode="train",
+            transforms=self.train_transform,
+        )
+        logging.info(f"training dataset: {len(self.train_dataset)}")
+        self.valid_dataset = IHDDataset(
+            self.path_csv,
+            self.path_img_dir,
+            split=self.split,
+            mode="valid",
+            transforms=self.valid_transform,
+        )
+        logging.info(f"validation dataset: {len(self.valid_dataset)}")
+
+    def train_dataloader(self):
+        return DataLoader(
+            self.train_dataset,
+            batch_size=self.batch_size,
+            num_workers=mproc.cpu_count(),
+            shuffle=True,
+        )
+
+    def val_dataloader(self):
+        return DataLoader(
+            self.valid_dataset,
+            batch_size=self.batch_size,
+            num_workers=mproc.cpu_count(),
+            shuffle=False,
+        )
+
+    def test_dataloader(self):
+        pass
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		import os

		DEMO_DIR = os.path.dirname(__file__)