diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9fe17bc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,129 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..1409bb9
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Araik Tamazian
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d0b3b93
--- /dev/null
+++ b/README.md
@@ -0,0 +1,24 @@
+# kaggle-rsna-ihd
+In the Kaggle RSNA Intracranial Hemorrhage Detection competition, the challenge was to build an algorithm to detect acute intracranial hemorrhage and its subtypes.
+
+
+kaggle-rsna-ihd is a Python library to aid in building models for the RSNA Intracranial Hemorrhage Detection task.
+
+## Installation
+
+Use the package manager [pip](https://pip.pypa.io/en/stable/) to install kaggle-rsna-ihd.
+
+```bash
+pip install git+https://github.com/atamazian/kaggle_rsna_ihd.git
+```
+
+## Usage
+See example notebook.
+
+## Contributing
+Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
+
+Please make sure to update tests as appropriate.
+
+## License
+See LICENSE.
\ No newline at end of file
diff --git a/demo/__init__.py b/demo/__init__.py
new file mode 100644
index 0000000..3c1dba7
--- /dev/null
+++ b/demo/__init__.py
@@ -0,0 +1,6 @@
+"""Demo fixtures: a tiny ``train.csv`` and sample images used by the tests."""
+
+import os
+
+# Directory containing this package; the tests build demo file paths from it.
+DEMO_DIR = os.path.dirname(__file__)
\ No newline at end of file
diff --git a/demo/train.csv b/demo/train.csv
new file mode 100644
index 0000000..b707b8d
--- /dev/null
+++ b/demo/train.csv
@@ -0,0 +1,19 @@
+ID,Label
+ID_000000000_epidural,0
+ID_000000000_intraparenchymal,0
+ID_000000000_intraventricular,0
+ID_000000000_subarachnoid,0
+ID_000000000_subdural,0
+ID_000000000_any,0
+ID_000000001_epidural,0
+ID_000000001_intraparenchymal,0
+ID_000000001_intraventricular,0
+ID_000000001_subarachnoid,0
+ID_000000001_subdural,0
+ID_000000001_any,0
+ID_000000002_epidural,0
+ID_000000002_intraparenchymal,0
+ID_000000002_intraventricular,0
+ID_000000002_subarachnoid,0
+ID_000000002_subdural,0
+ID_000000002_any,0
diff --git a/demo/train_images/ID_000000000.png b/demo/train_images/ID_000000000.png
new file mode 100644
index 0000000..ae3e9b8
Binary files /dev/null and b/demo/train_images/ID_000000000.png differ
diff --git a/demo/train_images/ID_000000001.png b/demo/train_images/ID_000000001.png
new file mode 100644
index 0000000..ae3e9b8
Binary files /dev/null and b/demo/train_images/ID_000000001.png differ
diff --git a/demo/train_images/ID_000000002.png b/demo/train_images/ID_000000002.png
new file mode 100644
index 0000000..ae3e9b8
Binary files /dev/null and b/demo/train_images/ID_000000002.png differ
diff --git a/kaggle_rsna_ihd/__init__.py b/kaggle_rsna_ihd/__init__.py
new file mode 100644
index 0000000..b794fd4
--- /dev/null
+++ b/kaggle_rsna_ihd/__init__.py
@@ -0,0 +1,3 @@
+# Package version. setup.py reads it with a regex that expects a line of the
+# form ``__version__ = "<value>"`` (either quote style), so keep that layout.
+__version__ = '0.1.0'
diff --git a/kaggle_rsna_ihd/datasets.py b/kaggle_rsna_ihd/datasets.py
new file mode 100644
index 0000000..20fa404
--- /dev/null
+++ b/kaggle_rsna_ihd/datasets.py
@@ -0,0 +1,203 @@
+"""Dataset and Lightning data module for the RSNA hemorrhage detection data."""
+
+import logging
+import multiprocessing as mproc
+import os
+from math import ceil
+from typing import Optional
+
+import pandas as pd
+from pytorch_lightning import LightningDataModule
+from torch.utils.data import DataLoader, Dataset
+from monai import transforms as T
+
+# Label columns, in the order used for every sample's label vector.
+LABEL_COLUMNS = [
+    "epidural",
+    "intraparenchymal",
+    "intraventricular",
+    "subarachnoid",
+    "subdural",
+    "any",
+]
+
+# Default training pipeline; the flip along axis 0 is its only random step.
+# NOTE(review): newer MONAI releases deprecate ``AddChannel`` in favour of
+# ``EnsureChannelFirst`` -- confirm against the MONAI version in use.
+TRAIN_TRANSFORM = T.Compose(
+    [
+        T.AddChannel(),
+        T.CenterSpatialCrop((200, 200)),
+        T.RandFlip(prob=0.5, spatial_axis=0),
+        T.ScaleIntensity(),
+        T.EnsureType(),
+    ]
+)
+
+# Default validation pipeline: the training pipeline without the random flip.
+VALID_TRANSFORM = T.Compose(
+    [
+        T.AddChannel(),
+        T.CenterSpatialCrop((200, 200)),
+        T.ScaleIntensity(),
+        T.EnsureType(),
+    ]
+)
+
+
+class IHDDataset(Dataset):
+    """Image/label dataset built from the long-format label CSV.
+
+    Each CSV row carries an ``ID`` of the form ``<image id>_<type>`` and a
+    ``Label``. Rows are pivoted to one row per image, shuffled with a fixed seed
+    and cut into a training part and a validation part.
+
+    Args:
+        path_csv: Path to the label CSV.
+        path_img_dir: Directory holding the ``<image id>.png`` files.
+        transforms: Optional callable applied to every loaded image.
+        mode: ``"train"`` selects the training part; any other value selects
+            the remaining (validation) part.
+        split: Fraction of the images assigned to the training part.
+
+    Raises:
+        ValueError: If ``split`` is outside ``[0, 1]``.
+    """
+
+    def __init__(
+        self,
+        path_csv: str,
+        path_img_dir: str,
+        transforms=None,
+        mode: str = "train",
+        split: float = 0.8,
+    ):
+        # Raise instead of ``assert``: assertions are stripped under ``python -O``.
+        if not 0.0 <= split <= 1.0:
+            raise ValueError(f"split must be within [0, 1], got {split!r}")
+
+        self.path_img_dir = path_img_dir
+        self.transforms = transforms
+        self.mode = mode
+        # Build the image reader once rather than on every ``__getitem__`` call.
+        self.loader = T.LoadImage(image_only=True)
+
+        data = pd.read_csv(path_csv)
+        # Split every ID once, at its last underscore, so the image file name and
+        # the hemorrhage type are always derived from the same cut.
+        id_parts = data["ID"].str.rsplit("_", n=1)
+        data["image_id"] = id_parts.str[0] + ".png"
+        data["type"] = id_parts.str[1]
+        data = (
+            data[["Label", "image_id", "type"]]
+            .drop_duplicates()
+            .pivot(index="image_id", columns="type", values="Label")
+            .reset_index()
+        )
+
+        # The seed is fixed so that separately constructed "train" and "valid"
+        # instances see the same row order and therefore get disjoint parts.
+        data = data.sample(frac=1, random_state=42).reset_index(drop=True)
+
+        n_train = ceil(split * len(data))
+        self.data = data[:n_train] if mode == "train" else data[n_train:]
+        self.img_names = list(self.data["image_id"])
+        self.labels = list(self.data[LABEL_COLUMNS].values)
+
+    def __getitem__(self, idx: int) -> tuple:
+        """Return the ``(image, label)`` pair at position ``idx``.
+
+        Raises:
+            FileNotFoundError: If the image file does not exist.
+        """
+        img_path = os.path.join(self.path_img_dir, self.img_names[idx])
+        if not os.path.isfile(img_path):
+            raise FileNotFoundError(f"image file not found: {img_path}")
+        img = self.loader(img_path)
+
+        if self.transforms:
+            img = self.transforms(img)
+        return img, self.labels[idx]
+
+    def __len__(self) -> int:
+        """Return the number of images in the selected part."""
+        return len(self.data)
+
+
+class IHDDataModule(LightningDataModule):
+    """Lightning data module serving the training and validation datasets.
+
+    Args:
+        path_csv: Path to the label CSV.
+        path_img_dir: Directory holding the image files.
+        train_transform: Transform applied to training images.
+        valid_transform: Transform applied to validation images.
+        batch_size: Batch size of both data loaders.
+        split: Fraction of the images used for training.
+        num_workers: Worker processes per data loader; ``None`` uses one per
+            CPU reported by ``multiprocessing.cpu_count()``.
+    """
+
+    def __init__(
+        self,
+        path_csv: str,
+        path_img_dir: str,
+        train_transform=TRAIN_TRANSFORM,
+        valid_transform=VALID_TRANSFORM,
+        batch_size: int = 128,
+        split: float = 0.8,
+        num_workers: Optional[int] = None,
+    ):
+        super().__init__()
+        self.path_csv = path_csv
+        self.path_img_dir = path_img_dir
+        self.train_transform = train_transform
+        self.valid_transform = valid_transform
+        self.batch_size = batch_size
+        self.split = split
+        self.num_workers = mproc.cpu_count() if num_workers is None else num_workers
+
+    def prepare_data(self):
+        """Nothing to download or preprocess."""
+        pass
+
+    def setup(self, stage=None):
+        """Create the training and validation datasets (``stage`` is unused)."""
+        self.train_dataset = IHDDataset(
+            self.path_csv,
+            self.path_img_dir,
+            split=self.split,
+            mode="train",
+            transforms=self.train_transform,
+        )
+        logging.info("training dataset: %d", len(self.train_dataset))
+        self.valid_dataset = IHDDataset(
+            self.path_csv,
+            self.path_img_dir,
+            split=self.split,
+            mode="valid",
+            transforms=self.valid_transform,
+        )
+        logging.info("validation dataset: %d", len(self.valid_dataset))
+
+    def train_dataloader(self):
+        """Return the shuffled loader over the training dataset."""
+        return DataLoader(
+            self.train_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=True,
+        )
+
+    def val_dataloader(self):
+        """Return the unshuffled loader over the validation dataset."""
+        return DataLoader(
+            self.valid_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            shuffle=False,
+        )
+
+    def test_dataloader(self):
+        """No test set is defined; returns ``None``."""
+        pass
diff --git a/kaggle_rsna_ihd/models.py b/kaggle_rsna_ihd/models.py
new file mode 100644
index 0000000..658e186
--- /dev/null
+++ b/kaggle_rsna_ihd/models.py
@@ -0,0 +1,60 @@
+"""Lightning model for intracranial hemorrhage classification."""
+
+from typing import Union
+
+import torch
+from pytorch_lightning import LightningModule
+from torch import nn
+from torch.nn import functional as F
+import timm
+
+
+class IHDModel(LightningModule):
+    """Lightning wrapper around a six-output hemorrhage classifier.
+
+    ``model`` is either a ``timm`` architecture name, in which case the network
+    is created with one input channel and six outputs, or a ready-made
+    ``nn.Module`` that is used as given (``pretrained`` is ignored then).
+
+    >>> model = IHDModel("resnet18")
+    """
+
+    def __init__(
+        self, model: Union[str, nn.Module], pretrained: bool = True, lr: float = 1e-4
+    ):
+        super().__init__()
+        if not isinstance(model, str):
+            self.model = model
+        else:
+            self.model = timm.create_model(
+                model, pretrained=pretrained, num_classes=6, in_chans=1
+            )
+        self.learn_rate = lr
+        self.loss_fn = F.binary_cross_entropy_with_logits
+
+    def forward(self, x):
+        """Run the wrapped network on ``x`` and return its raw outputs."""
+        return self.model(x)
+
+    def _batch_loss(self, batch):
+        """Compute the loss of one ``(inputs, targets)`` batch."""
+        inputs, targets = batch
+        return self.loss_fn(self(inputs), targets.float())
+
+    def training_step(self, batch, batch_idx):
+        """Log ``train_loss`` on the progress bar and return the batch loss."""
+        batch_loss = self._batch_loss(batch)
+        self.log("train_loss", batch_loss, prog_bar=True)
+        return batch_loss
+
+    def validation_step(self, batch, batch_idx):
+        """Log ``valid_loss`` for the batch; nothing is returned."""
+        self.log("valid_loss", self._batch_loss(batch), prog_bar=False)
+
+    def configure_optimizers(self):
+        """Return AdamW plus a cosine schedule spanning ``trainer.max_epochs``."""
+        adamw = torch.optim.AdamW(self.model.parameters(), lr=self.learn_rate)
+        cosine = torch.optim.lr_scheduler.CosineAnnealingLR(
+            adamw, T_max=self.trainer.max_epochs, eta_min=0
+        )
+        return [adamw], [cosine]
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..55583bf
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,8 @@
+[pytest]
+python_files=test*.py
+addopts=
+    --cov kaggle_rsna_ihd
+    --cov-report term-missing
+    --cov-report=xml
+    
+testpaths = tests
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..2bcd70e
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,2 @@
+[flake8]
+max-line-length = 88
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..1affcb7
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,69 @@
+import codecs
+import os
+import re
+
+from setuptools import setup, find_packages
+
+here = os.path.abspath(os.path.dirname(__file__))
+
+
+def read(*parts):
+    """Return the UTF-8 text of the file at ``parts``, relative to this script."""
+    with codecs.open(os.path.join(here, *parts), "r", encoding="utf-8") as fp:
+        return fp.read()
+
+
+def find_version(*file_paths):
+    """Extract the ``__version__`` string from the given file.
+
+    Raises:
+        RuntimeError: If no ``__version__ = "<value>"`` line is found.
+    """
+    version_file = read(*file_paths)
+    version_match = re.search(
+        r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M
+    )
+    if version_match:
+        return version_match.group(1)
+    raise RuntimeError("Unable to find version string.")
+
+
+# Resolve README.md next to this script rather than in the current working
+# directory, so the build also works when it is started from elsewhere.
+long_description = read("README.md")
+
+setup(
+    name="kaggle-rsna-ihd",
+    version=find_version("kaggle_rsna_ihd", "__init__.py"),
+    author="Araik Tamazian",
+    description=(
+        "A package to aid in building models for "
+        "RSNA Intracranial Hemorrhage Detection task."
+    ),
+    license="MIT",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/atamazian/kaggle_rsna_ihd",
+    packages=find_packages(exclude=["demo", "tests"]),
+    install_requires=[
+        "torch>=1.8",
+        "torchvision",
+        "pytorch-lightning>=1.5.0",
+        "monai",
+        "timm",
+        "pandas",
+        "scikit-learn>=1.0",
+    ],
+    python_requires=">=3.7",
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+        "Development Status :: 3 - Alpha",
+        "Intended Audience :: Developers",
+        "Intended Audience :: Science/Research",
+        "Topic :: Multimedia",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        "Topic :: Scientific/Engineering :: Image Recognition",
+    ],
+)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
new file mode 100644
index 0000000..c8cf61d
--- /dev/null
+++ b/tests/test_datasets.py
@@ -0,0 +1,31 @@
+import os
+import numpy
+from kaggle_rsna_ihd.datasets import IHDDataModule, IHDDataset
+from demo import DEMO_DIR
+
+
+def test_dataset(path_data=DEMO_DIR):
+    """The demo CSV yields three samples, each with a six-element label."""
+    dataset = IHDDataset(
+        path_csv=os.path.join(path_data, "train.csv"),
+        path_img_dir=os.path.join(path_data, "train_images"),
+    )
+    # split defaults to 0.8 and ceil(0.8 * 3) == 3, so all demo images are "train".
+    assert len(dataset) == 3
+    img, lb = dataset[0]
+    assert isinstance(img, numpy.ndarray)
+    assert len(lb) == 6
+
+
+def test_datamodule(path_data=DEMO_DIR):
+    """The training loader of the data module yields a non-empty batch."""
+    dm = IHDDataModule(
+        path_csv=os.path.join(path_data, "train.csv"),
+        path_img_dir=os.path.join(path_data, "train_images"),
+    )
+    dm.setup()
+
+    # ``next`` fails on an empty loader, where a ``for`` loop would silently pass.
+    imgs, lbs = next(iter(dm.train_dataloader()))
+    assert len(imgs)
+    assert len(imgs) == len(lbs)
\ No newline at end of file
diff --git a/tests/test_models.py b/tests/test_models.py
new file mode 100644
index 0000000..15a2e18
--- /dev/null
+++ b/tests/test_models.py
@@ -0,0 +1,26 @@
+import os
+from pytorch_lightning import Trainer
+from kaggle_rsna_ihd.datasets import IHDDataModule
+from kaggle_rsna_ihd.models import IHDModel
+from demo import DEMO_DIR
+
+
+def test_model(tmpdir, path_data=DEMO_DIR):
+    """A single fast-dev-run training pass completes on the demo data."""
+    dm = IHDDataModule(
+        path_csv=os.path.join(path_data, "train.csv"),
+        path_img_dir=os.path.join(path_data, "train_images"),
+        batch_size=1,
+        split=0.6,
+    )
+    # Random weights are enough for a smoke test and avoid a network download.
+    model = IHDModel(model="resnet18", pretrained=False)
+
+    trainer = Trainer(
+        default_root_dir=str(tmpdir),
+        fast_dev_run=True,
+        # ``gpus`` was removed from newer Lightning releases; select CPU explicitly.
+        accelerator="cpu",
+    )
+    dm.setup()
+    trainer.fit(model, datamodule=dm)
\ No newline at end of file