From aa9ad675c10b4f97b916c406d59be66db7d425c5 Mon Sep 17 00:00:00 2001
From: Max Kaznady
Date: Tue, 28 Jul 2020 12:04:05 -0400
Subject: [PATCH 01/15] segyviewer fix in README; train.py single GPU
 multiprocess fix; new data BYOD script

---
 README.md                                     |   9 +-
 .../interpretation/dutchf3_patch/train.py     |   6 +-
 .../deepseismic_interpretation/data.py        |  41 ++++
 scripts/byod_competition.py                   | 190 ++++++++++++++++++
 4 files changed, 244 insertions(+), 2 deletions(-)
 create mode 100644 scripts/byod_competition.py

diff --git a/README.md b/README.md
index c28d22e8..1b82b9b1 100644
--- a/README.md
+++ b/README.md
@@ -228,10 +228,17 @@ To install [segyviewer](https://github.com/equinor/segyviewer) run:
 ```bash
 conda env create -n segyviewer python=2.7
 conda activate segyviewer
-conda install -c anaconda pyqt=4.11.4
+conda install -c conda-forge pyqt=4.11.4
 pip install segyviewer
 ```
 
+If you run into any PyQt4-related problems after the installation, try running:
+```bash
+sudo add-apt-repository ppa:rock-core/qt4
+sudo apt update
+sudo apt install libqt4-designer libqt4-opengl libqt4-svg libqtgui4 libqtwebkit4
+```
+
 To visualize cross-sections of a 3D volume, you can run [segyviewer](https://github.com/equinor/segyviewer) like so:
 
 ```bash
diff --git a/experiments/interpretation/dutchf3_patch/train.py b/experiments/interpretation/dutchf3_patch/train.py
index fc585817..e687290e 100644
--- a/experiments/interpretation/dutchf3_patch/train.py
+++ b/experiments/interpretation/dutchf3_patch/train.py
@@ -95,7 +95,11 @@ def run(*options, cfg=None, local_rank=0, debug=False, input=None, distributed=F
 
     if distributed:
         # FOR DISTRIBUTED: Set the device according to local_rank.
-        torch.cuda.set_device(local_rank)
+        # if we're running multi-GPU code on a single GPU (e.g., during development), always use device 0
+        if torch.cuda.device_count() == 1:
+            torch.cuda.set_device(0)
+        else:
+            torch.cuda.set_device(local_rank)
 
         # FOR DISTRIBUTED: Initialize the backend. torch.distributed.launch will
         # provide environment variables, and requires that you use init_method=`env://`.
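The single-GPU guard above matters because `torch.distributed.launch` spawns one worker process per requested GPU and hands each one a distinct `--local_rank`, so multi-GPU code run on a one-GPU development box must clamp every rank to device 0. A minimal standalone sketch of the same device-selection logic (the `select_device` helper is illustrative and not part of the repo):

```python
import os

import torch


def select_device(local_rank: int) -> int:
    # torch.distributed.launch passes --local_rank=0..N-1, one value per process;
    # when only a single GPU is visible, every rank has to map to device 0
    if torch.cuda.device_count() == 1:
        return 0
    return local_rank


if __name__ == "__main__":
    # the launcher normally passes --local_rank as a CLI argument; with --use_env
    # it sets the LOCAL_RANK environment variable instead (assumed here)
    local_rank = int(os.environ.get("LOCAL_RANK", "0"))
    torch.cuda.set_device(select_device(local_rank))
    # MASTER_ADDR/MASTER_PORT come from the launcher, hence init_method="env://"
    torch.distributed.init_process_group(backend="nccl", init_method="env://")
```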
diff --git a/interpretation/deepseismic_interpretation/data.py b/interpretation/deepseismic_interpretation/data.py
index 53ec7f9f..1ff9c100 100644
--- a/interpretation/deepseismic_interpretation/data.py
+++ b/interpretation/deepseismic_interpretation/data.py
@@ -5,6 +5,10 @@
 import math
 from collections import defaultdict
 
+import logging
+# toggle to WARNING when running in production, or use CLI
+logging.getLogger().setLevel(logging.DEBUG)
+
 import numpy as np
 import torch
 from PIL import Image
@@ -13,6 +17,8 @@
 from torch.utils.data import Dataset
 from torchvision.datasets.utils import iterable_to_str, verify_str_arg
 
+import segyio
+
 _open_to_array = compose(np.array, Image.open)
 
 
@@ -20,6 +26,41 @@ class DataNotSplitException(Exception):
     pass
 
 
+def read_segy(filename):
+    """
+    Read in a SEGY-format file given a filename
+
+    Args:
+        filename: input filename
+
+    Returns:
+        tuple of the data cube as a numpy array and a dict describing its geometry
+
+    """
+    logging.info(f"Loading data cube from {filename}")
+
+    # Read full data cube
+    data = segyio.tools.cube(filename)
+
+    # Read meta data; a context manager guarantees the file handle is closed
+    with segyio.open(filename, "r") as segyfile:
+        logging.info(f"  Crosslines: {segyfile.xlines[0]} : {segyfile.xlines[-1]}")
+        logging.info(f"  Inlines:    {segyfile.ilines[0]} : {segyfile.ilines[-1]}")
+        logging.info(f"  Timeslices: 1 : {data.shape[2]}")
+
+        # Make dict with cube-info
+        # TODO: read dt and the other params needed to create a new SEGY file
+        # from the SEGY headers instead of hard-coding them here
+        data_info = {
+            "crossline_start": segyfile.xlines[0],
+            "inline_start": segyfile.ilines[0],
+            "timeslice_start": 1,
+            "shape": data.shape,
+        }
+
+    return data, data_info
+
+
 def _get_classes_and_counts(mask_list):
     class_counts_dict = defaultdict(int)
     for mask in mask_list:
diff --git a/scripts/byod_competition.py b/scripts/byod_competition.py
new file mode 100644
index 00000000..70e70492
--- /dev/null
+++ b/scripts/byod_competition.py
@@ -0,0 +1,190 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
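+#
+# This script converts a pair of SEGY volumes (a seismic image plus its label
+# cube) into the train/test_once numpy layout that prepare_dutchf3.py expects,
+# and logs the class weights, mean and standard deviation used at training time.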
+
+"""
+Run example:
+
+python byod_competition.py --train --label --outdir
+python prepare_dutchf3.py split_train_val patch --data_dir= --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both --section_stride=100
+
+# information to include in configuration file when running:
+
+INFO:root:CLASS WEIGHTS TO USE
+INFO:root:[0.84979262 0.57790153 0.95866329 0.71236326 0.99004844 0.91123086]
+INFO:root:MEAN
+INFO:root:0.0043642526
+INFO:root:STANDARD DEVIATION
+INFO:root:0.07544233
+
+# kick off run as:
+
+NGPU=2
+python -m torch.distributed.launch --nproc_per_node=${NGPU} train.py \
+TRAIN.BATCH_SIZE_PER_GPU 2 VALIDATION.BATCH_SIZE_PER_GPU 2 \
+DATASET.ROOT "/data/seismic" DATASET.NUM_CLASSES 6 DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" \
+TRAIN.MEAN 0.0043642526 TRAIN.STD 0.07544233 \
+--distributed --cfg configs/seresnet_unet.yaml
+
+
+nohup time \
+python -m torch.distributed.launch --nproc_per_node=4 train.py \
+DATASET.ROOT "/home/maxkaz/data/seismic" DATASET.NUM_CLASSES 6 DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" \
+TRAIN.MEAN 0.0043642526 TRAIN.STD 0.07544233 \
+--distributed --cfg configs/seresnet_unet.yaml > se.log 2>&1 &
+
+nohup time \
+python -m torch.distributed.launch --nproc_per_node=4 train.py \
+MODEL.PRETRAINED "/home/alfred/models/hrnetv2_w48_imagenet_pretrained.pth" \
+DATASET.ROOT "/home/maxkaz/data/seismic" DATASET.NUM_CLASSES 6 DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" \
+TRAIN.MEAN 0.0043642526 TRAIN.STD 0.07544233 \
+--distributed --cfg configs/hrnet.yaml > hr.log 2>&1 &
+
+"""
+
+from interpretation.deepseismic_interpretation.data import read_segy
+
+""" libraries """
+import segyio
+
+import numpy as np
+from scipy import stats
+import os
+
+np.set_printoptions(linewidth=200)
+import logging
+
+# toggle to WARNING when running in production, or use CLI
+logging.getLogger().setLevel(logging.DEBUG)
+# logging.getLogger().setLevel(logging.WARNING)
+import argparse
+
+parser = argparse.ArgumentParser()
+
+""" useful information when running from a GIT folder."""
+myname = os.path.realpath(__file__)
+mypath = os.path.dirname(myname)
+myname = os.path.basename(myname)
+
+
+def main(args):
+    """
+    Transforms the SEG competition SEGY dataset (image + labels) into the DeepSeismic tensor format
+    """
+
+    logging.info("loading data")
+    data, _ = read_segy(args.train)
+    labels, _ = read_segy(args.label)
+
+    assert labels.min() == 1.0
+    n_classes = labels.max()
+    assert n_classes == N_CLASSES
+
+    logging.info("Running 3-sigma clipping")
+    clip_scaling = 3.0
+    mean, std = data.mean(), data.std()
+    data[data > mean + clip_scaling * std] = mean + clip_scaling * std
+    data[data < mean - clip_scaling * std] = mean - clip_scaling * std
+
+    # Make data cube fast to access
+    logging.info("Adjusting precision")
+    data = np.ascontiguousarray(data, "float32")
+    labels = np.ascontiguousarray(labels, "uint8")
+
+    # adjust labels to start from zero
+    labels -= 1
+
+    # rescale to lie within [range_min, range_max]
+    range_min, range_max = -1.0, 1.0
+    data_std = (data - data.min()) / (data.max() - data.min())
+    data = data_std * (range_max - range_min) + range_min
+
+    """
+    # cut off a buffer zone around the volume (to avoid mislabeled data):
+    buffer = 25
+    data = data[:, buffer:-buffer, buffer:-buffer]
+    labels = labels[:, buffer:-buffer, buffer:-buffer]
+    """
+
+    # data cube dimensions are inlines x crosslines x depth
+    n_inlines = 
data.shape[0]
+    n_crosslines = data.shape[1]
+
+    inline_cut = int(np.floor(n_inlines * INLINE_FRACTION))
+    crossline_cut = int(np.floor(n_crosslines * CROSSLINE_FRACTION))
+
+    data_train = data[0:inline_cut, 0:crossline_cut, :]
+    data_test1 = data[inline_cut:n_inlines, :, :]
+    data_test2 = data[:, crossline_cut:n_crosslines, :]
+
+    labels_train = labels[0:inline_cut, 0:crossline_cut, :]
+    labels_test1 = labels[inline_cut:n_inlines, :, :]
+    labels_test2 = labels[:, crossline_cut:n_crosslines, :]
+
+    def mkdir(dirname):
+
+        if os.path.isdir(dirname) and os.path.exists(dirname):
+            return
+
+        if not os.path.isdir(dirname) and os.path.exists(dirname):
+            raise RuntimeError(f"{dirname} exists but is not a directory - remove it and run this script again")
+
+        os.mkdir(dirname)
+
+    mkdir(args.outdir)
+    mkdir(os.path.join(args.outdir, "splits"))
+    mkdir(os.path.join(args.outdir, "train"))
+    mkdir(os.path.join(args.outdir, "test_once"))
+
+    np.save(os.path.join(args.outdir, "train", "train_seismic.npy"), data_train)
+    np.save(os.path.join(args.outdir, "train", "train_labels.npy"), labels_train)
+
+    np.save(os.path.join(args.outdir, "test_once", "test1_seismic.npy"), data_test1)
+    np.save(os.path.join(args.outdir, "test_once", "test1_labels.npy"), labels_test1)
+
+    np.save(os.path.join(args.outdir, "test_once", "test2_seismic.npy"), data_test2)
+    np.save(os.path.join(args.outdir, "test_once", "test2_labels.npy"), labels_test2)
+
+    # Compute class weights:
+    num_classes, class_count = np.unique(labels[:], return_counts=True)
+    # class_probabilities = np.histogram(labels[:], bins= , density=True)
+    class_weights = 1 - class_count / np.sum(class_count)
+    logging.info("CLASS WEIGHTS TO USE")
+    logging.info(class_weights)
+    logging.info("MEAN")
+    logging.info(mean)
+    logging.info("STANDARD DEVIATION")
+    logging.info(std)
+
+
+""" GLOBAL VARIABLES """
+INLINE_FRACTION = 0.7
+CROSSLINE_FRACTION = 1.0
+N_CLASSES = 6
+
+parser.add_argument("--train", help="Name of train data", type=str, required=True)
+parser.add_argument("--label", help="Name of train labels data", type=str, required=True)
+parser.add_argument("--outdir", help="Output data directory location", type=str, required=True)
+
+""" main wrapper with profiler """
+if __name__ == "__main__":
+    main(parser.parse_args())
+
+# pretty printing of the stack
+"""
+    try:
+        logging.info('before main')
+        main(parser.parse_args())
+        logging.info('after main')
+    except:
+        for frame in traceback.extract_tb(sys.exc_info()[2]):
+            fname,lineno,fn,text = frame
+            print ("Error in %s on line %d" % (fname, lineno))
+"""
+# optionally enable profiling information
+# import cProfile
+# name = 
+# cProfile.run('main.run()', name + '.prof')
+# import pstats
+# p = pstats.Stats(name + '.prof')
+# p.sort_stats('cumulative').print_stats(10)
+# p.sort_stats('time').print_stats()

From fc32ec7b5ad38e316ae3f8b9f41b8e7400e6cc53 Mon Sep 17 00:00:00 2001
From: Max Kaznady
Date: Wed, 29 Jul 2020 15:52:42 -0400
Subject: [PATCH 02/15] fixes to ratios in byod scripts; documentation

---
 scripts/byod_competition.py | 25 +++++++++++++++++--------
 scripts/byod_penobscot.py   |  2 +-
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/scripts/byod_competition.py b/scripts/byod_competition.py
index 70e70492..4b46d412 100644
--- a/scripts/byod_competition.py
+++ b/scripts/byod_competition.py
@@ -9,12 +9,11 @@
 
 # information to include in configuration file when running:
 
-INFO:root:CLASS WEIGHTS TO USE
 INFO:root:[0.84979262 0.57790153 0.95866329 0.71236326 0.99004844 0.91123086]
 INFO:root:MEAN
-INFO:root:0.0043642526
+INFO:root:4.183678e-05 INFO:root:STANDARD DEVIATION -INFO:root:0.07544233 +INFO:root:0.31477982 # kick off run as: @@ -22,23 +21,32 @@ python -m torch.distributed.launch --nproc_per_node=${NGPU} train.py \ TRAIN.BATCH_SIZE_PER_GPU 2 VALIDATION.BATCH_SIZE_PER_GPU 2 \ DATASET.ROOT "/data/seismic" DATASET.NUM_CLASSES 6 DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" \ -TRAIN.MEAN 0.0043642526 TRAIN.STD 0.07544233 \ +TRAIN.MEAN 0.0 TRAIN.STD 0.31477982 \ --distributed --cfg configs/seresnet_unet.yaml - nohup time \ python -m torch.distributed.launch --nproc_per_node=4 train.py \ DATASET.ROOT "/home/maxkaz/data/seismic" DATASET.NUM_CLASSES 6 DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" \ -TRAIN.MEAN 0.0043642526 TRAIN.STD 0.07544233 \ +TRAIN.MEAN 0.0 TRAIN.STD 0.31477982 \ --distributed --cfg configs/seresnet_unet.yaml > se.log 2>&1 & nohup time \ python -m torch.distributed.launch --nproc_per_node=4 train.py \ MODEL.PRETRAINED "/home/alfred/models/hrnetv2_w48_imagenet_pretrained.pth" \ DATASET.ROOT "/home/maxkaz/data/seismic" DATASET.NUM_CLASSES 6 DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" \ -TRAIN.MEAN 0.0043642526 TRAIN.STD 0.07544233 \ +TRAIN.MEAN 0.0 TRAIN.STD 0.31477982 \ --distributed --cfg configs/hrnet.yaml > hr.log 2>&1 & +Scoring: + +nohup time \ +python test.py \ +DATASET.ROOT "/home/maxkaz/data/seismic" DATASET.NUM_CLASSES 6 \ +DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" \ +TRAIN.MEAN 0.0043642526 TRAIN.STD 0.07544233 \ +TEST.SPLIT 'test1' +--distributed --cfg configs/seresnet_unet.yaml > se.log 2>&1 & + """ from interpretation.deepseismic_interpretation.data import read_segy @@ -150,6 +158,7 @@ def mkdir(dirname): class_weights = 1 - class_count / np.sum(class_count) logging.info("CLASS WEIGHTS TO USE") logging.info(class_weights) + mean, std = data.mean(), data.std() logging.info("MEAN") logging.info(mean) logging.info("STANDARD DEVIATION") @@ -158,7 +167,7 @@ def mkdir(dirname): """ GLOBAL VARIABLES """ INLINE_FRACTION = 0.7 -CROSSLINE_FRACTION = 1.0 +CROSSLINE_FRACTION = 0.78 N_CLASSES = 6 parser.add_argument("--train", help="Name of train data", type=str, required=True) diff --git a/scripts/byod_penobscot.py b/scripts/byod_penobscot.py index 8ccc8b6e..32e65d5f 100644 --- a/scripts/byod_penobscot.py +++ b/scripts/byod_penobscot.py @@ -120,7 +120,7 @@ def mkdir(dirname): """ GLOBAL VARIABLES """ INLINE_FRACTION = 0.7 -CROSSLINE_FRACTION = 1.0 +CROSSLINE_FRACTION = 0.78 N_CLASSES = 8 parser.add_argument("--filename", help="Name of HDF5 data", type=str, required=True) From 325e0a5f7cdf46f1f1d0f550dd66689bd576cce2 Mon Sep 17 00:00:00 2001 From: Max Kaznady Date: Wed, 29 Jul 2020 19:55:11 -0400 Subject: [PATCH 03/15] fixed unet model not loading properly in the unet config file --- cv_lib/cv_lib/segmentation/models/unet.py | 2 ++ experiments/interpretation/dutchf3_patch/configs/unet.yaml | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cv_lib/cv_lib/segmentation/models/unet.py b/cv_lib/cv_lib/segmentation/models/unet.py index ddb9197f..d73b75c1 100644 --- a/cv_lib/cv_lib/segmentation/models/unet.py +++ b/cv_lib/cv_lib/segmentation/models/unet.py @@ -4,6 +4,8 @@ """ Taken from https://github.com/milesial/Pytorch-UNet """ +import os + import torch import torch.nn as nn import torch.nn.functional as F diff --git 
a/experiments/interpretation/dutchf3_patch/configs/unet.yaml b/experiments/interpretation/dutchf3_patch/configs/unet.yaml index 5ae1ee45..0597b8fa 100644 --- a/experiments/interpretation/dutchf3_patch/configs/unet.yaml +++ b/experiments/interpretation/dutchf3_patch/configs/unet.yaml @@ -11,7 +11,7 @@ WORKERS: 4 PRINT_FREQ: 10 LOG_CONFIG: logging.conf SEED: 2019 - +OPENCV_BORDER_CONSTANT: 0 DATASET: NUM_CLASSES: 6 @@ -21,7 +21,7 @@ DATASET: MAX: 1 MODEL: - NAME: resnet_unet + NAME: unet IN_CHANNELS: 3 From 9dd1e989ca0e7ff1b7709d43e6b2f90f197b15f1 Mon Sep 17 00:00:00 2001 From: Max Kaznady Date: Fri, 31 Jul 2020 15:00:11 +0000 Subject: [PATCH 04/15] minor tweaks to docs and imports --- scripts/byod_competition.py | 5 ++++- scripts/byod_penobscot.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/byod_competition.py b/scripts/byod_competition.py index 4b46d412..8f6bb805 100644 --- a/scripts/byod_competition.py +++ b/scripts/byod_competition.py @@ -17,6 +17,9 @@ # kick off run as: +python byod_competition.py --train /home/maxkaz/data/seismic/TrainingData_Image.segy --label /home/maxkaz/data/seismic/TrainingData_Labels.segy --outdir /home/maxkaz/data/seismic +python prepare_dutchf3.py split_train_val patch --data_dir=/home/maxkaz/data/seismic --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both --section_stride=100 + NGPU=2 python -m torch.distributed.launch --nproc_per_node=${NGPU} train.py \ TRAIN.BATCH_SIZE_PER_GPU 2 VALIDATION.BATCH_SIZE_PER_GPU 2 \ @@ -49,7 +52,7 @@ """ -from interpretation.deepseismic_interpretation.data import read_segy +from deepseismic_interpretation.data import read_segy """ libraries """ import segyio diff --git a/scripts/byod_penobscot.py b/scripts/byod_penobscot.py index 32e65d5f..bc0b682b 100644 --- a/scripts/byod_penobscot.py +++ b/scripts/byod_penobscot.py @@ -19,7 +19,7 @@ import logging # toggle to WARNING when running in production, or use CLI -logging.getLogger().setLevel(logging.DEBUG) +logging.getLogger().setLevel(logging.DEBUG)~/da # logging.getLogger().setLevel(logging.WARNING) import argparse From b8b219ea2a0e4d23d49d6ace844378cc1b2c27b0 Mon Sep 17 00:00:00 2001 From: Max Kaznady Date: Fri, 31 Jul 2020 15:38:00 -0400 Subject: [PATCH 05/15] added 3D scoring - working on making it work - file is also saved as segy --- .../interpretation/dutchf3_patch/test.py | 98 ++++++++++++++++++- .../deepseismic_interpretation/data.py | 44 +++++++++ scripts/byod_competition.py | 9 +- 3 files changed, 144 insertions(+), 7 deletions(-) diff --git a/experiments/interpretation/dutchf3_patch/test.py b/experiments/interpretation/dutchf3_patch/test.py index 2efeee85..4acf7db5 100644 --- a/experiments/interpretation/dutchf3_patch/test.py +++ b/experiments/interpretation/dutchf3_patch/test.py @@ -27,11 +27,14 @@ from toolz import compose, curry, itertoolz, pipe, take from torch.utils import data +from interpretation.deepseismic_interpretation.data import write_segy + from cv_lib.segmentation import models from cv_lib.segmentation.dutchf3.utils import current_datetime, git_branch, git_hash from cv_lib.utils import load_log_configuration, mask_to_disk, generate_path, image_to_disk -from deepseismic_interpretation.dutchf3.data import add_patch_depth_channels, get_test_loader +from deepseismic_interpretation.dutchf3.data import add_patch_depth_channels, get_test_loader, _test1_labels_for, \ + _test2_labels_for from default import _C as config from default import update_config @@ -44,6 +47,9 @@ 
"zechstein", ] +# we can optionally supply a segy file whose geometry we will use to write out 3D test set predictions +# if it doesn't exist, we won't output a segy file +SEGY_INFILE = '/data/seismic_orig/TrainingData_Labels.segy' class runningScore(object): def __init__(self, n_classes): @@ -94,6 +100,48 @@ def reset(self): self.confusion_matrix = np.zeros((self.n_classes, self.n_classes)) +def _compute_3D_metrics(gt_labels, pred, n_classes, split): + """ + Compute 3D metrics on two 3D arrays. A good test case is to set gt==pred. + + Args: + gt: ground truth 3D numpy array + pred: predictions 3D array + n_classes: number of classes + split: which test set split we're computing + + Returns: + Nothing - stdout print + + """ + + logger = logging.getLogger(__name__) + + score = runningScore(n_classes) + score.update(gt_labels, pred) + + score, class_iou = score.get_scores() + + logger.info(f"--------------- 3D RESULTS {split} -----------------") + logger.info(f'Pixel Acc: {score["Pixel Acc: "]:.4f}') + + logger.info(f'Mean Class Acc: {score["Mean Class Acc: "]:.4f}') + for cdx, class_name in enumerate(_CLASS_NAMES): + logger.info(f' class {cdx} named {class_name} accuracy {score["Class Accuracy: "][cdx]:.4f}') + + logger.info(f'Mean IoU: {score["Mean IoU: "]:0.4f}') + + for cdx, class_name in enumerate(_CLASS_NAMES): + logger.info(f" class {cdx} named {class_name} IoU {class_iou[cdx]:.4f}") + logger.info(f'Freq Weighted IoU: {score["Freq Weighted IoU: "]:.4f}') + + # Save confusion matrix: + logger.info("writing confusion matrix") + confusion = score["confusion_matrix"] + np.savetxt(f"confusion_split_{split}.csv", confusion, delimiter=" ") + + logger.info("----------------- 3D DONE ---------------------------") + def _transform_CHW_to_HWC(numpy_array): return np.moveaxis(numpy_array, 0, -1) @@ -307,6 +355,10 @@ def _evaluate_split( running_metrics_split = runningScore(n_classes) + n_inlines, n_crosslines, n_depth = test_set.labels.shape + accum_inline = np.zeros((n_classes, n_depth, n_crosslines, n_inlines)) + accum_crossline = np.zeros((n_classes, n_depth, n_crosslines, n_inlines)) + # evaluation mode: with torch.no_grad(): # operations inside don't track history model.eval() @@ -328,6 +380,21 @@ def _evaluate_split( config.DATASET.MAX, ) + # for debugging, if you set this to GT then you can test if + # the reconstructions matches test_set.labels + preds_numpy = outputs.detach().squeeze().numpy() + + # direction is channel x depth x crossline x inline + + # dealing with inline + if test_set.sections[i].startswith("i"): + accum_inline[:, :, :, i] = preds_numpy + # dealing with crossline + elif test_set.sections[i].startswith("x"): + accum_crossline[:, :, i, :] = preds_numpy + else: + raise Exception("we need either an inline or crossline split") + pred = outputs.detach().max(1)[1].numpy() gt = labels.numpy() if debug: @@ -364,6 +431,31 @@ def _evaluate_split( logger.info(f'Mean IoU: {score["Mean IoU: "]:0.3f}') running_metrics_split.reset() + ###################################################################### + # 3D: now compute metrics on full 3D volume + ###################################################################### + + gt_labels = test_set.labels.swapaxes(0, 2) + assert gt_labels.shape == accum_inline.shape + assert gt_labels.shape == accum_crossline.shape + + # compute mIoU here + logging.info("Simple average") + np.save() + pred_sum = accum_inline + accum_crossline + pred = pred_sum.argmax(0) + _compute_3D_metrics(gt_labels, pred, n_classes, split) + 
np.save(f"test_simple_avg_split_{split}.npy", pred) + if os.path.isfile(SEGY_INFILE): + write_segy(f"test_simple_avg_split_{split}.segy", SEGY_INFILE, pred) + + logging.info("Geometric average") + pred_sum = np.sqrt(accum_inline*accum_crossline) + pred = pred_sum.argmax(0) + _compute_3D_metrics(gt_labels, pred, n_classes, split) + np.save(f"test_geometric_avg_split_{split}.npy", pred) + if os.path.isfile(SEGY_INFILE): + write_segy(f"test_geometric_avg_split_{split}.segy", SEGY_INFILE, pred) def _write_section_file(labels, section_file): # define indices of the array @@ -381,7 +473,9 @@ def _write_section_file(labels, section_file): else: x_list = [] - list_test = i_list + x_list + # TODO: revert + # list_test = i_list + x_list + list_test = i_list[0:1] + x_list[0:1] file_object = open(section_file, "w") file_object.write("\n".join(list_test)) diff --git a/interpretation/deepseismic_interpretation/data.py b/interpretation/deepseismic_interpretation/data.py index 1ff9c100..2dd6ed37 100644 --- a/interpretation/deepseismic_interpretation/data.py +++ b/interpretation/deepseismic_interpretation/data.py @@ -19,6 +19,9 @@ import segyio +from shutil import copyfile + + _open_to_array = compose(np.array, Image.open) @@ -61,6 +64,47 @@ def read_segy(filename): return data, data_info +def write_segy(out_filename, in_filename, out_cube): + """ + Writes out_cube to a segy-file (out_filename) with same header/size as in_filename + + Args: + out_filename: output filename + in_filename: input file, whose metadata will be copied + out_cube: array which we write to out_filename + + Returns: + Nothing + """ + + # Select last channel + if type(out_cube) is list: + out_cube = out_cube[-1] + + print("Writing interpretation to " + out_filename) + + # Copy segy file + copyfile(in_filename, out_filename) + + # Moving temporal axis back again + out_cube = np.moveaxis(out_cube, 0, -1) + + # Open out-file + with segyio.open(out_filename, "r+") as src: + iline_start = src.ilines[0] + dtype = src.iline[iline_start].dtype + # loop through inlines and insert output + for i in src.ilines: + iline = out_cube[i - iline_start, :, :] + src.iline[i] = np.ascontiguousarray(iline.astype(dtype)) + + # TODO: rewrite this whole function + # Moving temporal axis first again - just in case the user want to keep working on it + out_cube = np.moveaxis(out_cube, -1, 0) + + print("Writing interpretation - Finished") + return + def _get_classes_and_counts(mask_list): class_counts_dict = defaultdict(int) for mask in mask_list: diff --git a/scripts/byod_competition.py b/scripts/byod_competition.py index 4b46d412..2e542b71 100644 --- a/scripts/byod_competition.py +++ b/scripts/byod_competition.py @@ -41,11 +41,10 @@ nohup time \ python test.py \ -DATASET.ROOT "/home/maxkaz/data/seismic" DATASET.NUM_CLASSES 6 \ -DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" \ -TRAIN.MEAN 0.0043642526 TRAIN.STD 0.07544233 \ -TEST.SPLIT 'test1' ---distributed --cfg configs/seresnet_unet.yaml > se.log 2>&1 & +DATASET.ROOT "/data/seismic" DATASET.NUM_CLASSES 6 DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" \ +TRAIN.MEAN 0.0 TRAIN.STD 0.31477982 \ +TEST.SPLIT 'both' +--cfg configs/unet.yaml > unet.log 2>&1 & """ From d4e05b865339958265514c94073c3796751fa0f8 Mon Sep 17 00:00:00 2001 From: Max Kaznady Date: Sun, 2 Aug 2020 21:16:29 -0400 Subject: [PATCH 06/15] optimized memory usage when computing confusion matrix --- environment/anaconda/local/environment.yml | 1 + 
.../interpretation/dutchf3_patch/test.py | 86 ++++++++++++++----- .../deepseismic_interpretation/data.py | 20 ++++- 3 files changed, 81 insertions(+), 26 deletions(-) diff --git a/environment/anaconda/local/environment.yml b/environment/anaconda/local/environment.yml index cf7078c1..503dd2b1 100644 --- a/environment/anaconda/local/environment.yml +++ b/environment/anaconda/local/environment.yml @@ -19,6 +19,7 @@ dependencies: - papermill>=1.0.1 - jupyterlab - pip: + - memory_profiler - segyio==1.8.8 - pytorch-ignite==0.3.0 - fire==0.2.1 diff --git a/experiments/interpretation/dutchf3_patch/test.py b/experiments/interpretation/dutchf3_patch/test.py index 4acf7db5..ba945b8d 100644 --- a/experiments/interpretation/dutchf3_patch/test.py +++ b/experiments/interpretation/dutchf3_patch/test.py @@ -21,20 +21,25 @@ import fire import numpy as np +from sklearn import metrics import torch import torch.nn.functional as F from albumentations import Compose, Normalize, PadIfNeeded, Resize from toolz import compose, curry, itertoolz, pipe, take from torch.utils import data -from interpretation.deepseismic_interpretation.data import write_segy +from deepseismic_interpretation.data import write_segy from cv_lib.segmentation import models from cv_lib.segmentation.dutchf3.utils import current_datetime, git_branch, git_hash from cv_lib.utils import load_log_configuration, mask_to_disk, generate_path, image_to_disk -from deepseismic_interpretation.dutchf3.data import add_patch_depth_channels, get_test_loader, _test1_labels_for, \ - _test2_labels_for +from deepseismic_interpretation.dutchf3.data import ( + add_patch_depth_channels, + get_test_loader, + _test1_labels_for, + _test2_labels_for, +) from default import _C as config from default import update_config @@ -49,24 +54,47 @@ # we can optionally supply a segy file whose geometry we will use to write out 3D test set predictions # if it doesn't exist, we won't output a segy file -SEGY_INFILE = '/data/seismic_orig/TrainingData_Labels.segy' +SEGY_INFILE = "/data/seismic_orig/TrainingData_Labels.segy" + class runningScore(object): def __init__(self, n_classes): self.n_classes = n_classes self.confusion_matrix = np.zeros((n_classes, n_classes)) + # @profile def _fast_hist(self, label_true, label_pred, n_class): + """ + speed-optimized but not memory-optimized version of the confusion matrix calculation + """ + logger = logging.getLogger(__name__) mask = (label_true >= 0) & (label_true < n_class) - hist = np.bincount(n_class * label_true[mask].astype(int) + label_pred[mask], minlength=n_class ** 2,).reshape( - n_class, n_class - ) + bincount_arg = n_class * label_true[mask].astype(int) + label_pred[mask] + logger.info('bincount operation starting...') + hist = np.bincount(bincount_arg, minlength=n_class ** 2,) + hist = hist.reshape(n_class, n_class) + logger.info('finished') return hist - def update(self, label_trues, label_preds): + # @profile + def _confusion_matrix(self, label_true, label_pred, n_class): + """ + memory-optimized but not speed-optimized version of the confusion matrix calculation + """ + mask = (label_true >= 0) & (label_true < n_class) + #bincount_arg = n_class * label_true[mask].astype(int) + label_pred[mask] + matrix = metrics.confusion_matrix(label_true[mask], label_pred[mask], labels = list(range(n_class))) + return matrix + + # @profile + def update(self, label_trues, label_preds, fast_hist = True): for lt, lp in zip(label_trues, label_preds): - self.confusion_matrix += self._fast_hist(lt.flatten(), lp.flatten(), self.n_classes) + if 
fast_hist: + self.confusion_matrix += self._fast_hist(lt.flatten(), lp.flatten(), self.n_classes) + else: + self.confusion_matrix += self._confusion_matrix(lt.flatten(), lp.flatten(), self.n_classes) + # @profile def get_scores(self): """Returns accuracy score evaluation result. - overall accuracy @@ -117,8 +145,13 @@ def _compute_3D_metrics(gt_labels, pred, n_classes, split): logger = logging.getLogger(__name__) + # TODO: remove + #n = 300 + #gt_labels = gt_labels[:n, :n, :n] + #pred = pred[:n, :n, :n] + score = runningScore(n_classes) - score.update(gt_labels, pred) + score.update(gt_labels, pred, fast_hist=False) score, class_iou = score.get_scores() @@ -142,6 +175,7 @@ def _compute_3D_metrics(gt_labels, pred, n_classes, split): logger.info("----------------- 3D DONE ---------------------------") + def _transform_CHW_to_HWC(numpy_array): return np.moveaxis(numpy_array, 0, -1) @@ -287,6 +321,9 @@ def _patch_label_2d( output = output_processing(output) output_p[:, :, hdx + ps : hdx + ps + patch_size, wdx + ps : wdx + ps + patch_size,] += output + # TODO remove + break + # dump the data right before it's being put into the model and after scoring if debug: outdir = f"debug/test/batch_{split}" @@ -356,8 +393,8 @@ def _evaluate_split( running_metrics_split = runningScore(n_classes) n_inlines, n_crosslines, n_depth = test_set.labels.shape - accum_inline = np.zeros((n_classes, n_depth, n_crosslines, n_inlines)) - accum_crossline = np.zeros((n_classes, n_depth, n_crosslines, n_inlines)) + accum_inline = np.zeros((n_classes, n_depth, n_crosslines, n_inlines), dtype=np.float32) + accum_crossline = np.zeros((n_classes, n_depth, n_crosslines, n_inlines), dtype=np.float32) # evaluation mode: with torch.no_grad(): # operations inside don't track history @@ -382,7 +419,7 @@ def _evaluate_split( # for debugging, if you set this to GT then you can test if # the reconstructions matches test_set.labels - preds_numpy = outputs.detach().squeeze().numpy() + preds_numpy = outputs.detach().squeeze().numpy().astype(np.float32) # direction is channel x depth x crossline x inline @@ -435,27 +472,31 @@ def _evaluate_split( # 3D: now compute metrics on full 3D volume ###################################################################### - gt_labels = test_set.labels.swapaxes(0, 2) - assert gt_labels.shape == accum_inline.shape - assert gt_labels.shape == accum_crossline.shape + gt_labels = test_set.labels.swapaxes(0, 2).astype(np.uint8) + assert gt_labels.shape == accum_inline.shape[1:] + assert gt_labels.shape == accum_crossline.shape[1:] # compute mIoU here logging.info("Simple average") - np.save() pred_sum = accum_inline + accum_crossline - pred = pred_sum.argmax(0) + pred = pred_sum.argmax(0).astype(np.uint8) + del pred_sum _compute_3D_metrics(gt_labels, pred, n_classes, split) np.save(f"test_simple_avg_split_{split}.npy", pred) + # use existing SEGY file as a template to write our data into if os.path.isfile(SEGY_INFILE): - write_segy(f"test_simple_avg_split_{split}.segy", SEGY_INFILE, pred) + write_segy(f"test_simple_avg_split_{split}.segy", SEGY_INFILE, pred) logging.info("Geometric average") - pred_sum = np.sqrt(accum_inline*accum_crossline) - pred = pred_sum.argmax(0) + pred_sum = np.sqrt(accum_inline * accum_crossline) + pred = pred_sum.argmax(0).astype(np.uint8) + del pred_sum _compute_3D_metrics(gt_labels, pred, n_classes, split) np.save(f"test_geometric_avg_split_{split}.npy", pred) + # use existing SEGY file as a template to write our data into if os.path.isfile(SEGY_INFILE): - 
write_segy(f"test_geometric_avg_split_{split}.segy", SEGY_INFILE, pred) + write_segy(f"test_geometric_avg_split_{split}.segy", SEGY_INFILE, pred) + def _write_section_file(labels, section_file): # define indices of the array @@ -586,3 +627,4 @@ def test(*options, cfg=None, debug=False): if __name__ == "__main__": fire.Fire(test) + diff --git a/interpretation/deepseismic_interpretation/data.py b/interpretation/deepseismic_interpretation/data.py index 2dd6ed37..3f166158 100644 --- a/interpretation/deepseismic_interpretation/data.py +++ b/interpretation/deepseismic_interpretation/data.py @@ -77,10 +77,6 @@ def write_segy(out_filename, in_filename, out_cube): Nothing """ - # Select last channel - if type(out_cube) is list: - out_cube = out_cube[-1] - print("Writing interpretation to " + out_filename) # Copy segy file @@ -91,10 +87,26 @@ def write_segy(out_filename, in_filename, out_cube): # Open out-file with segyio.open(out_filename, "r+") as src: + iline_start = src.ilines[0] + # set type to inlines dtype = src.iline[iline_start].dtype + """ + src.ilines = list(range(src.ilines[0], src.ilines[0] + out_cube.shape[0])) + + # set crosslines to match the numpy array shape + xline_start = src.xlines[0] + src.xlines = list(range(src.xlines[0], src.xlines[0] + out_cube.shape[1])) + + # set depth to match the numpy array shape + depth_start = src.depth[0] + src.depth = list(range(src)) + """ + # loop through inlines and insert output for i in src.ilines: + if i>=out_cube.shape[0]: + break iline = out_cube[i - iline_start, :, :] src.iline[i] = np.ascontiguousarray(iline.astype(dtype)) From 30c3eac0bb64f23601b7c830d70edbd4efc6d470 Mon Sep 17 00:00:00 2001 From: Max Kaznady Date: Mon, 3 Aug 2020 15:00:16 -0400 Subject: [PATCH 07/15] added custom segy writer which can create segy files from numpy arrays --- .../interpretation/dutchf3_patch/test.py | 54 ++++++++++--------- .../deepseismic_interpretation/data.py | 25 +-------- 2 files changed, 31 insertions(+), 48 deletions(-) diff --git a/experiments/interpretation/dutchf3_patch/test.py b/experiments/interpretation/dutchf3_patch/test.py index ba945b8d..df9f9584 100644 --- a/experiments/interpretation/dutchf3_patch/test.py +++ b/experiments/interpretation/dutchf3_patch/test.py @@ -21,6 +21,7 @@ import fire import numpy as np +import segyio from sklearn import metrics import torch import torch.nn.functional as F @@ -53,8 +54,8 @@ ] # we can optionally supply a segy file whose geometry we will use to write out 3D test set predictions -# if it doesn't exist, we won't output a segy file -SEGY_INFILE = "/data/seismic_orig/TrainingData_Labels.segy" +# if it doesn't exist, we will write a blank segy file with same dimensions as the predictions array +SEGY_INFILE = "/data/seismic/TrainingData_Labels.segy" class runningScore(object): @@ -67,13 +68,13 @@ def _fast_hist(self, label_true, label_pred, n_class): """ speed-optimized but not memory-optimized version of the confusion matrix calculation """ - logger = logging.getLogger(__name__) + # logger = logging.getLogger(__name__) mask = (label_true >= 0) & (label_true < n_class) bincount_arg = n_class * label_true[mask].astype(int) + label_pred[mask] - logger.info('bincount operation starting...') + # logger.info("bincount operation starting...") hist = np.bincount(bincount_arg, minlength=n_class ** 2,) hist = hist.reshape(n_class, n_class) - logger.info('finished') + # logger.info("finished") return hist # @profile @@ -82,12 +83,10 @@ def _confusion_matrix(self, label_true, label_pred, n_class): memory-optimized but 
not speed-optimized version of the confusion matrix calculation """ mask = (label_true >= 0) & (label_true < n_class) - #bincount_arg = n_class * label_true[mask].astype(int) + label_pred[mask] - matrix = metrics.confusion_matrix(label_true[mask], label_pred[mask], labels = list(range(n_class))) - return matrix + return metrics.confusion_matrix(label_true[mask], label_pred[mask], labels=list(range(n_class))) # @profile - def update(self, label_trues, label_preds, fast_hist = True): + def update(self, label_trues, label_preds, fast_hist=True): for lt, lp in zip(label_trues, label_preds): if fast_hist: self.confusion_matrix += self._fast_hist(lt.flatten(), lp.flatten(), self.n_classes) @@ -145,13 +144,8 @@ def _compute_3D_metrics(gt_labels, pred, n_classes, split): logger = logging.getLogger(__name__) - # TODO: remove - #n = 300 - #gt_labels = gt_labels[:n, :n, :n] - #pred = pred[:n, :n, :n] - score = runningScore(n_classes) - score.update(gt_labels, pred, fast_hist=False) + score.update(gt_labels, pred, fast_hist=True) score, class_iou = score.get_scores() @@ -321,9 +315,6 @@ def _patch_label_2d( output = output_processing(output) output_p[:, :, hdx + ps : hdx + ps + patch_size, wdx + ps : wdx + ps + patch_size,] += output - # TODO remove - break - # dump the data right before it's being put into the model and after scoring if debug: outdir = f"debug/test/batch_{split}" @@ -482,10 +473,18 @@ def _evaluate_split( pred = pred_sum.argmax(0).astype(np.uint8) del pred_sum _compute_3D_metrics(gt_labels, pred, n_classes, split) - np.save(f"test_simple_avg_split_{split}.npy", pred) + np.save(os.path.join(output_dir, f"test_simple_avg_split_{split}.npy", pred)) # use existing SEGY file as a template to write our data into if os.path.isfile(SEGY_INFILE): - write_segy(f"test_simple_avg_split_{split}.segy", SEGY_INFILE, pred) + # input segy file is the ground truth here + write_segy(os.path.join(output_dir, f"pred_simple_avg_split_{split}.segy"), SEGY_INFILE, pred.swapaxes(0, 2)) + else: + # write array into segy using array dimensions for # of inlines and crosslines + # make sure directions are inline, crossline, depth + logging.info("writing segy files") + segyio.tools.from_array3D(os.path.join(output_dir, f"pred_simple_avg_split_{split}.segy"), pred.swapaxes(0, 2), dt=1000) + segyio.tools.from_array3D(os.path.join(output_dir, f"groundtruth_simple_avg_split_{split}.segy"), gt_labels.swapaxes(0, 2), dt=1000) + logging.info("done") logging.info("Geometric average") pred_sum = np.sqrt(accum_inline * accum_crossline) @@ -495,7 +494,15 @@ def _evaluate_split( np.save(f"test_geometric_avg_split_{split}.npy", pred) # use existing SEGY file as a template to write our data into if os.path.isfile(SEGY_INFILE): - write_segy(f"test_geometric_avg_split_{split}.segy", SEGY_INFILE, pred) + # input segy file is the ground truth here + write_segy(os.path.join(output_dir, f"pred_geometric_avg_split_{split}.segy"), SEGY_INFILE, pred.swapaxes(0, 2)) + else: + # write array into segy using array dimensions for # of inlines and crosslines + # make sure directions are inline, crossline, depth + logging.info("writing segy files") + segyio.tools.from_array3D(os.path.join(output_dir,f"pred_geometric_avg_split_{split}.segy"), pred.swapaxes(0, 2), dt=1000) + segyio.tools.from_array3D(os.path.join(output_dir,f"groundtruth_geometric_avg_split_{split}.segy"), gt_labels.swapaxes(0, 2), dt=1000) + logging.info("done") def _write_section_file(labels, section_file): @@ -514,9 +521,7 @@ def _write_section_file(labels, section_file): 
else: x_list = [] - # TODO: revert - # list_test = i_list + x_list - list_test = i_list[0:1] + x_list[0:1] + list_test = i_list + x_list file_object = open(section_file, "w") file_object.write("\n".join(list_test)) @@ -627,4 +632,3 @@ def test(*options, cfg=None, debug=False): if __name__ == "__main__": fire.Fire(test) - diff --git a/interpretation/deepseismic_interpretation/data.py b/interpretation/deepseismic_interpretation/data.py index 3f166158..a3ad5008 100644 --- a/interpretation/deepseismic_interpretation/data.py +++ b/interpretation/deepseismic_interpretation/data.py @@ -77,45 +77,24 @@ def write_segy(out_filename, in_filename, out_cube): Nothing """ - print("Writing interpretation to " + out_filename) + logging.info("Writing interpretation to " + out_filename) # Copy segy file copyfile(in_filename, out_filename) - # Moving temporal axis back again - out_cube = np.moveaxis(out_cube, 0, -1) - # Open out-file with segyio.open(out_filename, "r+") as src: iline_start = src.ilines[0] # set type to inlines dtype = src.iline[iline_start].dtype - """ - src.ilines = list(range(src.ilines[0], src.ilines[0] + out_cube.shape[0])) - - # set crosslines to match the numpy array shape - xline_start = src.xlines[0] - src.xlines = list(range(src.xlines[0], src.xlines[0] + out_cube.shape[1])) - - # set depth to match the numpy array shape - depth_start = src.depth[0] - src.depth = list(range(src)) - """ # loop through inlines and insert output for i in src.ilines: - if i>=out_cube.shape[0]: - break iline = out_cube[i - iline_start, :, :] src.iline[i] = np.ascontiguousarray(iline.astype(dtype)) - # TODO: rewrite this whole function - # Moving temporal axis first again - just in case the user want to keep working on it - out_cube = np.moveaxis(out_cube, -1, 0) - - print("Writing interpretation - Finished") - return + logging.info("Writing interpretation - finished") def _get_classes_and_counts(mask_list): class_counts_dict = defaultdict(int) From d896a03ad54e40936345dd672e5c77dca1542ed3 Mon Sep 17 00:00:00 2001 From: Max Kaznady Date: Mon, 3 Aug 2020 15:30:59 -0400 Subject: [PATCH 08/15] minor typo --- scripts/byod_penobscot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/byod_penobscot.py b/scripts/byod_penobscot.py index bc0b682b..32e65d5f 100644 --- a/scripts/byod_penobscot.py +++ b/scripts/byod_penobscot.py @@ -19,7 +19,7 @@ import logging # toggle to WARNING when running in production, or use CLI -logging.getLogger().setLevel(logging.DEBUG)~/da +logging.getLogger().setLevel(logging.DEBUG) # logging.getLogger().setLevel(logging.WARNING) import argparse From e590bb39165de817cd84a37f4dd78a4c5064a27d Mon Sep 17 00:00:00 2001 From: Max Kaznady Date: Wed, 5 Aug 2020 21:31:32 +0000 Subject: [PATCH 09/15] latest fixes --- .../interpretation/dutchf3_patch/test.py | 8 +++---- scripts/byod_competition.py | 21 +++++++++++++++++-- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/experiments/interpretation/dutchf3_patch/test.py b/experiments/interpretation/dutchf3_patch/test.py index df9f9584..bfa3c3f1 100644 --- a/experiments/interpretation/dutchf3_patch/test.py +++ b/experiments/interpretation/dutchf3_patch/test.py @@ -391,7 +391,7 @@ def _evaluate_split( with torch.no_grad(): # operations inside don't track history model.eval() for i, (images, labels) in enumerate(test_loader): - logger.info(f"split: {split}, section: {i}") + logger.info(f"split: {split}, section: {test_set.sections[i]}") outputs = _patch_label_2d( model, images, @@ -419,7 +419,7 @@ def 
_evaluate_split( accum_inline[:, :, :, i] = preds_numpy # dealing with crossline elif test_set.sections[i].startswith("x"): - accum_crossline[:, :, i, :] = preds_numpy + accum_crossline[:, :, i-n_inlines, :] = preds_numpy else: raise Exception("we need either an inline or crossline split") @@ -473,7 +473,7 @@ def _evaluate_split( pred = pred_sum.argmax(0).astype(np.uint8) del pred_sum _compute_3D_metrics(gt_labels, pred, n_classes, split) - np.save(os.path.join(output_dir, f"test_simple_avg_split_{split}.npy", pred)) + np.save(os.path.join(output_dir, f"test_simple_avg_split_{split}.npy"), pred) # use existing SEGY file as a template to write our data into if os.path.isfile(SEGY_INFILE): # input segy file is the ground truth here @@ -491,7 +491,7 @@ def _evaluate_split( pred = pred_sum.argmax(0).astype(np.uint8) del pred_sum _compute_3D_metrics(gt_labels, pred, n_classes, split) - np.save(f"test_geometric_avg_split_{split}.npy", pred) + np.save(os.path.join(output_dir, f"test_geometric_avg_split_{split}.npy"), pred) # use existing SEGY file as a template to write our data into if os.path.isfile(SEGY_INFILE): # input segy file is the ground truth here diff --git a/scripts/byod_competition.py b/scripts/byod_competition.py index 34f9bd72..0bb4dbc1 100644 --- a/scripts/byod_competition.py +++ b/scripts/byod_competition.py @@ -9,12 +9,29 @@ # information to include in configuration file when running: +clip INFO:root:[0.84979262 0.57790153 0.95866329 0.71236326 0.99004844 0.91123086] INFO:root:MEAN INFO:root:4.183678e-05 INFO:root:STANDARD DEVIATION INFO:root:0.31477982 +noclip +INFO:root:[0.84979262 0.57790153 0.95866329 0.71236326 0.99004844 0.91123086] +INFO:root:MEAN +INFO:root:0.0043642526 +INFO:root:STANDARD DEVIATION +INFO:root:0.07544233 + +reduced test size +INFO:root:[0.84979262 0.57790153 0.95866329 0.71236326 0.99004844 0.91123086] +INFO:root:[0.84979262 0.57790153 0.95866329 0.71236326 0.99004844 0.91123086] +INFO:root:MEAN +INFO:root:4.183678e-05 +INFO:root:STANDARD DEVIATION +INFO:root:0.31477982 + + # kick off run as: python byod_competition.py --train /home/maxkaz/data/seismic/TrainingData_Image.segy --label /home/maxkaz/data/seismic/TrainingData_Labels.segy --outdir /home/maxkaz/data/seismic @@ -168,8 +185,8 @@ def mkdir(dirname): """ GLOBAL VARIABLES """ -INLINE_FRACTION = 0.7 -CROSSLINE_FRACTION = 0.78 +INLINE_FRACTION = 0.9 +CROSSLINE_FRACTION = 0.9 N_CLASSES = 6 parser.add_argument("--train", help="Name of train data", type=str, required=True) From 73e376437f44a5e9b6dc236425137df21106d251 Mon Sep 17 00:00:00 2001 From: Max Kaznady Date: Thu, 6 Aug 2020 11:52:38 -0400 Subject: [PATCH 10/15] added Lovasz losses --- cv_lib/cv_lib/segmentation/lovasz_losses.py | 258 ++++++++++++++++++ .../interpretation/dutchf3_patch/train.py | 4 +- 2 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 cv_lib/cv_lib/segmentation/lovasz_losses.py diff --git a/cv_lib/cv_lib/segmentation/lovasz_losses.py b/cv_lib/cv_lib/segmentation/lovasz_losses.py new file mode 100644 index 00000000..ab989e7e --- /dev/null +++ b/cv_lib/cv_lib/segmentation/lovasz_losses.py @@ -0,0 +1,258 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
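+#
+# NOTE: wired into experiments/interpretation/dutchf3_patch/train.py later in
+# this patch as the training criterion; per the docstrings below, lovasz_softmax
+# expects class probabilities of shape [B, C, H, W] and integer labels [B, H, W].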
+ +""" +DO NOT REMOVE THIS COMMENT +Lovasz-Softmax and Jaccard hinge loss in PyTorch +Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License) +taken from https://github.com/bermanmaxim/LovaszSoftmax under MIT license +""" + +from __future__ import print_function, division + +import torch +from torch.autograd import Variable +import torch.nn.functional as F +import numpy as np + +try: + from itertools import ifilterfalse +except ImportError: # py3k + from itertools import filterfalse as ifilterfalse + + +def lovasz_grad(gt_sorted): + """ + Computes gradient of the Lovasz extension w.r.t sorted errors + See Alg. 1 in paper + """ + p = len(gt_sorted) + gts = gt_sorted.sum() + intersection = gts - gt_sorted.float().cumsum(0) + union = gts + (1 - gt_sorted).float().cumsum(0) + jaccard = 1. - intersection / union + if p > 1: # cover 1-pixel case + jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] + return jaccard + + +def iou_binary(preds, labels, EMPTY=1., ignore=None, per_image=True): + """ + IoU for foreground class + binary: 1 foreground, 0 background + """ + if not per_image: + preds, labels = (preds,), (labels,) + ious = [] + for pred, label in zip(preds, labels): + intersection = ((label == 1) & (pred == 1)).sum() + union = ((label == 1) | ((pred == 1) & (label != ignore))).sum() + if not union: + iou = EMPTY + else: + iou = float(intersection) / float(union) + ious.append(iou) + iou = mean(ious) # mean accross images if per_image + return 100 * iou + + +def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False): + """ + Array of IoU for each (non ignored) class + """ + if not per_image: + preds, labels = (preds,), (labels,) + ious = [] + for pred, label in zip(preds, labels): + iou = [] + for i in range(C): + if i != ignore: # The ignored label is sometimes among predicted classes (ENet - CityScapes) + intersection = ((label == i) & (pred == i)).sum() + union = ((label == i) | ((pred == i) & (label != ignore))).sum() + if not union: + iou.append(EMPTY) + else: + iou.append(float(intersection) / float(union)) + ious.append(iou) + ious = [mean(iou) for iou in zip(*ious)] # mean accross images if per_image + return 100 * np.array(ious) + + +# --------------------------- BINARY LOSSES --------------------------- + + +def lovasz_hinge(logits, labels, per_image=True, ignore=None): + """ + Binary Lovasz hinge loss + logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) + labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) + per_image: compute the loss per image instead of per batch + ignore: void class id + """ + if per_image: + loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore)) + for log, lab in zip(logits, labels)) + else: + loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore)) + return loss + + +def lovasz_hinge_flat(logits, labels): + """ + Binary Lovasz hinge loss + logits: [P] Variable, logits at each prediction (between -\infty and +\infty) + labels: [P] Tensor, binary ground truth labels (0 or 1) + ignore: label to ignore + """ + if len(labels) == 0: + # only void pixels, the gradients should be 0 + return logits.sum() * 0. + signs = 2. * labels.float() - 1. + errors = (1. 
- logits * Variable(signs)) + errors_sorted, perm = torch.sort(errors, dim=0, descending=True) + perm = perm.data + gt_sorted = labels[perm] + grad = lovasz_grad(gt_sorted) + loss = torch.dot(F.relu(errors_sorted), Variable(grad)) + return loss + + +def flatten_binary_scores(scores, labels, ignore=None): + """ + Flattens predictions in the batch (binary case) + Remove labels equal to 'ignore' + """ + scores = scores.view(-1) + labels = labels.view(-1) + if ignore is None: + return scores, labels + valid = (labels != ignore) + vscores = scores[valid] + vlabels = labels[valid] + return vscores, vlabels + + +class StableBCELoss(torch.nn.modules.Module): + def __init__(self): + super(StableBCELoss, self).__init__() + + def forward(self, input, target): + neg_abs = - input.abs() + loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log() + return loss.mean() + + +def binary_xloss(logits, labels, ignore=None): + """ + Binary Cross entropy loss + logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) + labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) + ignore: void class id + """ + logits, labels = flatten_binary_scores(logits, labels, ignore) + loss = StableBCELoss()(logits, Variable(labels.float())) + return loss + + +# --------------------------- MULTICLASS LOSSES --------------------------- + + +def lovasz_softmax(probas, labels, classes='present', per_image=False, ignore=None): + """ + Multi-class Lovasz-Softmax loss + probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1). + Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. + labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) + classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + per_image: compute the loss per image instead of per batch + ignore: void class labels + """ + if per_image: + loss = mean(lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), classes=classes) + for prob, lab in zip(probas, labels)) + else: + loss = lovasz_softmax_flat(*flatten_probas(probas, labels, ignore), classes=classes) + return loss + + +def lovasz_softmax_flat(probas, labels, classes='present'): + """ + Multi-class Lovasz-Softmax loss + probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) + labels: [P] Tensor, ground truth labels (between 0 and C - 1) + classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + """ + if probas.numel() == 0: + # only void pixels, the gradients should be 0 + return probas * 0. 
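+    # per-class Lovasz extension: sort the absolute errors |fg - class_pred| in
+    # decreasing order, then take their dot product with the gradient of the
+    # Jaccard loss (lovasz_grad above) evaluated on the sorted ground truth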
+ C = probas.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes + for c in class_to_sum: + fg = (labels == c).float() # foreground for class c + if (classes is 'present' and fg.sum() == 0): + continue + if C == 1: + if len(classes) > 1: + raise ValueError('Sigmoid output possible only with 1 class') + class_pred = probas[:, 0] + else: + class_pred = probas[:, c] + errors = (Variable(fg) - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) + return mean(losses) + + +def flatten_probas(probas, labels, ignore=None): + """ + Flattens predictions in the batch + """ + if probas.dim() == 3: + # assumes output of a sigmoid layer + B, H, W = probas.size() + probas = probas.view(B, 1, H, W) + B, C, H, W = probas.size() + probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C + labels = labels.view(-1) + if ignore is None: + return probas, labels + valid = (labels != ignore) + vprobas = probas[valid.nonzero().squeeze()] + vlabels = labels[valid] + return vprobas, vlabels + + +def xloss(logits, labels, ignore=None): + """ + Cross entropy loss + """ + return F.cross_entropy(logits, Variable(labels), ignore_index=255) + + +# --------------------------- HELPER FUNCTIONS --------------------------- +def isnan(x): + return x != x + + +def mean(l, ignore_nan=False, empty=0): + """ + nanmean compatible with generators. + """ + l = iter(l) + if ignore_nan: + l = ifilterfalse(isnan, l) + try: + n = 1 + acc = next(l) + except StopIteration: + if empty == 'raise': + raise ValueError('Empty mean') + return empty + for n, v in enumerate(l, 2): + acc += v + if n == 1: + return acc + return acc / n diff --git a/experiments/interpretation/dutchf3_patch/train.py b/experiments/interpretation/dutchf3_patch/train.py index e687290e..c8b9fa1b 100644 --- a/experiments/interpretation/dutchf3_patch/train.py +++ b/experiments/interpretation/dutchf3_patch/train.py @@ -37,6 +37,7 @@ from cv_lib.segmentation.dutchf3.utils import current_datetime, git_branch, git_hash from cv_lib.segmentation.metrics import class_accuracy, class_iou, mean_class_accuracy, mean_iou, pixelwise_accuracy from cv_lib.utils import generate_path, load_log_configuration +from cv_lib.segmentation import lovasz_losses as L from deepseismic_interpretation.dutchf3.data import get_patch_loader from default import _C as config from default import update_config @@ -236,7 +237,8 @@ def run(*options, cfg=None, local_rank=0, debug=False, input=None, distributed=F class_weights = torch.tensor(config.DATASET.CLASS_WEIGHTS, device=device, requires_grad=False) # Loss: - criterion = torch.nn.CrossEntropyLoss(weight=class_weights, ignore_index=255, reduction="mean") + #criterion = torch.nn.CrossEntropyLoss(weight=class_weights, ignore_index=255, reduction="mean") + criterion = lambda x, y: L.lovasz_softmax(x, y, classes = list(range(n_classes)), ignore=255) # Model: if distributed: From 8eb84193269f193708f79146cde872ba59187c65 Mon Sep 17 00:00:00 2001 From: Max Kaznady Date: Sun, 9 Aug 2020 15:27:25 +0000 Subject: [PATCH 11/15] more minor fixes --- README.md | 3 ++- experiments/interpretation/dutchf3_patch/train.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1b82b9b1..6804da50 100644 --- a/README.md +++ b/README.md @@ -226,8 +226,9 @@ For seismic interpretation (segmentation), if you want to 
visualize cross-sectio To install [segyviewer](https://github.com/equinor/segyviewer) run: ```bash -conda env create -n segyviewer python=2.7 +conda create -n segyviewer python=2.7 conda activate segyviewer +sudo apt install libqtwebkit4 conda install -c conda-forge pyqt=4.11.4 pip install segyviewer ``` diff --git a/experiments/interpretation/dutchf3_patch/train.py b/experiments/interpretation/dutchf3_patch/train.py index c8b9fa1b..5076c541 100644 --- a/experiments/interpretation/dutchf3_patch/train.py +++ b/experiments/interpretation/dutchf3_patch/train.py @@ -238,7 +238,7 @@ def run(*options, cfg=None, local_rank=0, debug=False, input=None, distributed=F # Loss: #criterion = torch.nn.CrossEntropyLoss(weight=class_weights, ignore_index=255, reduction="mean") - criterion = lambda x, y: L.lovasz_softmax(x, y, classes = list(range(n_classes)), ignore=255) + criterion = lambda x, y: L.lovasz_softmax(x, y, classes = list(range(n_classes)), ignore=255, per_image = True) # Model: if distributed: From 3047e47d307483a921b57be04057dddc5c9d35c8 Mon Sep 17 00:00:00 2001 From: Max Kaznady Date: Wed, 12 Aug 2020 19:31:54 +0000 Subject: [PATCH 12/15] fixed data drop in README; changed default HRNet batch size --- README.md | 2 +- experiments/interpretation/dutchf3_patch/configs/hrnet.yaml | 2 +- scripts/byod_competition.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 6804da50..7cf50124 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,7 @@ wget -o /dev/null -O dataset.h5 https://zenodo.org/record/3924682/files/dataset. # convert penobscot python byod_penobscot.py --filename dataset.h5 --outdir # preprocess for experiments -python prepare_dutchf3.py split_train_val patch --data_dir= --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both --section_stride=100 +python prepare_dutchf3.py split_train_val patch --data_dir= --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both ``` ### Run Examples diff --git a/experiments/interpretation/dutchf3_patch/configs/hrnet.yaml b/experiments/interpretation/dutchf3_patch/configs/hrnet.yaml index 94921bf7..ba8d4236 100644 --- a/experiments/interpretation/dutchf3_patch/configs/hrnet.yaml +++ b/experiments/interpretation/dutchf3_patch/configs/hrnet.yaml @@ -67,7 +67,7 @@ MODEL: FUSE_METHOD: SUM TRAIN: - BATCH_SIZE_PER_GPU: 16 + BATCH_SIZE_PER_GPU: 32 BEGIN_EPOCH: 0 END_EPOCH: 300 MIN_LR: 0.001 diff --git a/scripts/byod_competition.py b/scripts/byod_competition.py index 0bb4dbc1..978d24c5 100644 --- a/scripts/byod_competition.py +++ b/scripts/byod_competition.py @@ -5,7 +5,7 @@ Run example: python byod_competition.py --train --label --outdir -python prepare_dutchf3.py split_train_val patch --data_dir= --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both --section_stride=100 +python prepare_dutchf3.py split_train_val patch --data_dir= --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both # information to include in configuration file when running: @@ -35,7 +35,7 @@ # kick off run as: python byod_competition.py --train /home/maxkaz/data/seismic/TrainingData_Image.segy --label /home/maxkaz/data/seismic/TrainingData_Labels.segy --outdir /home/maxkaz/data/seismic -python prepare_dutchf3.py split_train_val patch --data_dir=/home/maxkaz/data/seismic --label_file=train/train_labels.npy --output_dir=splits 
diff --git a/scripts/byod_competition.py b/scripts/byod_competition.py
index 0bb4dbc1..978d24c5 100644
--- a/scripts/byod_competition.py
+++ b/scripts/byod_competition.py
@@ -5,7 +5,7 @@
 Run example:
 python byod_competition.py --train --label --outdir
-python prepare_dutchf3.py split_train_val patch --data_dir= --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both --section_stride=100
+python prepare_dutchf3.py split_train_val patch --data_dir= --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both
 
 # information to include in configuration file when running:
 
@@ -35,7 +35,7 @@
 # kick off run as:
 python byod_competition.py --train /home/maxkaz/data/seismic/TrainingData_Image.segy --label /home/maxkaz/data/seismic/TrainingData_Labels.segy --outdir /home/maxkaz/data/seismic
-python prepare_dutchf3.py split_train_val patch --data_dir=/home/maxkaz/data/seismic --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both --section_stride=100
+python prepare_dutchf3.py split_train_val patch --data_dir=/home/maxkaz/data/seismic --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both
 
 NGPU=2
 python -m torch.distributed.launch --nproc_per_node=${NGPU} train.py \

From fdfe04ee074c048273cc0456fa897ad131ac24de Mon Sep 17 00:00:00 2001
From: Max Kaznady
Date: Mon, 14 Sep 2020 16:02:26 -0400
Subject: [PATCH 13/15] final tweaks to SEG 2020 submission

---
 .../interpretation/dutchf3_patch/test.py |  6 ++
 scripts/byod_competition.py              | 12 ++--
 scripts/seg20_test_process.py            | 59 +++++++++++++++++++
 3 files changed, 73 insertions(+), 4 deletions(-)
 create mode 100644 scripts/seg20_test_process.py

diff --git a/experiments/interpretation/dutchf3_patch/test.py b/experiments/interpretation/dutchf3_patch/test.py
index bfa3c3f1..9e5275f8 100644
--- a/experiments/interpretation/dutchf3_patch/test.py
+++ b/experiments/interpretation/dutchf3_patch/test.py
@@ -474,9 +474,14 @@ def _evaluate_split(
         del pred_sum
         _compute_3D_metrics(gt_labels, pred, n_classes, split)
         np.save(os.path.join(output_dir, f"test_simple_avg_split_{split}.npy"), pred)
+        # use existing SEGY file as a template to write our data into
+        SEGY_INFILE=f"/data/seismic/TestData_Image{split[-1]}.segy"
+        if os.path.isfile(SEGY_INFILE):  # input segy file is the ground truth here
+            # adjust for competition to make classes start from 1 and not 0
+            pred += 1
             write_segy(os.path.join(output_dir, f"pred_simple_avg_split_{split}.segy"), SEGY_INFILE, pred.swapaxes(0, 2))
         else:
             # write array into segy using array dimensions for # of inlines and crosslines
@@ -495,6 +500,7 @@ def _evaluate_split(
         # use existing SEGY file as a template to write our data into
         if os.path.isfile(SEGY_INFILE):  # input segy file is the ground truth here
+            pred += 1
             write_segy(os.path.join(output_dir, f"pred_geometric_avg_split_{split}.segy"), SEGY_INFILE, pred.swapaxes(0, 2))
         else:
             # write array into segy using array dimensions for # of inlines and crosslines
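`write_segy` above reuses the competition's input SEGY file as a geometry template, and the `pred += 1` shift converts the model's 0-based classes to the competition's 1-based labels before writing. For readers without the repo helper, here is a hedged sketch of the same template-copy idea using segyio directly, following segyio's documented copy recipe; function and file names are illustrative and the actual helper may differ:

```python
import numpy as np
import segyio

def write_cube_like(template_path, out_path, cube):
    """Copy geometry and headers from template_path; replace trace samples with cube.

    cube is laid out (ilines, xlines, samples), as returned by segyio.tools.cube;
    traces are assumed iline-major, matching that layout.
    """
    with segyio.open(template_path) as src:
        spec = segyio.tools.metadata(src)
        with segyio.create(out_path, spec) as dst:
            dst.text[0] = src.text[0]  # textual header
            dst.bin = src.bin          # binary header
            dst.header = src.header    # per-trace headers
            for i, trace in enumerate(cube.reshape(-1, cube.shape[-1])):
                dst.trace[i] = np.ascontiguousarray(trace, dtype=np.float32)

# e.g. write_cube_like("/data/seismic/TestData_Image1.segy",
#                      "pred_simple_avg_split_test1.segy", pred + 1)
```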
diff --git a/scripts/byod_competition.py b/scripts/byod_competition.py
index 978d24c5..3bc4d4a8 100644
--- a/scripts/byod_competition.py
+++ b/scripts/byod_competition.py
@@ -5,7 +5,7 @@
 Run example:
 python byod_competition.py --train --label --outdir
-python prepare_dutchf3.py split_train_val patch --data_dir= --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both
+python prepare_dutchf3.py split_train_val patch --data_dir= --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both
 
 # information to include in configuration file when running:
 
@@ -31,11 +31,10 @@
 INFO:root:STANDARD DEVIATION
 INFO:root:0.31477982
 
-
 # kick off run as:
 python byod_competition.py --train /home/maxkaz/data/seismic/TrainingData_Image.segy --label /home/maxkaz/data/seismic/TrainingData_Labels.segy --outdir /home/maxkaz/data/seismic
-python prepare_dutchf3.py split_train_val patch --data_dir=/home/maxkaz/data/seismic --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both
+python prepare_dutchf3.py split_train_val patch --data_dir=/home/maxkaz/data/seismic --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both
 
 NGPU=2
 python -m torch.distributed.launch --nproc_per_node=${NGPU} train.py \
@@ -66,6 +65,8 @@
 TEST.SPLIT 'both' --cfg configs/unet.yaml > unet.log 2>&1 &
 
+nohup time python test.py DATASET.ROOT "/data/seismic" DATASET.NUM_CLASSES 6 DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" TRAIN.MEAN 0.0 TRAIN.STD 0.31477982 TEST.SPLIT 'both' MODEL.PRETRAINED /home/maxkaz/Downloads/hrnetv2_w48_imagenet_pretrained.pth TEST.MODEL_PATH --cfg configs/hrnet.yaml
+
 """
 from deepseismic_interpretation.data import read_segy
@@ -109,6 +110,7 @@ def main(args):
     logging.info("Running 3-sigma clipping")
     clip_scaling = 3.0
     mean, std = data.mean(), data.std()
+    logging.info(f"mean {mean} std {std}")
     data[data > mean + clip_scaling * std] = mean + clip_scaling * std
     data[data < mean - clip_scaling * std] = mean - clip_scaling * std
@@ -122,7 +124,9 @@
 
     # rescale to be within a certain range
     range_min, range_max = -1.0, 1.0
-    data_std = (data - data.min()) / (data.max() - data.min())
+    data_min, data_max = data.min(), data.max()  # avoid shadowing the min/max builtins
+    logging.info(f"min {data_min} max {data_max}")
+    data_std = (data - data_min) / (data_max - data_min)
     data = data_std * (range_max - range_min) + range_min
 
     """

diff --git a/scripts/seg20_test_process.py b/scripts/seg20_test_process.py
new file mode 100644
index 00000000..16d6832a
--- /dev/null
+++ b/scripts/seg20_test_process.py
@@ -0,0 +1,59 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""
+Custom one-off script to process the SEG20 competition test dataset.
+"""
+
+from deepseismic_interpretation.data import read_segy
+
+""" libraries """
+import segyio
+
+import numpy as np
+from scipy import stats
+import os
+
+np.set_printoptions(linewidth=200)
+import logging
+
+# toggle to WARNING when running in production, or use CLI
+logging.getLogger().setLevel(logging.DEBUG)
+
+# dataset locations
+N_CLASSES = 6
+TEST1 = "/data/seismic/TestData_Image1.segy"
+TEST2 = "/data/seismic/TestData_Image2.segy"
+# output location
+OUTDIR = "/data/seismic/test_once"
+# enter these from byod_competition logging output - computed on the training set
+MEAN = 0.676609992980957
+STD = 390.308837890625
+MIN = -1170.2498779296875
+MAX = 1171.6031494140625
+
+
+def process_test(infile, outdir, n_set):
+
+    logging.info("loading data")
+    data, _ = read_segy(infile)
+
+    logging.info("Running 3-sigma clipping")
+    clip_scaling = 3.0
+    data[data > MEAN + clip_scaling * STD] = MEAN + clip_scaling * STD
+    data[data < MEAN - clip_scaling * STD] = MEAN - clip_scaling * STD
+
+    # Make data cube fast to access
+    logging.info("Adjusting precision")
+    data = np.ascontiguousarray(data, "float32")
+
+    # rescale to be within a certain range
+    range_min, range_max = -1.0, 1.0
+    data_std = (data - MIN) / (MAX - MIN)
+    data = data_std * (range_max - range_min) + range_min
+
+    random_test_labels = np.random.randint(0, N_CLASSES, data.shape, dtype='uint8')  # placeholder labels; high bound is exclusive
+    np.save(os.path.join(outdir, f"test{n_set}_seismic.npy"), data)
+    np.save(os.path.join(outdir, f"test{n_set}_labels.npy"), random_test_labels)
+
+
+process_test(TEST1, OUTDIR, 1)
+process_test(TEST2, OUTDIR, 2)

From 9782338827d4e7f868b66ece9ef3496740da5284 Mon Sep 17 00:00:00 2001
From: Max Kaznady
Date: Fri, 18 Sep 2020 18:17:40 -0400
Subject: [PATCH 14/15] added script to check class distributions

---
 scripts/seg20_check_distrib.py | 48 ++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 scripts/seg20_check_distrib.py
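seg20_test_process.py applies the same clip-then-rescale transform as byod_competition.py, but with statistics frozen from the training run; note that MIN and MAX equal MEAN -/+ 3 * STD, since they were logged after clipping, so the rescale band coincides with the clip band. A standalone sketch of the transform, with illustrative numbers rather than the real statistics:

```python
import numpy as np

def clip_and_rescale(data, mean, std, n_sigma=3.0):
    """Clip to mean +/- n_sigma*std, then min-max rescale that band to [-1, 1]."""
    lo, hi = mean - n_sigma * std, mean + n_sigma * std
    data = np.ascontiguousarray(data, "float32")
    data = np.clip(data, lo, hi)
    return (data - lo) / (hi - lo) * 2.0 - 1.0

# illustrative cube and statistics, not the values logged by byod_competition.py
cube = (np.random.randn(50, 50, 50) * 400.0).astype("float32")
out = clip_and_rescale(cube, mean=float(cube.mean()), std=float(cube.std()))
print(out.min(), out.max())  # both endpoints land inside [-1, 1]
```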
diff --git a/scripts/seg20_check_distrib.py b/scripts/seg20_check_distrib.py
new file mode 100644
index 00000000..7eddfe64
--- /dev/null
+++ b/scripts/seg20_check_distrib.py
@@ -0,0 +1,48 @@
+# checks distribution across classes in the new SEG20 competition
+
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""
+Custom one-off script to check class distributions in the SEG20 competition labels and predictions.
+"""
+import collections
+
+from deepseismic_interpretation.data import read_segy
+
+""" libraries """
+import segyio
+
+import numpy as np
+from scipy import stats
+import os
+
+np.set_printoptions(linewidth=200)
+import logging
+
+# toggle to WARNING when running in production, or use CLI
+logging.getLogger().setLevel(logging.DEBUG)
+
+# dataset locations
+N_CLASSES = 6
+TRAIN = "/data/seismic_orig/TrainingData_Labels.segy"
+TEST1 = "/home/maxkaz/Desktop/pred_simple_avg_split_test1.segy"
+TEST2 = "/home/maxkaz/Desktop/pred_simple_avg_split_test2.segy"
+
+
+def check(infile):
+
+    data, _ = read_segy(infile)
+    n = data.size
+    counts = collections.Counter(data.astype(int).flatten().tolist())
+    ccounts = 0
+    for k in range(1, N_CLASSES + 1):
+        ccounts += counts[k]
+        if k in counts:
+            print(f"{k}: {float(counts[k])/n} = {counts[k]} / {n}")
+    print(f"coverage {ccounts/n}")
+
+
+check(TRAIN)
+check(TEST1)
+check(TEST2)
+
+logging.info("done")

From 647901761e1c042563420892de61bc3495dbeab3 Mon Sep 17 00:00:00 2001
From: Max Kaznady
Date: Fri, 18 Sep 2020 18:18:20 -0400
Subject: [PATCH 15/15] remove unnecessary libraries

---
 scripts/seg20_check_distrib.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/scripts/seg20_check_distrib.py b/scripts/seg20_check_distrib.py
index 7eddfe64..792bcc5e 100644
--- a/scripts/seg20_check_distrib.py
+++ b/scripts/seg20_check_distrib.py
@@ -11,11 +11,7 @@
 from deepseismic_interpretation.data import read_segy
 
 """ libraries """
-import segyio
-
 import numpy as np
-from scipy import stats
-import os
 
 np.set_printoptions(linewidth=200)
 import logging
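One practical note on check() above: Counter over `.flatten().tolist()` materializes every voxel as a Python object, which gets slow and memory-hungry on full cubes. A vectorized equivalent using np.unique, assuming the same 1-based competition labels:

```python
import numpy as np

def class_fractions(labels, n_classes=6):
    """Per-class voxel fractions and total coverage of classes 1..n_classes."""
    values, counts = np.unique(labels.astype(int), return_counts=True)
    fractions = {int(v): c / labels.size for v, c in zip(values, counts)}
    coverage = sum(fractions.get(k, 0.0) for k in range(1, n_classes + 1))
    return fractions, coverage

# e.g. on a prediction cube loaded with read_segy:
# data, _ = read_segy(TEST1)
# print(class_fractions(data))
```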