diff --git a/README.md b/README.md index c28d22e8..7cf50124 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,7 @@ wget -o /dev/null -O dataset.h5 https://zenodo.org/record/3924682/files/dataset. # convert penobscot python byod_penobscot.py --filename dataset.h5 --outdir # preprocess for experiments -python prepare_dutchf3.py split_train_val patch --data_dir= --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both --section_stride=100 +python prepare_dutchf3.py split_train_val patch --data_dir= --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both ``` ### Run Examples @@ -226,12 +226,20 @@ For seismic interpretation (segmentation), if you want to visualize cross-sectio To install [segyviewer](https://github.com/equinor/segyviewer) run: ```bash -conda env create -n segyviewer python=2.7 +conda create -n segyviewer python=2.7 conda activate segyviewer -conda install -c anaconda pyqt=4.11.4 +sudo apt install libqtwebkit4 +conda install -c conda-forge pyqt=4.11.4 pip install segyviewer ``` +If you run into any QtPy4 related problems after the installation, try running: +```bash +sudo add-apt-repository ppa:rock-core/qt4 +sudo apt update +sudo apt install libqt4-designer libqt4-opengl libqt4-svg libqtgui4 libqtwebkit4 +``` + To visualize cross-sections of a 3D volume, you can run [segyviewer](https://github.com/equinor/segyviewer) like so: ```bash diff --git a/cv_lib/cv_lib/segmentation/lovasz_losses.py b/cv_lib/cv_lib/segmentation/lovasz_losses.py new file mode 100644 index 00000000..ab989e7e --- /dev/null +++ b/cv_lib/cv_lib/segmentation/lovasz_losses.py @@ -0,0 +1,258 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +DO NOT REMOVE THIS COMMENT +Lovasz-Softmax and Jaccard hinge loss in PyTorch +Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License) +taken from https://github.com/bermanmaxim/LovaszSoftmax under MIT license +""" + +from __future__ import print_function, division + +import torch +from torch.autograd import Variable +import torch.nn.functional as F +import numpy as np + +try: + from itertools import ifilterfalse +except ImportError: # py3k + from itertools import filterfalse as ifilterfalse + + +def lovasz_grad(gt_sorted): + """ + Computes gradient of the Lovasz extension w.r.t sorted errors + See Alg. 1 in paper + """ + p = len(gt_sorted) + gts = gt_sorted.sum() + intersection = gts - gt_sorted.float().cumsum(0) + union = gts + (1 - gt_sorted).float().cumsum(0) + jaccard = 1. 
- intersection / union + if p > 1: # cover 1-pixel case + jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] + return jaccard + + +def iou_binary(preds, labels, EMPTY=1., ignore=None, per_image=True): + """ + IoU for foreground class + binary: 1 foreground, 0 background + """ + if not per_image: + preds, labels = (preds,), (labels,) + ious = [] + for pred, label in zip(preds, labels): + intersection = ((label == 1) & (pred == 1)).sum() + union = ((label == 1) | ((pred == 1) & (label != ignore))).sum() + if not union: + iou = EMPTY + else: + iou = float(intersection) / float(union) + ious.append(iou) + iou = mean(ious) # mean accross images if per_image + return 100 * iou + + +def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False): + """ + Array of IoU for each (non ignored) class + """ + if not per_image: + preds, labels = (preds,), (labels,) + ious = [] + for pred, label in zip(preds, labels): + iou = [] + for i in range(C): + if i != ignore: # The ignored label is sometimes among predicted classes (ENet - CityScapes) + intersection = ((label == i) & (pred == i)).sum() + union = ((label == i) | ((pred == i) & (label != ignore))).sum() + if not union: + iou.append(EMPTY) + else: + iou.append(float(intersection) / float(union)) + ious.append(iou) + ious = [mean(iou) for iou in zip(*ious)] # mean accross images if per_image + return 100 * np.array(ious) + + +# --------------------------- BINARY LOSSES --------------------------- + + +def lovasz_hinge(logits, labels, per_image=True, ignore=None): + """ + Binary Lovasz hinge loss + logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) + labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) + per_image: compute the loss per image instead of per batch + ignore: void class id + """ + if per_image: + loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore)) + for log, lab in zip(logits, labels)) + else: + loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore)) + return loss + + +def lovasz_hinge_flat(logits, labels): + """ + Binary Lovasz hinge loss + logits: [P] Variable, logits at each prediction (between -\infty and +\infty) + labels: [P] Tensor, binary ground truth labels (0 or 1) + ignore: label to ignore + """ + if len(labels) == 0: + # only void pixels, the gradients should be 0 + return logits.sum() * 0. + signs = 2. * labels.float() - 1. + errors = (1. 
- logits * Variable(signs)) + errors_sorted, perm = torch.sort(errors, dim=0, descending=True) + perm = perm.data + gt_sorted = labels[perm] + grad = lovasz_grad(gt_sorted) + loss = torch.dot(F.relu(errors_sorted), Variable(grad)) + return loss + + +def flatten_binary_scores(scores, labels, ignore=None): + """ + Flattens predictions in the batch (binary case) + Remove labels equal to 'ignore' + """ + scores = scores.view(-1) + labels = labels.view(-1) + if ignore is None: + return scores, labels + valid = (labels != ignore) + vscores = scores[valid] + vlabels = labels[valid] + return vscores, vlabels + + +class StableBCELoss(torch.nn.modules.Module): + def __init__(self): + super(StableBCELoss, self).__init__() + + def forward(self, input, target): + neg_abs = - input.abs() + loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log() + return loss.mean() + + +def binary_xloss(logits, labels, ignore=None): + """ + Binary Cross entropy loss + logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) + labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) + ignore: void class id + """ + logits, labels = flatten_binary_scores(logits, labels, ignore) + loss = StableBCELoss()(logits, Variable(labels.float())) + return loss + + +# --------------------------- MULTICLASS LOSSES --------------------------- + + +def lovasz_softmax(probas, labels, classes='present', per_image=False, ignore=None): + """ + Multi-class Lovasz-Softmax loss + probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1). + Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. + labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) + classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + per_image: compute the loss per image instead of per batch + ignore: void class labels + """ + if per_image: + loss = mean(lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), classes=classes) + for prob, lab in zip(probas, labels)) + else: + loss = lovasz_softmax_flat(*flatten_probas(probas, labels, ignore), classes=classes) + return loss + + +def lovasz_softmax_flat(probas, labels, classes='present'): + """ + Multi-class Lovasz-Softmax loss + probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) + labels: [P] Tensor, ground truth labels (between 0 and C - 1) + classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + """ + if probas.numel() == 0: + # only void pixels, the gradients should be 0 + return probas * 0. 
+ C = probas.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes + for c in class_to_sum: + fg = (labels == c).float() # foreground for class c + if (classes is 'present' and fg.sum() == 0): + continue + if C == 1: + if len(classes) > 1: + raise ValueError('Sigmoid output possible only with 1 class') + class_pred = probas[:, 0] + else: + class_pred = probas[:, c] + errors = (Variable(fg) - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) + return mean(losses) + + +def flatten_probas(probas, labels, ignore=None): + """ + Flattens predictions in the batch + """ + if probas.dim() == 3: + # assumes output of a sigmoid layer + B, H, W = probas.size() + probas = probas.view(B, 1, H, W) + B, C, H, W = probas.size() + probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C + labels = labels.view(-1) + if ignore is None: + return probas, labels + valid = (labels != ignore) + vprobas = probas[valid.nonzero().squeeze()] + vlabels = labels[valid] + return vprobas, vlabels + + +def xloss(logits, labels, ignore=None): + """ + Cross entropy loss + """ + return F.cross_entropy(logits, Variable(labels), ignore_index=255) + + +# --------------------------- HELPER FUNCTIONS --------------------------- +def isnan(x): + return x != x + + +def mean(l, ignore_nan=False, empty=0): + """ + nanmean compatible with generators. + """ + l = iter(l) + if ignore_nan: + l = ifilterfalse(isnan, l) + try: + n = 1 + acc = next(l) + except StopIteration: + if empty == 'raise': + raise ValueError('Empty mean') + return empty + for n, v in enumerate(l, 2): + acc += v + if n == 1: + return acc + return acc / n diff --git a/cv_lib/cv_lib/segmentation/models/unet.py b/cv_lib/cv_lib/segmentation/models/unet.py index ddb9197f..d73b75c1 100644 --- a/cv_lib/cv_lib/segmentation/models/unet.py +++ b/cv_lib/cv_lib/segmentation/models/unet.py @@ -4,6 +4,8 @@ """ Taken from https://github.com/milesial/Pytorch-UNet """ +import os + import torch import torch.nn as nn import torch.nn.functional as F diff --git a/environment/anaconda/local/environment.yml b/environment/anaconda/local/environment.yml index cf7078c1..503dd2b1 100644 --- a/environment/anaconda/local/environment.yml +++ b/environment/anaconda/local/environment.yml @@ -19,6 +19,7 @@ dependencies: - papermill>=1.0.1 - jupyterlab - pip: + - memory_profiler - segyio==1.8.8 - pytorch-ignite==0.3.0 - fire==0.2.1 diff --git a/experiments/interpretation/dutchf3_patch/configs/hrnet.yaml b/experiments/interpretation/dutchf3_patch/configs/hrnet.yaml index 94921bf7..ba8d4236 100644 --- a/experiments/interpretation/dutchf3_patch/configs/hrnet.yaml +++ b/experiments/interpretation/dutchf3_patch/configs/hrnet.yaml @@ -67,7 +67,7 @@ MODEL: FUSE_METHOD: SUM TRAIN: - BATCH_SIZE_PER_GPU: 16 + BATCH_SIZE_PER_GPU: 32 BEGIN_EPOCH: 0 END_EPOCH: 300 MIN_LR: 0.001 diff --git a/experiments/interpretation/dutchf3_patch/configs/unet.yaml b/experiments/interpretation/dutchf3_patch/configs/unet.yaml index 5ae1ee45..0597b8fa 100644 --- a/experiments/interpretation/dutchf3_patch/configs/unet.yaml +++ b/experiments/interpretation/dutchf3_patch/configs/unet.yaml @@ -11,7 +11,7 @@ WORKERS: 4 PRINT_FREQ: 10 LOG_CONFIG: logging.conf SEED: 2019 - +OPENCV_BORDER_CONSTANT: 0 DATASET: NUM_CLASSES: 6 @@ -21,7 +21,7 @@ DATASET: MAX: 1 MODEL: - NAME: resnet_unet + NAME: unet IN_CHANNELS: 3 diff --git 
a/experiments/interpretation/dutchf3_patch/test.py b/experiments/interpretation/dutchf3_patch/test.py index 2efeee85..9e5275f8 100644 --- a/experiments/interpretation/dutchf3_patch/test.py +++ b/experiments/interpretation/dutchf3_patch/test.py @@ -21,17 +21,26 @@ import fire import numpy as np +import segyio +from sklearn import metrics import torch import torch.nn.functional as F from albumentations import Compose, Normalize, PadIfNeeded, Resize from toolz import compose, curry, itertoolz, pipe, take from torch.utils import data +from deepseismic_interpretation.data import write_segy + from cv_lib.segmentation import models from cv_lib.segmentation.dutchf3.utils import current_datetime, git_branch, git_hash from cv_lib.utils import load_log_configuration, mask_to_disk, generate_path, image_to_disk -from deepseismic_interpretation.dutchf3.data import add_patch_depth_channels, get_test_loader +from deepseismic_interpretation.dutchf3.data import ( + add_patch_depth_channels, + get_test_loader, + _test1_labels_for, + _test2_labels_for, +) from default import _C as config from default import update_config @@ -44,23 +53,47 @@ "zechstein", ] +# we can optionally supply a segy file whose geometry we will use to write out 3D test set predictions +# if it doesn't exist, we will write a blank segy file with same dimensions as the predictions array +SEGY_INFILE = "/data/seismic/TrainingData_Labels.segy" + class runningScore(object): def __init__(self, n_classes): self.n_classes = n_classes self.confusion_matrix = np.zeros((n_classes, n_classes)) + # @profile def _fast_hist(self, label_true, label_pred, n_class): + """ + speed-optimized but not memory-optimized version of the confusion matrix calculation + """ + # logger = logging.getLogger(__name__) mask = (label_true >= 0) & (label_true < n_class) - hist = np.bincount(n_class * label_true[mask].astype(int) + label_pred[mask], minlength=n_class ** 2,).reshape( - n_class, n_class - ) + bincount_arg = n_class * label_true[mask].astype(int) + label_pred[mask] + # logger.info("bincount operation starting...") + hist = np.bincount(bincount_arg, minlength=n_class ** 2,) + hist = hist.reshape(n_class, n_class) + # logger.info("finished") return hist - def update(self, label_trues, label_preds): + # @profile + def _confusion_matrix(self, label_true, label_pred, n_class): + """ + memory-optimized but not speed-optimized version of the confusion matrix calculation + """ + mask = (label_true >= 0) & (label_true < n_class) + return metrics.confusion_matrix(label_true[mask], label_pred[mask], labels=list(range(n_class))) + + # @profile + def update(self, label_trues, label_preds, fast_hist=True): for lt, lp in zip(label_trues, label_preds): - self.confusion_matrix += self._fast_hist(lt.flatten(), lp.flatten(), self.n_classes) + if fast_hist: + self.confusion_matrix += self._fast_hist(lt.flatten(), lp.flatten(), self.n_classes) + else: + self.confusion_matrix += self._confusion_matrix(lt.flatten(), lp.flatten(), self.n_classes) + # @profile def get_scores(self): """Returns accuracy score evaluation result. - overall accuracy @@ -94,6 +127,49 @@ def reset(self): self.confusion_matrix = np.zeros((self.n_classes, self.n_classes)) +def _compute_3D_metrics(gt_labels, pred, n_classes, split): + """ + Compute 3D metrics on two 3D arrays. A good test case is to set gt==pred. 
+ + Args: + gt: ground truth 3D numpy array + pred: predictions 3D array + n_classes: number of classes + split: which test set split we're computing + + Returns: + Nothing - stdout print + + """ + + logger = logging.getLogger(__name__) + + score = runningScore(n_classes) + score.update(gt_labels, pred, fast_hist=True) + + score, class_iou = score.get_scores() + + logger.info(f"--------------- 3D RESULTS {split} -----------------") + logger.info(f'Pixel Acc: {score["Pixel Acc: "]:.4f}') + + logger.info(f'Mean Class Acc: {score["Mean Class Acc: "]:.4f}') + for cdx, class_name in enumerate(_CLASS_NAMES): + logger.info(f' class {cdx} named {class_name} accuracy {score["Class Accuracy: "][cdx]:.4f}') + + logger.info(f'Mean IoU: {score["Mean IoU: "]:0.4f}') + + for cdx, class_name in enumerate(_CLASS_NAMES): + logger.info(f" class {cdx} named {class_name} IoU {class_iou[cdx]:.4f}") + logger.info(f'Freq Weighted IoU: {score["Freq Weighted IoU: "]:.4f}') + + # Save confusion matrix: + logger.info("writing confusion matrix") + confusion = score["confusion_matrix"] + np.savetxt(f"confusion_split_{split}.csv", confusion, delimiter=" ") + + logger.info("----------------- 3D DONE ---------------------------") + + def _transform_CHW_to_HWC(numpy_array): return np.moveaxis(numpy_array, 0, -1) @@ -307,11 +383,15 @@ def _evaluate_split( running_metrics_split = runningScore(n_classes) + n_inlines, n_crosslines, n_depth = test_set.labels.shape + accum_inline = np.zeros((n_classes, n_depth, n_crosslines, n_inlines), dtype=np.float32) + accum_crossline = np.zeros((n_classes, n_depth, n_crosslines, n_inlines), dtype=np.float32) + # evaluation mode: with torch.no_grad(): # operations inside don't track history model.eval() for i, (images, labels) in enumerate(test_loader): - logger.info(f"split: {split}, section: {i}") + logger.info(f"split: {split}, section: {test_set.sections[i]}") outputs = _patch_label_2d( model, images, @@ -328,6 +408,21 @@ def _evaluate_split( config.DATASET.MAX, ) + # for debugging, if you set this to GT then you can test if + # the reconstructions matches test_set.labels + preds_numpy = outputs.detach().squeeze().numpy().astype(np.float32) + + # direction is channel x depth x crossline x inline + + # dealing with inline + if test_set.sections[i].startswith("i"): + accum_inline[:, :, :, i] = preds_numpy + # dealing with crossline + elif test_set.sections[i].startswith("x"): + accum_crossline[:, :, i-n_inlines, :] = preds_numpy + else: + raise Exception("we need either an inline or crossline split") + pred = outputs.detach().max(1)[1].numpy() gt = labels.numpy() if debug: @@ -364,6 +459,57 @@ def _evaluate_split( logger.info(f'Mean IoU: {score["Mean IoU: "]:0.3f}') running_metrics_split.reset() + ###################################################################### + # 3D: now compute metrics on full 3D volume + ###################################################################### + + gt_labels = test_set.labels.swapaxes(0, 2).astype(np.uint8) + assert gt_labels.shape == accum_inline.shape[1:] + assert gt_labels.shape == accum_crossline.shape[1:] + + # compute mIoU here + logging.info("Simple average") + pred_sum = accum_inline + accum_crossline + pred = pred_sum.argmax(0).astype(np.uint8) + del pred_sum + _compute_3D_metrics(gt_labels, pred, n_classes, split) + np.save(os.path.join(output_dir, f"test_simple_avg_split_{split}.npy"), pred) + + # use existing SEGY file as a template to write our data into + SEGY_INFILE=f"/data/seismic/TestData_Image{split[-1]}.segy" + + if 
os.path.isfile(SEGY_INFILE): + # input segy file is the ground truth here + # adjust for competition to make classes start from 1 and not 0 + pred += 1 + write_segy(os.path.join(output_dir, f"pred_simple_avg_split_{split}.segy"), SEGY_INFILE, pred.swapaxes(0, 2)) + else: + # write array into segy using array dimensions for # of inlines and crosslines + # make sure directions are inline, crossline, depth + logging.info("writing segy files") + segyio.tools.from_array3D(os.path.join(output_dir, f"pred_simple_avg_split_{split}.segy"), pred.swapaxes(0, 2), dt=1000) + segyio.tools.from_array3D(os.path.join(output_dir, f"groundtruth_simple_avg_split_{split}.segy"), gt_labels.swapaxes(0, 2), dt=1000) + logging.info("done") + + logging.info("Geometric average") + pred_sum = np.sqrt(accum_inline * accum_crossline) + pred = pred_sum.argmax(0).astype(np.uint8) + del pred_sum + _compute_3D_metrics(gt_labels, pred, n_classes, split) + np.save(os.path.join(output_dir, f"test_geometric_avg_split_{split}.npy"), pred) + # use existing SEGY file as a template to write our data into + if os.path.isfile(SEGY_INFILE): + # input segy file is the ground truth here + pred += 1 + write_segy(os.path.join(output_dir, f"pred_geometric_avg_split_{split}.segy"), SEGY_INFILE, pred.swapaxes(0, 2)) + else: + # write array into segy using array dimensions for # of inlines and crosslines + # make sure directions are inline, crossline, depth + logging.info("writing segy files") + segyio.tools.from_array3D(os.path.join(output_dir,f"pred_geometric_avg_split_{split}.segy"), pred.swapaxes(0, 2), dt=1000) + segyio.tools.from_array3D(os.path.join(output_dir,f"groundtruth_geometric_avg_split_{split}.segy"), gt_labels.swapaxes(0, 2), dt=1000) + logging.info("done") + def _write_section_file(labels, section_file): # define indices of the array diff --git a/experiments/interpretation/dutchf3_patch/train.py b/experiments/interpretation/dutchf3_patch/train.py index fc585817..5076c541 100644 --- a/experiments/interpretation/dutchf3_patch/train.py +++ b/experiments/interpretation/dutchf3_patch/train.py @@ -37,6 +37,7 @@ from cv_lib.segmentation.dutchf3.utils import current_datetime, git_branch, git_hash from cv_lib.segmentation.metrics import class_accuracy, class_iou, mean_class_accuracy, mean_iou, pixelwise_accuracy from cv_lib.utils import generate_path, load_log_configuration +from cv_lib.segmentation import lovasz_losses as L from deepseismic_interpretation.dutchf3.data import get_patch_loader from default import _C as config from default import update_config @@ -95,7 +96,11 @@ def run(*options, cfg=None, local_rank=0, debug=False, input=None, distributed=F if distributed: # FOR DISTRIBUTED: Set the device according to local_rank. - torch.cuda.set_device(local_rank) + # if we're running on a single GPU (multi-GPU development), set to the same GPU + if torch.cuda.device_count()==1: + torch.cuda.set_device(0) + else: + torch.cuda.set_device(local_rank) # FOR DISTRIBUTED: Initialize the backend. torch.distributed.launch will # provide environment variables, and requires that you use init_method=`env://`. 
@@ -232,7 +237,8 @@ def run(*options, cfg=None, local_rank=0, debug=False, input=None, distributed=F class_weights = torch.tensor(config.DATASET.CLASS_WEIGHTS, device=device, requires_grad=False) # Loss: - criterion = torch.nn.CrossEntropyLoss(weight=class_weights, ignore_index=255, reduction="mean") + #criterion = torch.nn.CrossEntropyLoss(weight=class_weights, ignore_index=255, reduction="mean") + criterion = lambda x, y: L.lovasz_softmax(x, y, classes = list(range(n_classes)), ignore=255, per_image = True) # Model: if distributed: diff --git a/interpretation/deepseismic_interpretation/data.py b/interpretation/deepseismic_interpretation/data.py index 53ec7f9f..a3ad5008 100644 --- a/interpretation/deepseismic_interpretation/data.py +++ b/interpretation/deepseismic_interpretation/data.py @@ -5,6 +5,10 @@ import math from collections import defaultdict +import logging +# toggle to WARNING when running in production, or use CLI +logging.getLogger().setLevel(logging.DEBUG) + import numpy as np import torch from PIL import Image @@ -13,6 +17,11 @@ from torch.utils.data import Dataset from torchvision.datasets.utils import iterable_to_str, verify_str_arg +import segyio + +from shutil import copyfile + + _open_to_array = compose(np.array, Image.open) @@ -20,6 +29,73 @@ class DataNotSplitException(Exception): pass +def read_segy(filename): + """ + Read in a SEGY-format file given a filename + + Args: + filename: input filename + + Returns: + numpy data array and its info as a dictionary (tuple) + + """ + logging.info(f"Loading data cube from {filename}") + + # Read full data cube + data = segyio.tools.cube(filename) + + # Read meta data + segyfile = segyio.open(filename, "r") + print(" Crosslines: ", segyfile.xlines[0], ":", segyfile.xlines[-1]) + print(" Inlines: ", segyfile.ilines[0], ":", segyfile.ilines[-1]) + print(" Timeslices: ", "1", ":", data.shape[2]) + + # Make dict with cube-info + # TODO: read this from segy + # Read dt and other params needed to do create a new + data_info = { + "crossline_start": segyfile.xlines[0], + "inline_start": segyfile.ilines[0], + "timeslice_start": 1, + "shape": data.shape, + } + + return data, data_info + + +def write_segy(out_filename, in_filename, out_cube): + """ + Writes out_cube to a segy-file (out_filename) with same header/size as in_filename + + Args: + out_filename: output filename + in_filename: input file, whose metadata will be copied + out_cube: array which we write to out_filename + + Returns: + Nothing + """ + + logging.info("Writing interpretation to " + out_filename) + + # Copy segy file + copyfile(in_filename, out_filename) + + # Open out-file + with segyio.open(out_filename, "r+") as src: + + iline_start = src.ilines[0] + # set type to inlines + dtype = src.iline[iline_start].dtype + + # loop through inlines and insert output + for i in src.ilines: + iline = out_cube[i - iline_start, :, :] + src.iline[i] = np.ascontiguousarray(iline.astype(dtype)) + + logging.info("Writing interpretation - finished") + def _get_classes_and_counts(mask_list): class_counts_dict = defaultdict(int) for mask in mask_list: diff --git a/scripts/byod_competition.py b/scripts/byod_competition.py new file mode 100644 index 00000000..3bc4d4a8 --- /dev/null +++ b/scripts/byod_competition.py @@ -0,0 +1,222 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +""" +Run example: + +python byod_competition.py --train --label --outdir +python prepare_dutchf3.py split_train_val patch --data_dir= --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both + +# information to include in configuration file when running: + +clip +INFO:root:[0.84979262 0.57790153 0.95866329 0.71236326 0.99004844 0.91123086] +INFO:root:MEAN +INFO:root:4.183678e-05 +INFO:root:STANDARD DEVIATION +INFO:root:0.31477982 + +noclip +INFO:root:[0.84979262 0.57790153 0.95866329 0.71236326 0.99004844 0.91123086] +INFO:root:MEAN +INFO:root:0.0043642526 +INFO:root:STANDARD DEVIATION +INFO:root:0.07544233 + +reduced test size +INFO:root:[0.84979262 0.57790153 0.95866329 0.71236326 0.99004844 0.91123086] +INFO:root:[0.84979262 0.57790153 0.95866329 0.71236326 0.99004844 0.91123086] +INFO:root:MEAN +INFO:root:4.183678e-05 +INFO:root:STANDARD DEVIATION +INFO:root:0.31477982 + +# kick off run as: + +python byod_competition.py --train /home/maxkaz/data/seismic/TrainingData_Image.segy --label /home/maxkaz/data/seismic/TrainingData_Labels.segy --outdir /home/maxkaz/data/seismic +python prepare_dutchf3.py split_train_val patch --data_dir=/home/maxkaz/data/seismic --label_file=train/train_labels.npy --output_dir=splits --stride=50 --patch_size=100 --split_direction=both + +NGPU=2 +python -m torch.distributed.launch --nproc_per_node=${NGPU} train.py \ +TRAIN.BATCH_SIZE_PER_GPU 2 VALIDATION.BATCH_SIZE_PER_GPU 2 \ +DATASET.ROOT "/data/seismic" DATASET.NUM_CLASSES 6 DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" \ +TRAIN.MEAN 0.0 TRAIN.STD 0.31477982 \ +--distributed --cfg configs/seresnet_unet.yaml + +nohup time \ +python -m torch.distributed.launch --nproc_per_node=4 train.py \ +DATASET.ROOT "/home/maxkaz/data/seismic" DATASET.NUM_CLASSES 6 DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" \ +TRAIN.MEAN 0.0 TRAIN.STD 0.31477982 \ +--distributed --cfg configs/seresnet_unet.yaml > se.log 2>&1 & + +nohup time \ +python -m torch.distributed.launch --nproc_per_node=4 train.py \ +MODEL.PRETRAINED "/home/alfred/models/hrnetv2_w48_imagenet_pretrained.pth" \ +DATASET.ROOT "/home/maxkaz/data/seismic" DATASET.NUM_CLASSES 6 DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" \ +TRAIN.MEAN 0.0 TRAIN.STD 0.31477982 \ +--distributed --cfg configs/hrnet.yaml > hr.log 2>&1 & + +Scoring: + +nohup time \ +python test.py \ +DATASET.ROOT "/data/seismic" DATASET.NUM_CLASSES 6 DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" \ +TRAIN.MEAN 0.0 TRAIN.STD 0.31477982 \ +TEST.SPLIT 'both' +--cfg configs/unet.yaml > unet.log 2>&1 & + +nohup time python test.py DATASET.ROOT "/data/seismic" DATASET.NUM_CLASSES 6 DATASET.CLASS_WEIGHTS "[0.84979262, 0.57790153, 0.95866329, 0.71236326, 0.99004844, 0.91123086]" TRAIN.MEAN 0.0 TRAIN.STD 0.31477982 TEST.SPLIT 'both' MODEL.PRETRAINED /home/maxkaz/Downloads/hrnetv2_w48_imagenet_pretrained.pth TEST.MODEL_PATH --cfg configs/hrnet.yaml + +""" + +from deepseismic_interpretation.data import read_segy + +""" libraries """ +import segyio + +import numpy as np +from scipy import stats +import os + +np.set_printoptions(linewidth=200) +import logging + +# toggle to WARNING when running in production, or use CLI +logging.getLogger().setLevel(logging.DEBUG) +# logging.getLogger().setLevel(logging.WARNING) +import argparse + +parser = argparse.ArgumentParser() 
+
+""" useful information when running from a GIT folder."""
+myname = os.path.realpath(__file__)
+mypath = os.path.dirname(myname)
+myname = os.path.basename(myname)
+
+
+def main(args):
+    """
+    Converts the SEG competition SEGY training image and label volumes into the train/test_once NumPy layout used by the DeepSeismic experiments
+    """
+
+    logging.info("loading data")
+    data, _ = read_segy(args.train)
+    labels, _ = read_segy(args.label)
+
+    assert labels.min() == 1.0
+    n_classes = labels.max()
+    assert n_classes == N_CLASSES
+
+    logging.info("Running 3-sigma clipping")
+    clip_scaling = 3.0
+    mean, std = data.mean(), data.std()
+    logging.info(f"mean {mean} std {std}")
+    data[data > mean + clip_scaling * std] = mean + clip_scaling * std
+    data[data < mean - clip_scaling * std] = mean - clip_scaling * std
+
+    # Make data cube fast to access
+    logging.info("Adjusting precision")
+    data = np.ascontiguousarray(data, "float32")
+    labels = np.ascontiguousarray(labels, "uint8")
+
+    # adjust labels to start from zero
+    labels -= 1
+
+    # rescale to be within a certain range
+    range_min, range_max = -1.0, 1.0
+    min, max = data.min(), data.max()
+    logging.info(f"min {min} max {max}")
+    data_std = (data - min) / (max - min)
+    data = data_std * (range_max - range_min) + range_min
+
+    """
+    # cut off a buffer zone around the volume (to avoid mislabeled data):
+    buffer = 25
+    data = data[:, buffer:-buffer, buffer:-buffer]
+    labels = labels[:, buffer:-buffer, buffer:-buffer]
+    """
+
+    # time by crosslines by inlines
+    n_inlines = data.shape[0]
+    n_crosslines = data.shape[1]
+
+    inline_cut = int(np.floor(n_inlines * INLINE_FRACTION))
+    crossline_cut = int(np.floor(n_crosslines * CROSSLINE_FRACTION))
+
+    data_train = data[0:inline_cut, 0:crossline_cut, :]
+    data_test1 = data[inline_cut:n_inlines, :, :]
+    data_test2 = data[:, crossline_cut:n_crosslines, :]
+
+    labels_train = labels[0:inline_cut, 0:crossline_cut, :]
+    labels_test1 = labels[inline_cut:n_inlines, :, :]
+    labels_test2 = labels[:, crossline_cut:n_crosslines, :]
+
+    def mkdir(dirname):
+
+        if os.path.isdir(dirname) and os.path.exists(dirname):
+            return
+
+        if not os.path.isdir(dirname) and os.path.exists(dirname):
+            logging.info(f"remove file {dirname} and run this script again")
+
+        os.mkdir(dirname)
+
+    mkdir(args.outdir)
+    mkdir(os.path.join(args.outdir, "splits"))
+    mkdir(os.path.join(args.outdir, "train"))
+    mkdir(os.path.join(args.outdir, "test_once"))
+
+    np.save(os.path.join(args.outdir, "train", "train_seismic.npy"), data_train)
+    np.save(os.path.join(args.outdir, "train", "train_labels.npy"), labels_train)
+
+    np.save(os.path.join(args.outdir, "test_once", "test1_seismic.npy"), data_test1)
+    np.save(os.path.join(args.outdir, "test_once", "test1_labels.npy"), labels_test1)
+
+    np.save(os.path.join(args.outdir, "test_once", "test2_seismic.npy"), data_test2)
+    np.save(os.path.join(args.outdir, "test_once", "test2_labels.npy"), labels_test2)
+
+    # Compute class weights:
+    num_classes, class_count = np.unique(labels[:], return_counts=True)
+    # class_probabilities = np.histogram(labels[:], bins= , density=True)
+    class_weights = 1 - class_count / np.sum(class_count)
+    logging.info("CLASS WEIGHTS TO USE")
+    logging.info(class_weights)
+    mean, std = data.mean(), data.std()
+    logging.info("MEAN")
+    logging.info(mean)
+    logging.info("STANDARD DEVIATION")
+    logging.info(std)
+
+
+""" GLOBAL VARIABLES """
+INLINE_FRACTION = 0.9
+CROSSLINE_FRACTION = 0.9
+N_CLASSES = 6
+
+parser.add_argument("--train", help="Name of train data", type=str, required=True)
+parser.add_argument("--label", help="Name of train labels data", 
type=str, required=True) +parser.add_argument("--outdir", help="Output data directory location", type=str, required=True) + +""" main wrapper with profiler """ +if __name__ == "__main__": + main(parser.parse_args()) + +# pretty printing of the stack +""" + try: + logging.info('before main') + main(parser.parse_args()) + logging.info('after main') + except: + for frame in traceback.extract_tb(sys.exc_info()[2]): + fname,lineno,fn,text = frame + print ("Error in %s on line %d" % (fname, lineno)) +""" +# optionally enable profiling information +# import cProfile +# name = +# cProfile.run('main.run()', name + '.prof') +# import pstats +# p = pstats.Stats(name + '.prof') +# p.sort_stats('cumulative').print_stats(10) +# p.sort_stats('time').print_stats() diff --git a/scripts/byod_penobscot.py b/scripts/byod_penobscot.py index 8ccc8b6e..32e65d5f 100644 --- a/scripts/byod_penobscot.py +++ b/scripts/byod_penobscot.py @@ -120,7 +120,7 @@ def mkdir(dirname): """ GLOBAL VARIABLES """ INLINE_FRACTION = 0.7 -CROSSLINE_FRACTION = 1.0 +CROSSLINE_FRACTION = 0.78 N_CLASSES = 8 parser.add_argument("--filename", help="Name of HDF5 data", type=str, required=True) diff --git a/scripts/seg20_check_distrib.py b/scripts/seg20_check_distrib.py new file mode 100644 index 00000000..792bcc5e --- /dev/null +++ b/scripts/seg20_check_distrib.py @@ -0,0 +1,44 @@ +# checks distribution across classes in the new SEG20 competition + +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Custom one-off script to process the SEG20 competition test dataset. +""" +import collections + +from deepseismic_interpretation.data import read_segy + +""" libraries """ +import numpy as np + +np.set_printoptions(linewidth=200) +import logging + +# toggle to WARNING when running in production, or use CLI +logging.getLogger().setLevel(logging.DEBUG) + +# dataset locations +N_CLASSES = 6 +TRAIN = "/data/seismic_orig/TrainingData_Labels.segy" +TEST1 = "/home/maxkaz/Desktop/pred_simple_avg_split_test1.segy" +TEST2 = "/home/maxkaz/Desktop/pred_simple_avg_split_test2.segy" + +def check(infile): + + data, _ = read_segy(infile) + n = data.size + counts = collections.Counter(data.astype(int).flatten().tolist()) + ccounts = 0 + for k in range(1,N_CLASSES+1): + ccounts += counts[k] + if k in counts: + print(f"{k}: {float(counts[k])/n} = {counts[k]} / {n}") + print(f"coverage {ccounts/n}") + +check(TRAIN) +check(TEST1) +check(TEST2) + +logging.info("done") diff --git a/scripts/seg20_test_process.py b/scripts/seg20_test_process.py new file mode 100644 index 00000000..16d6832a --- /dev/null +++ b/scripts/seg20_test_process.py @@ -0,0 +1,59 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Custom one-off script to process the SEG20 competition test dataset. 
+"""
+
+from deepseismic_interpretation.data import read_segy
+
+""" libraries """
+import segyio
+
+import numpy as np
+from scipy import stats
+import os
+
+np.set_printoptions(linewidth=200)
+import logging
+
+# toggle to WARNING when running in production, or use CLI
+logging.getLogger().setLevel(logging.DEBUG)
+
+# dataset locations
+N_CLASSES = 6
+TEST1 = "/data/seismic/TestData_Image1.segy"
+TEST2 = "/data/seismic/TestData_Image2.segy"
+# output location
+OUTDIR = "/data/seismic/test_once"
+# enter these from byod_competition logging output - computed on the training set
+MEAN = 0.676609992980957
+STD = 390.308837890625
+MIN = -1170.2498779296875
+MAX = 1171.6031494140625
+
+def process_test(infile, outdir, n_set):
+
+    logging.info("loading data")
+    data, _ = read_segy(infile)
+
+    logging.info("Running 3-sigma clipping")
+    clip_scaling = 3.0
+    data[data > MEAN + clip_scaling * STD] = MEAN + clip_scaling * STD
+    data[data < MEAN - clip_scaling * STD] = MEAN - clip_scaling * STD
+
+    # Make data cube fast to access
+    logging.info("Adjusting precision")
+    data = np.ascontiguousarray(data, "float32")
+
+    # rescale to be within a certain range
+    range_min, range_max = -1.0, 1.0
+    data_std = (data - MIN) / (MAX - MIN)
+    data = data_std * (range_max - range_min) + range_min
+
+    random_test_labels = np.random.randint(0, N_CLASSES, data.shape, dtype='uint8')  # placeholder labels over all classes; real test labels are not provided
+    np.save(os.path.join(outdir, f"test{n_set}_seismic.npy"), data)
+    np.save(os.path.join(outdir, f"test{n_set}_labels.npy"), random_test_labels)
+
+process_test(TEST1, OUTDIR, 1)
+process_test(TEST2, OUTDIR, 2)
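
For reference, the SEG-Y helpers added in `interpretation/deepseismic_interpretation/data.py` are designed to be used as a pair: `read_segy` loads a full cube plus basic geometry info, and `write_segy` copies an existing file and overwrites its traces inline by inline so headers and geometry are preserved. Below is a minimal round-trip sketch; the file paths are placeholders and the zero-filled "prediction" cube stands in for real model output.

```python
import numpy as np

from deepseismic_interpretation.data import read_segy, write_segy

# load the cube and its geometry info (crossline/inline/timeslice starts and shape)
data, data_info = read_segy("/data/seismic/TrainingData_Image.segy")  # placeholder path
print(data_info["shape"])  # (inlines, crosslines, samples)

# stand-in for model predictions with the same inline x crossline x depth layout
pred = np.zeros_like(data)

# copy the input file and write the predictions into it, keeping its headers/geometry
write_segy("/data/seismic/predictions.segy", "/data/seismic/TrainingData_Image.segy", pred)
```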