diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index 2f21ff8e..cfe752d7 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -27,4 +27,4 @@ jobs: run: | pip install pytest pip install -e . - pytest --ignore pensa/diffnets/tests/test_api.py --ignore pensa/diffnets/tests/test_cli.py --ignore pensa/diffnets/tests/test_diffnets.py + pytest diff --git a/pensa/diffnets/__init__.py b/pensa/diffnets/__init__.py deleted file mode 100644 index 27240f60..00000000 --- a/pensa/diffnets/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -""" -diffnets -Supervised and self-supervised autoencoders to identify the mechanistic basis for biochemical differences between protein variants. -""" - -# Add imports here -from . import training, data_processing, analysis, utils, exmax, nnutils - -# Handle versioneer -from ._version import get_versions -versions = get_versions() -__version__ = versions['version'] -__git_revision__ = versions['full-revisionid'] -del get_versions, versions diff --git a/pensa/diffnets/_version.py b/pensa/diffnets/_version.py deleted file mode 100644 index f31bf120..00000000 --- a/pensa/diffnets/_version.py +++ /dev/null @@ -1,520 +0,0 @@ - -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.18 (https://github.com/warner/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "$Format:%d$" - git_full = "$Format:%H$" - git_date = "$Format:%ci$" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "pep440" - cfg.tag_prefix = "" - cfg.parentdir_prefix = "None" - cfg.versionfile_source = "diffnets/_version.py" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY = {} -HANDLERS = {} - - -def register_vcs_handler(vcs, method): # decorator - """Decorator to mark a method as the handler for a particular VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - p = None - for c in commands: - try: - dispcmd = str([c] + args) - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, p.returncode - return stdout, p.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for i in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - else: - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if not keywords: - raise NotThisMethod("no keywords at all, weird") - date = keywords.get("date") - if date is not None: - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_pre(pieces): - """TAG[.post.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post.devDISTANCE - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] - else: - # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Eexceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for i in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} diff --git a/pensa/diffnets/analysis.py b/pensa/diffnets/analysis.py deleted file mode 100644 index 7de8ce32..00000000 --- a/pensa/diffnets/analysis.py +++ /dev/null @@ -1,881 +0,0 @@ -import mdtraj as md -import numpy as np -import itertools -from . import utils -import multiprocessing as mp -import os -import functools -from torch.autograd import Variable -import torch -from scipy import stats -from scipy.stats import pearsonr -from sklearn.metrics import roc_auc_score, roc_curve -import matplotlib.pyplot as plt -import enspara -import enspara.cluster as cluster -import enspara.info_theory as infotheor -import enspara.msm as msm -import enspara.cluster as cluster -import enspara.info_theory as infotheor -import pickle -import scipy.sparse -import sys -from pylab import * -from torch.autograd import Variable -from collections import defaultdict - -class Analysis: - """Core object for running analysis. 
- - Parameters - ---------- - net : nnutils object - Neural network to perform analysis with - netdir : str - path to directory with neural network results - datadir : str - path to directory with data required to train the data. Includes - cm.npy, wm.npy, uwm.npy, master.pdb, an aligned_xtcs dir, and an - indicators dir. - """ - def __init__(self, net, netdir, datadir): - self.net = net - self.netdir = netdir - self.datadir = datadir - self.top = md.load(os.path.join( - self.datadir, "master.pdb")) - self.cm = np.load(os.path.join(self.datadir, "cm.npy")) - self.n_cores = mp.cpu_count() - - def encode_data(self): - """Calculate the latent space for all trajectory frames. - """ - enc_dir = os.path.join(self.netdir, "encodings") - utils.mkdir(enc_dir) - xtc_dir = os.path.join(self.datadir, "aligned_xtcs") - encode_dir(self.net, xtc_dir, enc_dir, self.top, self.n_cores, self.cm) - - def recon_traj(self): - """Reconstruct all trajectory frames using the trained neural - network""" - recon_dir = os.path.join(self.netdir, "recon_trajs") - utils.mkdir(recon_dir) - enc_dir = os.path.join(self.netdir, "encodings") - recon_traj_dir(self.net, enc_dir, recon_dir, self.top.top, - self.cm, self.n_cores) - print("trajectories reconstructed") - - def get_labels(self): - """Calculate the classification score for all trajectory frames - """ - label_dir = os.path.join(self.netdir, "labels") - utils.mkdir(label_dir) - enc_dir = os.path.join(self.netdir, "encodings") - calc_labels(self.net, enc_dir, label_dir, self.n_cores) - print("labels calculated for all states") - - def get_rmsd(self): - """Calculate RMSD between actual trajectory frames and autoencoder - reconstructed frames""" - rmsd_fn = os.path.join(self.netdir, "rmsd.npy") - recon_dir = os.path.join(self.netdir, "recon_trajs") - orig_xtc_dir = os.path.join(self.datadir, "aligned_xtcs") - rmsd = rmsd_dists_dir(recon_dir, orig_xtc_dir, self.top, self.n_cores) - np.save(rmsd_fn, rmsd) - - def morph(self,n_frames=10): - """Get representative structures for classification scores - from 0 to 1. - - Parameters - ---------- - n_frames : int - How many representative structures to output. Bins between - 0 and 1 will be calculated with this number. - """ - morph_label(self.net,self.netdir,self.datadir,n_frames=n_frames) - - def assign_labels_to_variants(self,plot_labels=False): - """Map DiffNet labels to each variant with option to plot - a histogram of the labels. - - Parameters - ---------- - plot_labels : optional, boolean - Save a matplotlob figure of the label histogram. - - Returns - ------- - lab_v : dictionary - Dictionary mapping labels to their respective variants. 
- """ - - lab_fns = utils.get_fns(os.path.join(self.netdir,"labels"),"*.npy") - traj_d_path = os.path.join(self.datadir,"traj_dict.pkl") - traj_d = pickle.load(open(traj_d_path, 'rb')) - lab_v = defaultdict(list) - for key,item in traj_d.items(): - for traj_ind in range(item[0],item[1]): - lab = np.load(lab_fns[traj_ind]) - lab_v[key].append(lab) - - if plot_labels: - plt.figure(figsize=(16,16)) - axes = plt.gca() - lw = 8 - - for k in traj_d.keys(): - t = np.concatenate(lab_v[k]) - n, x = np.histogram(t, range=(0, 1), bins=50) - plt.plot(x[:-1],n,label=k,linewidth=lw) - - - plt.xticks(fontsize=36) - plt.yticks(fontsize=36) - axes.set_xlabel('DiffNet Label',labelpad=40, fontsize=36) - axes.set_ylabel('# of Simulation Frames',labelpad=40,fontsize=36) - axes.tick_params(direction='out', length=20, width=5, - grid_color='r', grid_alpha=0.5) - plt.legend(fontsize=36) - - for axis in ['top','bottom','left','right']: - axes.spines[axis].set_linewidth(5) - - plt.savefig(os.path.join(self.netdir,"label_plot.png")) - - return lab_v - - def find_feats(self,inds,out_fn,n_states=2000,num2plot=100,clusters=None): - """Generate a .pml file that will show the distances that change - in a way that is most with changes in the classifications score. - - Parameters - ---------- - inds : np.ndarray, - Indices of the topology file that are to be included in - calculating what distances are most correlated with classification - score. - out_fn : str - Name of the output file. - n_states : int (default=2000) - How many cluster centers to calculate and use for correlation - measurement. - num2plot : int (default=100) - Number of distances to be shown. - clusters : enspara cluster object - Cluster object with center_indices attribute - """ - if not clusters: - cc_dir = os.path.join(self.netdir, "cluster_%d" % n_states) - utils.mkdir(cc_dir) - - enc = utils.load_npy_dir(os.path.join(self.netdir, "encodings"), "*npy") - if hasattr(self.net,"split_inds"): - x = self.net.encoder1[-1].out_features - enc = enc[:,:x] - clusters = cluster.hybrid.hybrid(enc, euc_dist, - n_clusters=n_states, n_iters=1) - cluster_fn = os.path.join(cc_dir, "clusters.pkl") - pickle.dump(clusters, open(cluster_fn, 'wb')) - - find_features(self.net,self.datadir,self.netdir, - clusters.center_indices,inds,out_fn,num2plot=num2plot) - - def run_core(self): - """Wrapper to run the analysis functions that should be - run after training. 
- """ - self.encode_data() - - self.recon_traj() - - self.get_labels() - - self.get_rmsd() - -def euc_dist(trj, frame): - diff = np.abs(trj - frame) - try: - d = np.sqrt(np.sum(diff * diff, axis=1)) - except: - d = np.array([np.sqrt(np.sum(diff * diff))]) - return d - -def recon_traj(enc, net, top, cm): - n = len(enc) - n_atoms = top.n_atoms - x = Variable(torch.from_numpy(enc).type(torch.FloatTensor)) - coords = net.decode(x) - coords = coords.detach().numpy() - coords += cm - coords = coords.reshape((n, n_atoms, 3)) - traj = md.Trajectory(coords, top) - return traj - -def _recon_traj_dir(enc_fn, net, recon_dir, top, cm): - enc = np.load(enc_fn) - traj = recon_traj(enc, net, top, cm) - - new_fn = os.path.split(enc_fn)[1] - base_fn = os.path.splitext(new_fn)[0] - new_fn = base_fn + ".xtc" - new_fn = os.path.join(recon_dir, new_fn) - traj.save(new_fn) - -def recon_traj_dir(net, enc_dir, recon_dir, top, cm, n_cores): - enc_fns = utils.get_fns(enc_dir, "*.npy") - - pool = mp.Pool(processes=n_cores) - f = functools.partial(_recon_traj_dir, net=net, recon_dir=recon_dir, top=top, cm=cm) - pool.map(f, enc_fns) - pool.close() - -def _calc_labels(enc_fn, net, label_dir): - enc = np.load(enc_fn) - if hasattr(net,"split_inds"): - x = net.encoder1[-1].out_features - enc = enc[:,:x] - enc = Variable(torch.from_numpy(enc).type(torch.FloatTensor)) - labels = net.classify(enc) - labels = labels.detach().numpy() - - new_fn = os.path.split(enc_fn)[1] - new_fn = os.path.join(label_dir, "lab" + new_fn) - np.save(new_fn, labels) - -def calc_labels(net, enc_dir, label_dir, n_cores): - enc_fns = utils.get_fns(enc_dir, "*npy") - - pool = mp.Pool(processes=n_cores) - f = functools.partial(_calc_labels, net=net, label_dir=label_dir) - pool.map(f, enc_fns) - pool.close() - -def get_rmsd_dists(orig_traj, recon_traj): - n_frames = len(recon_traj) - if n_frames != len(orig_traj): - # should raise exception - print("Can't get rmsds between trajectories of different lengths") - return - pairwise_rmsd = [] - for i in range(0, n_frames, 10): - r = md.rmsd(recon_traj[i], orig_traj[i], parallel=False)[0] - pairwise_rmsd.append(r) - pairwise_rmsd = np.array(pairwise_rmsd) - return pairwise_rmsd - -def _rmsd_dists_dir(recon_fn, orig_xtc_dir, ref_pdb): - recon_traj = md.load(recon_fn, top=ref_pdb.top) - base_fn = os.path.split(recon_fn)[1] - orig_fn = os.path.join(orig_xtc_dir, base_fn) - orig_traj = md.load(orig_fn, top=ref_pdb.top) - pairwise_rmsd = get_rmsd_dists(orig_traj, recon_traj) - return pairwise_rmsd - -def rmsd_dists_dir(recon_dir, orig_xtc_dir, ref_pdb, n_cores): - recon_fns = utils.get_fns(recon_dir, "*.xtc") - - pool = mp.Pool(processes=n_cores) - f = functools.partial(_rmsd_dists_dir, orig_xtc_dir=orig_xtc_dir, ref_pdb=ref_pdb) - res = pool.map(f, recon_fns) - pool.close() - - pairwise_rmsd = np.concatenate(res) - return pairwise_rmsd - -def _encode_dir(xtc_fn, net, outdir, top, cm): - traj = md.load(xtc_fn, top=top) - n = len(traj) - n_atoms = traj.top.n_atoms - x = traj.xyz.reshape((n, 3*n_atoms))-cm - x = Variable(torch.from_numpy(x).type(torch.FloatTensor)) - if hasattr(net, 'split_inds'): - lat1, lat2 = net.encode(x) - output = torch.cat((lat1,lat2),1) - else: - output = net.encode(x) - output = output.detach().numpy() - new_fn = os.path.split(xtc_fn)[1] - new_fn = os.path.splitext(new_fn)[0] + ".npy" - new_fn = os.path.join(outdir, new_fn) - np.save(new_fn, output) - -def encode_dir(net, xtc_dir, outdir, top, n_cores, cm): - xtc_fns = utils.get_fns(xtc_dir, "*.xtc") - - pool = mp.Pool(processes=n_cores) - 
f = functools.partial(_encode_dir, net=net, outdir=outdir, top=top, cm=cm) - pool.map(f, xtc_fns) - pool.close() - -def morph_label(net,nn_dir,data_dir,n_frames=10): - pdb_fn = os.path.join(data_dir, "master.pdb") - ref_s = md.load(pdb_fn) - n_atoms = ref_s.top.n_atoms - uwm_fn = os.path.join(data_dir, "uwm.npy") - uwm = np.load(uwm_fn) - cm_fn = os.path.join(data_dir, "cm.npy") - cm = np.load(cm_fn) - enc = utils.load_npy_dir(os.path.join(nn_dir, "encodings"), "*npy") - n_latent = int(enc.shape[1]) - morph_dir = os.path.join(nn_dir, "morph_label") - if not os.path.exists(morph_dir): - os.mkdir(morph_dir) - - labels_dir = os.path.join(nn_dir,"labels") - labels = utils.load_npy_dir(labels_dir,"*.npy") - labels = labels.flatten() - - my_min = np.min(labels) - my_max = np.max(labels) - morph_enc = np.zeros((n_frames,n_latent)) - vals = np.linspace(my_min, my_max, n_frames) - delta = (vals[1] - vals[0]) * 0.5 - for i in range(n_frames): - val = vals[i] - inds = np.where(np.logical_and(labels>=val-delta, - labels<=val+delta))[0] - - for j in range(n_latent): - x = np.mean(enc[inds,j]) - morph_enc[i, j] = x - - #morph_enc = Variable(torch.from_numpy(morph_enc).type(torch.FloatTensor)) - morph_enc = np.array(morph_enc) - traj = recon_traj(morph_enc,net,ref_s.top,cm) - rmsf = get_rmsf(traj) - - out_fn = os.path.join(morph_dir, "morph_0-1.pdb") - traj.save_pdb(out_fn, bfactors=rmsf) - -def get_rmsf(traj): - x_mean = traj.xyz.mean(axis=0) - delta = traj.xyz - x_mean - d2 = np.einsum('ijk,ijk->ij', delta, delta) - p = 1.0*np.ones(len(traj)) / len(traj) - msf = np.einsum('ij,i->j', d2, p) - return np.sqrt(msf) - -def find_features(net,data_dir,nn_dir,clust_cents,inds,out_fn,num2plot=100): - #Need to atom custom indices - encs_dir = os.path.join(nn_dir,"encodings") - encs = utils.load_npy_dir(encs_dir,"*.npy") - encs = encs[clust_cents] - - cm = np.load(os.path.join(data_dir,"cm.npy")) - top = md.load(os.path.join(data_dir,"master.pdb")) - traj = recon_traj(encs,net,top.top,cm) - print("trajectory calculated") - all_pairs = list(itertools.product(inds, repeat=2)) - distances = md.compute_distances(traj,all_pairs) - - labels_dir = os.path.join(nn_dir,"labels") - labels = utils.load_npy_dir(labels_dir,"*.npy") - labels = labels[clust_cents] - - n = len(inds) - print(n, " distances being calculated") - r_values = [] - slopes = [] - for i in np.arange(n*n): - slope, intercept, r_value, p_value, std_err = stats.linregress(labels.flatten(),distances[:,i]) - r_values.append(r_value) - slopes.append(slope) - - r2_values = np.array(r_values)**2 - corr_slopes = [] - count = 0 - print("Starting to write pymol file") - f = open(os.path.join(nn_dir,out_fn), "w") - for i in np.argsort(r2_values)[-num2plot:]: - corr_slopes.append(slopes[i]) - #print(slopes[i],r2_values[i],i) - j,k = np.array(all_pairs)[i,:] - jnum = top.top.atom(j).residue.resSeq - jname = top.top.atom(j).name - knum = top.top.atom(k).residue.resSeq - kname = top.top.atom(k).name - if slopes[i] < 0: - f.write("distance dc%s, master and resi %s and name %s, master and resi %s and name %s\n" % (count,jnum,jname,knum,kname)) - f.write("color red, dc%s\n" % count) - f.write("hide label\n") - else: - f.write("distance df%s, master and resi %s and name %s, master and resi %s and name %s\n" % (count,jnum,jname,knum,kname)) - f.write("color blue, df%s\n" % count) - f.write("hide label\n") - count+=1 - f.close() - -######################################################### -# # -# Extra analysis functions # -# # 
-######################################################### - - -def calc_auc(net_fn,out_fn,data,labels): - net = pickle.load(open(net_fn, 'rb')) - net.cpu() - full_x = torch.from_numpy(data).type(torch.FloatTensor) - if hasattr(net, "encode"): - full_x = Variable(full_x.view(-1, 784).float()) - pred_x, latents, pred_class = net(full_x) - preds = pred_class.detach().numpy() - else: - full_x = Variable(full_x.view(-1, 3,32,32).float()) - preds = net(full_x).detach().numpy() - fpr, tpr, thresh = roc_curve(labels,preds) - auc = roc_auc_score(labels,preds.flatten()) - print("AUC: %f" % auc) - #plt.figure() - #lw = 2 - #plt.plot(fpr, tpr, color='darkorange', - # lw=lw, label='ROC curve (area = %f)' % auc) - #plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--') - #plt.xlim([0.0, 1.0]) - #plt.ylim([0.0, 1.05]) - #plt.xlabel('False Positive Rate') - #plt.ylabel('True Positive Rate') - #plt.title('Receiver operating characteristic example') - #plt.legend(loc="lower right") - #plt.savefig(out_fn) - #plt.close() - return auc, fpr, tpr - -def split_vars(d, vars): - n = len(d) - n_vars = len(vars) - n_per_var = int(len(d)/n_vars) - lst = {} - for i in range(n_vars): - v = vars[i] - lst[v] = d[i*n_per_var:(i+1)*n_per_var] - return lst - - -def get_extrema(lst_lst): - my_min = np.inf - my_max = -np.inf - for lst in lst_lst: - my_min = np.min((my_min, np.min(lst))) - my_max = np.max((my_max, np.max(lst))) - return my_min, my_max - - -def common_hist(lst_lst, labels, bins): - my_min, my_max = get_extrema(lst_lst) - n_lst = len(lst_lst) - all_h = {} - for i in range(n_lst): - h, x = np.histogram(lst_lst[i], bins=bins, range=(my_min, my_max)) - all_h[labels[i]] = h - return all_h, x - -def calc_overlap(d1, d2, bins): - n_feat = d1.shape[1] - js = np.zeros(n_feat) - ent1 = np.zeros(n_feat) - ent2 = np.zeros(n_feat) - for i in range(n_feat): - h, x = common_hist([d1[:, i], d2[:, i]], ["d1", "d2"], bins) - h1 = h["d1"] - h2 = h["d2"] - p1 = np.array(h1) / h1.sum() - p2 = np.array(h2) / h2.sum() - js[i] = infotheor.js_divergence(p1, p2) - ent1[i] = infotheor.shannon_entropy(p1) - ent2[i] = infotheor.shannon_entropy(p2) - return js, ent1, ent2 - -def project(enc, lab, vars, i1, i2, bins, my_title, cutoff=0.8): - subsample = 100 - - all_act_inds = np.where(lab>cutoff)[0] - act_i1_mu = enc[all_act_inds, i1].mean() - act_i1_std = enc[all_act_inds, i1].std() - act_i2_mu = enc[all_act_inds, i2].mean() - act_i2_std = enc[all_act_inds, i2].std() - - n_vars = len(vars) - enc_dict = split_vars(enc, vars) - lab_dict = split_vars(lab, vars) - i1_dict = {} - i2_dict = {} - act_inds = {} - for v in vars: - i1_dict[v] = enc_dict[v][:, i1] - i2_dict[v] = enc_dict[v][:, i2] - act_inds[v] = np.where(lab_dict[v]>cutoff)[0] - # i1_min, i1_max = get_extrema(i1_dict.values()) - # i2_min, i2_max = get_extrema(i2_dict.values()) - - # drop outliers by only show data within const*std - const = 3 - i1_mu = enc[:, i1].mean() - i1_std = enc[:, i1].std() - i1_min = np.max((i1_mu-const*i1_std, enc[:, i1].min())) - i1_max = np.min((i1_mu+const*i1_std, enc[:, i1].max())) - i2_mu = enc[:, i2].mean() - i2_std = enc[:, i2].std() - i2_min = np.max((i2_mu-const*i2_std, enc[:, i2].min())) - i2_max = np.min((i2_mu+const*i2_std, enc[:, i2].max())) - - # get min/max of z dim - cmin = np.inf - cmax = -np.inf - for i in range(n_vars): - v = vars[i] - tmp, x, y = np.histogram2d(i1_dict[v], i2_dict[v], range=([i1_min, i1_max], [i2_min, i2_max]), bins=n_bins) - tmp /= tmp.sum() - h = np.zeros(tmp.shape) - inds = np.where(tmp>0) - h[inds] = 
-np.log(tmp[inds]) - #inds = np.where(np.isnan(h)) - #h[inds] = 0 - cmin = np.min((cmin, h[inds].min())) - cmax = np.max((cmax, h[inds].max())) - - height = 4 - width = height*n_vars - fig = figure(figsize=(width, height)) - fig.suptitle(my_title) - bins = 20 - dot_size = 0.1 - for i in range(n_vars): - v = vars[i] - ax = fig.add_subplot(1, n_vars, i+1, aspect='auto', xlim=x[[0, -1]], ylim=y[[0, -1]]) - #scatter(i1_dict[v], i2_dict[v], s=dot_size, c='b', alpha=0.1) - tmp, x, y = np.histogram2d(i1_dict[v], i2_dict[v], range=([i1_min, i1_max], [i2_min, i2_max]), bins=n_bins) - tmp /= tmp.sum() - h = cmax*np.ones(tmp.shape) - inds = np.where(tmp>0) - h[inds] = -np.log(tmp[inds]) - h -= cmax - delta_x = (x[1]-x[0])/2.0 - delta_y = (y[1]-y[0])/2.0 - #imshow(h, interpolation='bilinear', aspect='auto', origin='low', extent=[x[0]+delta_x, x[-1]+delta_x, y[0]+delta_y, y[-1]+delta_y], vmin=cmin-cmax, vmax=0, cmap=get_cmap('Blues_r')) - # transpose to put first dimension (i1) on x axis - #imshow(h.T, interpolation='bilinear', aspect='auto', origin='low', extent=[y[0]+delta_y, y[-1]+delta_y, x[0]+delta_x, x[-1]+delta_x], vmin=cmin-cmax, vmax=0, cmap=get_cmap('Blues_r')) - imshow(h.T, interpolation='bilinear', aspect='auto', origin='low', extent=[x[0]+delta_x, x[-1]+delta_x, y[0]+delta_y, y[-1]+delta_y], vmin=cmin-cmax, vmax=0, cmap=get_cmap('Blues_r')) - colorbar() - - lines = [] - line_labels = [] - for v2 in vars: - i1_mu = i1_dict[v2].mean() - i1_std = i1_dict[v2].std() - i2_mu = i2_dict[v2].mean() - i2_std = i2_dict[v2].std() - #print(v, "x", i1_mu, i1_std) - #print(v, "y", i2_mu, i2_std) - line, _, _ = errorbar([i1_mu], [i2_mu], xerr=[i1_std], yerr=[i2_std], label=v2) - lines.append(line) - line_labels.append(v2) - - # inds = act_inds[v2] - # if inds.shape[0] > subsample: - # inds = inds[::subsample] - # print(inds.shape) - # if inds.shape[0] > 0: - # scatter(i1_dict[v2][inds], i2_dict[v2][inds], s=dot_size, c='k') - - line, _, _ = errorbar([act_i1_mu], [act_i2_mu], xerr=[act_i1_std], yerr=[act_i2_std], label='act', ecolor='k', fmt='k') - lines.append(line) - line_labels.append('act') - #legend() - - title(v) - # scatter([0], [0], s=dot_size*10, c='k') - # scatter([6], [0], s=dot_size*10, c='k') - # scatter([6], [6], s=dot_size*10, c='k') - fig.legend(lines, line_labels) - show() - -def morph_conditional(nn_dir, data_dir, n_frames=10): - net = pickle.load(open("%s/nn_best_polish.pkl" % nn_dir, 'rb')) - net.cpu() - pdb_fn = os.path.join(nn_dir, "master.pdb") - ref_s = md.load(pdb_fn) - n_atoms = ref_s.top.n_atoms - uwm_fn = os.path.join(data_dir, "uwm.npy") - uwm = np.load(uwm_fn) - cm_fn = os.path.join(data_dir, "cm.npy") - cm = np.load(cm_fn) - enc = load_npy_dir(os.path.join(nn_dir, "encodings"), "*npy") - n_latent = int(enc.shape[1]) - morph_dir = os.path.join(nn_dir, "morph") - if not os.path.exists(morph_dir): - os.mkdir(morph_dir) - - for i in range(n_latent): - my_min, my_max = get_extrema([enc[:, i]]) - print(i, my_min, my_max) - morph_enc = np.zeros((n_frames, n_latent)) - vals = np.linspace(my_min, my_max, n_frames) - delta = (vals[1] - vals[0]) * 0.5 - for j in range(n_frames): - val = vals[j] - - # set each latent variable to most probable value given latent(ind) within delta of selected value - inds = np.where(np.logical_and(enc[:,i]>=val-delta, enc[:,i]<=val+delta))[0] - for k in range(n_latent): - n, x = np.histogram(enc[inds, k], bins=20) - offset = (x[1] - x[0]) * 0.5 - morph_enc[j, k] = x[n.argmax()] + offset - - # fix ref latent variable to val - morph_enc[j, i] = val - - 
morph_enc = Variable(torch.from_numpy(morph_enc).type(torch.FloatTensor)) - try: - outputs, labs = net.decode(morph_enc) - except: - print("single") - outputs = net.decode(morph_enc) - outputs = outputs.data.numpy() - coords = whiten.apply_unwhitening(outputs, uwm, cm) - print("shape", coords.shape) - recon_trj = md.Trajectory(coords.reshape((n_frames, n_atoms, 3)), ref_s.top) - out_fn = os.path.join(morph_dir, "m%d.pdb" % i) - recon_trj.save(out_fn) - -def morph_cond_mean(nn_dir,data_dir,n_frames=10): - net = pickle.load(open("%s/nn_best_polish.pkl" % nn_dir, 'rb')) - net.cpu() - pdb_fn = os.path.join(nn_dir, "master.pdb") - ref_s = md.load(pdb_fn) - n_atoms = ref_s.top.n_atoms - uwm_fn = os.path.join(data_dir, "uwm.npy") - uwm = np.load(uwm_fn) - cm_fn = os.path.join(data_dir, "cm.npy") - cm = np.load(cm_fn) - enc = load_npy_dir(os.path.join(nn_dir, "encodings"), "*npy") - n_latent = int(enc.shape[1]) - morph_dir = os.path.join(nn_dir, "morph_bin_mean") - if not os.path.exists(morph_dir): - os.mkdir(morph_dir) - - for i in range(n_latent): - my_min, my_max = get_extrema([enc[:, i]]) - print(i, my_min, my_max) - morph_enc = np.zeros((n_frames, n_latent)) - vals = np.linspace(my_min, my_max, n_frames) - delta = (vals[1] - vals[0]) * 0.5 - for j in range(n_frames): - val = vals[j] - - # set each latent variable to most probable value given latent(ind) within delta of selected value - inds = np.where(np.logical_and(enc[:,i]>=val-delta, enc[:,i]<=val+delta))[0] - for k in range(n_latent): - x = np.mean(enc[inds,k]) - morph_enc[j, k] = x - - # fix ref latent variable to val - morph_enc[j, i] = val - - morph_enc = Variable(torch.from_numpy(morph_enc).type(torch.FloatTensor)) - traj = utils.recon_traj(morph_enc,net,ref_s.top,cm) - rmsf = get_rmsf(traj) - - out_fn = os.path.join(outdir, "m%d.pdb" % i) - traj.save_pdb(out_fn, bfactors=rmsf) - -def morph_std(nn_dir, data_dir, enc): - outdir = os.path.join(nn_dir, "morph_std") - utils.mkdir(outdir) - n_frames = 10 - - net = pickle.load(open("%s/nn_best_polish.pkl" % nn_dir, 'rb')) - net.cpu() - pdb_fn = os.path.join(nn_dir, "master.pdb") - ref_s = md.load(pdb_fn) - n_atoms = ref_s.top.n_atoms - cm_fn = os.path.join(data_dir, "cm.npy") - cm = np.load(cm_fn) - - n_latent = int(enc.shape[1]) - ave_enc = enc.mean(axis=0) - std_enc = enc.std(axis=0) - max_enc = enc.max(axis=0) - min_enc = enc.min(axis=0) - - # want vary between mean +/- 2*std but not go out of range - for i in range(n_latent): - #my_min = np.max((ave_enc[i]-5*std_enc[i], min_enc[i])) - #my_max = np.min((ave_enc[i]+5*std_enc[i], max_enc[i])) - my_min = min_enc[i] - my_max = max_enc[i] - - morph_enc = np.zeros((n_frames, n_latent)) + ave_enc - morph_enc[:, i] = np.linspace(my_min, my_max, n_frames) - traj = utils.recon_traj(morph_enc, net, ref_s.top, cm) - - rmsf = get_rmsf(traj) - - out_fn = os.path.join(outdir, "m%d.pdb" % i) - traj.save_pdb(out_fn, bfactors=rmsf) - -def get_act_inact(nn_dir, data_dir, enc, labels): - """Save most active/inactive sturctures with RMSDs from target less than 2 Angstroms.""" - outdir = os.path.join(nn_dir, "act_and_inact") - utils.mkdir(outdir) - n_extreme = 1000 - - net = pickle.load(open("%s/nn_best_polish.pkl" % nn_dir, 'rb')) - net.cpu() - pdb_fn = os.path.join(nn_dir, "master.pdb") - ref_s = md.load(pdb_fn) - ca_inds = ref_s.top.select('name CA') - n_atoms = ref_s.top.n_atoms - cm_fn = os.path.join(data_dir, "cm.npy") - cm = np.load(cm_fn) - - rmsd_cutoff = 0.2 - rmsd_fn = os.path.join(nn_dir, "rmsd.npy") - rmsd = np.load(rmsd_fn) - good_inds = 
np.where(rmsdio', coords, coords) - #matmul is faster - c00 = np.matmul(coords.transpose(),coords) - assert isinstance(c00.flat[0], np.double) - np.save(os.path.join(self.xtc_dir, "cov"+traj_num+".npy"),c00) - - def _get_c00_xtc(self, xtc_fn, top, cm): - """Reshape MDTraj Trajectory item into an array of XYZ - coordinates and then call other function to calculate - covariance matrix, c00. - - Parameters - ---------- - xtc_fn : str - Path to trajectory - top : md.Trajectory object - Topology corresponding to the trajectory - cm : np.ndarray, shape=(3*n_atoms,) - Avg. center of mass of each atom across all trajectories. - - Returns - ------- - n : int - Number of data frames in the trajectory - """ - traj = md.load(xtc_fn, top=top) - traj_num = xtc_fn.split("/")[-1].split(".")[0] - n = len(traj) - n_atoms = traj.top.n_atoms - coords = traj.xyz.astype(np.double).reshape((n, 3 * n_atoms)) - self.get_c00(coords,cm,traj_num) - return n - - def get_c00_xtc_list(self, xtc_fns, top, cm, n_cores): - """Calculate the covariance matrix across all trajectories. - - Parameters - ---------- - xtc_fn : list of str's - Paths to trajectories. - top : md.Trajectory object - Topology corresponding to the trajectories - cm : np.ndarray, shape=(3*n_atoms,) - Avg. center of mass of each atom across all trajectories. - n_cores : int - Number of threads to parallelize task across. - - Returns - ------- - c00 : np.ndarray, shape=(n_atoms*3,n_atoms*3) - Covariance matrix across all trajectories - """ - pool = mp.Pool(processes=n_cores) - f = functools.partial(self._get_c00_xtc, top=top, cm=cm) - result = pool.map_async(f, xtc_fns) - result.wait() - r = result.get() - pool.close() - - c00_fns = np.sort(glob.glob(os.path.join(self.xtc_dir, "cov*.npy"))) - c00 = np.sum(np.load(c00_fn) for c00_fn in c00_fns) - c00 /= sum(r) - assert isinstance(c00.flat[0], np.double) - return c00 - - def get_wuw_mats(self, c00): - """Calculate whitening matrix and unwhitening matrix. - Method adapted from deeptime (https://github.com/markovmodel/deeptime/blob/master/time-lagged-autoencoder/tae/utils.py) - - Parameters - ---------- - c00 : np.ndarray, shape=(n_atoms*3,n_atoms*3) - Covariance matrix - - Returns - ------- - uwm : np.ndarray, shape=(n_atoms*3,n_atoms*3) - unwhitening matrix - wm : np.ndarray, shape=(n_atoms*3,n_atoms*3) - whitening matrix - """ - # Previous implementation - # uwm = sqrtm(c00).real - # wm = inv(uwm).real - # return uwm, wm - - # Updated implementation - e, v = torch.symeig(torch.from_numpy(c00).double(), eigenvectors=True) - # In valid covariance matrix the smallest eigenvalue should be positive - # because the covariance matrix is a positive semidefinite matrix - # https://stats.stackexchange.com/questions/52976/is-a-sample-covariance-matrix-always-symmetric-and-positive-definite - assert torch.min(e) > 0.0 - d = torch.diag(1.0 / torch.sqrt(e)) - wm = torch.mm(torch.mm(v, d), v.t()) - return inv(wm.numpy()), wm.numpy() - - def apply_unwhitening(self, whitened, uwm, cm): - """ Apply whitening to XYZ coordinates. - - Parameters - ---------- - whitened : np.ndarray, shape=(n_frames,3*n_atoms) - Whitened XYZ coordinates of a trajectory. - wm : np.ndarray, shape=(n_atoms*3,n_atoms*3) - whitening matrix - cm : np.ndarray, shape=(3*n_atoms,) - Avg. center of mass of each atom across all trajectories. - - Returns - ------- - coords : np.ndarray, shape=(n_frames,3*n_atoms) - XYZ coordinates of a trajectory. 
- """ - # multiply each row in whitened by c00_sqrt - coords = np.einsum('ij,aj->ai', uwm, whitened) - coords += cm - return coords - - def apply_whitening(self, coords, wm, cm): - """ Apply whitening to XYZ coordinates. - - Parameters - ---------- - coords : np.ndarray, shape=(n_frames,3*n_atoms) - XYZ coordinates of a trajectory. - wm : np.ndarray, shape=(n_atoms*3,n_atoms*3) - whitening matrix - cm : np.ndarray, shape=(3*n_atoms,) - Avg. center of mass of each atom across all trajectories. - - Returns - ------- - whitened : np.ndarray, shape=(n_frames,3*n_atoms) - Whitened XYZ coordinates of a trajectory. - """ - # multiply each row in coords by inv_c00 - whitened = np.einsum('ij,aj->ai', wm, coords) - return whitened - - def _apply_whitening_xtc_fn(self, xtc_fn, top, outdir, wm, cm): - """Apply data whitening to a trajectory file - - - Parameters - ---------- - xtc_fn : str - Path to trajectory - top : md.Trajectory object - Topology corresponding to the trajectories - outdir : str - Directory to output whitened trajectory - wm : np.ndarray, shape=(n_atoms*3,n_atoms*3) - whitening matrix - cm : np.ndarray, shape=(3*n_atoms,) - Avg. center of mass of each atom across all trajectories. - """ - print("whiten", xtc_fn) - traj = md.load(xtc_fn, top=top) - - n = len(traj) - n_atoms = traj.top.n_atoms - coords = traj.xyz.reshape((n, 3 * n_atoms)) - coords -= cm - whitened = self.apply_whitening(coords, wm, cm) - dir, fn = os.path.split(xtc_fn) - new_fn = os.path.join(outdir, fn) - traj = md.Trajectory(whitened.reshape((n, n_atoms, 3)), top) - traj.save(new_fn) - - def apply_whitening_xtc_dir(self,xtc_dir, top, wm, cm, n_cores, outdir): - """Apply data whitening parallelized across many trajectories - - Parameters - ---------- - xtc_fn : list of str's - Paths to trajectories. - top : md.Trajectory object - Topology corresponding to the trajectories - outdir : str - Directory to output whitened trajectory - wm : np.ndarray, shape=(n_atoms*3,n_atoms*3) - whitening matrix - cm : np.ndarray, shape=(3*n_atoms,) - Avg. center of mass of each atom across all trajectories. - n_cores : int - Number of threads to parallelize task across. - """ - xtc_fns = np.sort(glob.glob(os.path.join(xtc_dir, "*.xtc"))) - - pool = mp.Pool(processes=n_cores) - f = functools.partial(self._apply_whitening_xtc_fn, top=top, outdir=outdir, wm=wm, cm=cm) - pool.map(f, xtc_fns) - pool.close() - - def run(self): - """Whiten existing processed trajectory data in self.data_dir - to calculate and write out a covariance matrix (c00.npy), a - whitening matrix (wm.npy) and an unwhitening matrix (uwm.npy). 
- """ - outdir = self.data_dir - whitened_dir = os.path.join(outdir,"whitened_xtcs") - mkdir(whitened_dir) - n_cores = mp.cpu_count() - traj_fns = get_fns(self.xtc_dir, "*.xtc") - master = md.load(os.path.join(outdir,"master.pdb")) - c00 = self.get_c00_xtc_list(traj_fns, master.top, self.cm, n_cores) - c00_fn = os.path.join(outdir,"c00.npy") - np.save(c00_fn, c00) - c00_fns = np.sort(glob.glob(os.path.join(self.xtc_dir, "cov*.npy"))) - for fn in c00_fns: - os.remove(fn) - uwm, wm = self.get_wuw_mats(c00) - uwm_fn = os.path.join(outdir, "uwm.npy") - np.save(uwm_fn, uwm) - wm_fn = os.path.join(outdir, "wm.npy") - np.save(wm_fn, wm) - #self.apply_whitening_xtc_dir(self.myNav.xtc_dir, master.top, wm, self.cm, n_cores, whitened_dir) - - - diff --git a/pensa/diffnets/exmax.py b/pensa/diffnets/exmax.py deleted file mode 100755 index 6358dd13..00000000 --- a/pensa/diffnets/exmax.py +++ /dev/null @@ -1,234 +0,0 @@ -""" -Copyright 2015 by Washington University in Saint Louis. Authored by S. Joshua Swamidass. -A license to use for strictly non-commerical use is granted. -Derivative work is not permited without prior written authorization. -All other rights reserved. -""" - -from scipy import inf, asarray, array, rand, zeros, prod, where, allclose -from scipy.stats import pearsonr - -def distribution_of_sum(P, ignore_idx=set()): - """ - Given a set of binomial random variables parameterized by a vector P. - Ignoring variables in ignore_idx... - What is the distribution of their sum? - - Output is the discreet distribution D, where the probability of a - specific sum s is D[s]. - O(N^2) time in length of P - - For example, using this P... - >>> P = [0.5, 0.25, 0.5] - - >>> distribution_of_sum(P) - array([ 0.1875, 0.4375, 0.3125, 0.0625]) - - Ignoring the 2nd (1 in zero indexing) variable, we have... - >>> distribution_of_sum(P, [1]) - array([ 0.25, 0.5 , 0.25, 0. ]) - """ - P = asarray(P) - N = P.shape[0] - P1 = 1 - P #convenience variable that is 1 minus P - - #starting distribution of the sum (no variables in summation) is D[0] = 1 and D[>0] = 0 - #in otherwords, we know the sum is zero. - D = zeros(N + 1) - D[0] = 1 - - - - for n in range(N): - if n in ignore_idx: - continue #skip P[n] if n in ignore_idx - - #Convolution of the distribution of the nth variable with the summation distribution for the first n-1 variables. - C = D * P1[n] - C[1:] += D[:-1] * P[n] - - D = C #the distribution of the sum of the first n variables - - #D is now the distribution of sums for all variables - return D - - -def expectation_range_CUBIC(P, lower, upper): - """ - Given a set of binomial random variables parameterized by a vector P. - Conditioned on the number successes between lower and upper (inclusive). - What is the expectation of each random variable? - - Output is a vector E of expectations. - O(N^3) time in length of P - - >>> R = rand(10) # a random vector of probabilities 10 elements long. - >>> - >>> lower, upper = 3, 6 - >>> - >>> EC = expectation_range_CUBIC(R, lower, upper) - >>> EE = expectation_range_EXP(R, lower, upper) - - >>> correlation, pvalue = pearsonr(EE, EC) - >>> correlation > .99 and pvalue < .01 - True - - This shows that both versions yield results that are > 99% correlated. 
- """ - P = asarray(P) - N = P.shape[0] - E = zeros(N) - if upper == 0: - return E - - D = distribution_of_sum(P) - PROB_IN_RANGE = sum(D[max(0, lower): min(N, upper + 1)]) - - for i in range(N): - #Implementation described in paper - D = distribution_of_sum(P, [i]) - PROB_IN_RANGE_CONDITION_ON_I = sum(D[max(0, lower - 1): min(N, upper)]) - E[i] = P[i] * PROB_IN_RANGE_CONDITION_ON_I / PROB_IN_RANGE - - #Alternative, equivalent implementation - #POS = sum(D[max(0, lower - 1): min(N, upper)]) * P[i] - #NEG = sum(D[max(0, lower): min(N, upper + 1)]) * (1 - P[i]) - #E[i] = POS/ (POS + NEG) - return E - - -def expectation_range_EXP(P, lower, upper): - """ - Given a set of binomial random variables parameterized by a vector P. - Condition on the number successes between lower and upper (inclusive). - What is the expectation of each random variable? - - This version is slow, but more conceptually clear. - - Output is a vector E of expectations. - O(2^N) time in length of P - - This version suffers from floating point error, and should not be used - for anything other than testing. - """ - P = asarray(P) - N = P.shape[0] - P1 = 1 - P - E = zeros(N) - if upper == 0: - return E - - import itertools - - D = 0 - - for S in itertools.product(*tuple([[1, 0]] * N)): #iterate over all binary vectors of length N - SUM = sum(S) - if SUM >= upper or SUM <= lower: - continue #skip the vectors without the right sum - - S = array(S) - - p = prod(where(S, P, P1)) #probability of S according to P - - E += p * S #summing up this vector's contribution to the final expectation - D += p #sum up this contribution to the denominator - - E = E / D #normalize by dividing by $\sum Pr(S)$ - return E - -def expectation_or_LINEAR(P, E_or): - """ - Given a set of binomial random variables parameterized by a vector P. - Conditioned on E[at least one success] = E_or - What is the expectation of each random variable? - - Output is a vector E of expectations. - - All the implementations should produce the same results. - - >>> R = rand(10) - >>> - >>> EL = expectation_or_LINEAR(R, 1) - >>> EC = expectation_or_CUBIC(R, 1) - >>> EE = expectation_E_EXP(R, 1) - - >>> correlation, pvalue = pearsonr(EL, EC) - >>> correlation > .99 and pvalue < .01 - True - - >>> allclose(EL, EE) - True - >>> correlation, pvalue = pearsonr(EL, EE) - >>> correlation > .99 and pvalue < .01 - True - - This shows that all versions yield results that are > 99% correlated. - - And we know the results for some simple cases. - - >>> expectation_or([0.5, 0.5], 1) - array([ 0.66666667, 0.66666667]) - >>> expectation_or([0.5, 0.5], .75) - array([ 0.5, 0.5]) - """ - P = asarray(P) - # boundary case that is easy to compuate and would cause problems if we - # let it pass through - if any(P == 1): - return P * E_or - - # probability of success at this index, but failure or success at all - # others - P1 = P - # probability of failure at this index, but success at one or more others - P0 = (1 - prod(1 - P) / (1 - P)) * (1 - P) - # given that at least one is success, what is the probability of success - # for this index - PS = P1 / (P1 + P0) - # but the true probability is E_or, so adjust expectations down and return - return PS * E_or - - -def expectation_or_CUBIC(P, E_or): - """ - Given a set of binomial random variables parameterized by a vector P. - Conditioned on E[at least one success] = E_or - What is the expectation of each random variable? - - Output is a vector E of expectations. 
- - alternate, equivalent implementation for error checking - the problem with this implementation is that it is O(N^3) time - """ - return expectation_range(P, 1, inf) * E_or - - -def expectation_E_EXP(P, E_or): - """ - Given a set of binomial random variables parameterized by a vector P. - Conditioned on E[at least one success] = E_or - What is the expectation of each random variable? - - Output is a vector E of expectations. - - alternate, equivalent implementation for error checking - the problem with this implementation is that it is exponential time - """ - P = asarray(P) - P1 = 1 - P - N = P.shape[0] - E = zeros(N) - import itertools - for S in itertools.product(*tuple([[1, 0]] * N)): #iterate over all binary vectors of length N - S = array(S) - p = prod(where(S, P, P1)) #compute the probability according to P of vector - E += p * S #accumulate the probability-weighted average - E = E * E_or / (1 - prod(P1)) #divide by the probability of getting at least 1 success and multiply times E_or - return E - - -expectation_or = expectation_or_LINEAR -expectation_range = expectation_range_CUBIC - - diff --git a/pensa/diffnets/nnutils.py b/pensa/diffnets/nnutils.py deleted file mode 100755 index 7c93cd9c..00000000 --- a/pensa/diffnets/nnutils.py +++ /dev/null @@ -1,596 +0,0 @@ -# supervised autoencoders -import mdtraj as md -import multiprocessing as mp -import numpy as np -import os -import itertools -import torch -import torch.nn as nn -import torch.nn.functional as F - -from torch.autograd import Variable - -class split_ae(nn.Module): - """Unsupervised autoencoder with a split input (i.e. 2 encoders) - - Parameters - ---------- - layer_sizes : list - List of integers indicating the size of each layer in the - encoder including the latent layer. First two must be identical. - inds1 : np.ndarray - Indices in the training input array that go into encoder1. - inds2 : np.ndarray - Indices in the training input array that go into encoder2. 
- wm : np.ndarray, shape=(n_inputs,n_inputs) - Whitening matrix -- is applied to input data - uwm : np.ndarray, shape=(n_inputs,n_inputs) - unwhitening matrix - """ - - def __init__(self,layer_sizes,inds1,inds2,wm,uwm): - super(split_ae, self).__init__() - self.sizes = layer_sizes - self.n = len(self.sizes) - self.inds1 = inds1 - self.inds2 = inds2 - self.n_features = len(self.inds1)+len(self.inds2) - self.wm1 = wm[self.inds1[:,None],self.inds1] - self.wm2 = wm[self.inds2[:,None],self.inds2] - self.uwm = uwm - self.ratio = len(inds1)/(len(inds1)+len(inds2)) - - self.encoder1 = nn.ModuleList() - self.encoder2 = nn.ModuleList() - self.encoder1.append(nn.Linear(len(inds1),len(inds1))) - self.encoder2.append(nn.Linear(len(inds2),len(inds2))) - for i in range(1,self.n-1): - small_layer_in = int(np.ceil(self.sizes[i]*self.ratio)) - print(small_layer_in) - small_layer_out = int(np.ceil(self.sizes[i+1]*self.ratio)) - print(small_layer_out) - big_layer_in = int(np.floor(self.sizes[i] * (1-self.ratio))) - print(big_layer_in) - big_layer_out = int(np.floor(self.sizes[i+1] * (1-self.ratio))) - print(big_layer_out) - #if small_layer_in < 3: - # small_layer_in = 3 - # big_layer_in = self.sizes[i]-3 - #if small_layer_out < 3: - # small_layer_out = 3 - # big_layer_out = self.sizes[i]-3 - - self.encoder1.append(nn.Linear(small_layer_in, small_layer_out)) - self.encoder2.append(nn.Linear(big_layer_in,big_layer_out)) - - self.decoder = nn.ModuleList() - for i in range(self.n-1,0,-1): - self.decoder.append(nn.Linear(self.sizes[i], self.sizes[i-1])) - - @property - def split_inds(self): - return True - - def freeze_weights(self,old_net=None): - """Procedure to make the whitening matrix and unwhitening matrix - as untrainable layers. Additionally, freezes weights associated - with a previously learned encoder layer. - - Parameters - ---------- - old_net : split_ae object - Previously trained network with overlapping architecture. Weights - learned in this previous networks encoder will be frozen in the - new network. - """ - vwm = Variable(torch.from_numpy(self.wm1).type(torch.FloatTensor)) - self.encoder1[0].weight.data = vwm - vz = Variable(torch.from_numpy(np.zeros(len(self.inds1))).type(torch.FloatTensor)) - self.encoder1[0].bias.data = vz - vwm2 = Variable(torch.from_numpy(self.wm2).type(torch.FloatTensor)) - self.encoder2[0].weight.data = vwm2 - vz2 = Variable(torch.from_numpy(np.zeros(len(self.inds2))).type(torch.FloatTensor)) - self.encoder2[0].bias.data = vz2 - for p in self.encoder1[0].parameters(): - p.requires_grad = False - for p in self.encoder2[0].parameters(): - p.requires_grad = False - self.decoder[-1].weight.data = Variable(torch.from_numpy(self.uwm).type(torch.FloatTensor)) - self.decoder[-1].bias.data = Variable(torch.from_numpy(np.zeros(self.n_features)).type(torch.FloatTensor)) - for p in self.decoder[-1].parameters(): - p.requires_grad = False - - if old_net: - n_old = len(old_net.encoder1) - for i in range(1,n_old): - self.encoder1[i].weight.data = old_net.encoder1[i].weight.data - self.encoder1[i].bias.data = old_net.encoder1[i].bias.data - for p in self.encoder1[i].parameters(): - p.requires_grad = False - self.encoder2[i].weight.data = old_net.encoder2[i].weight.data - self.encoder2[i].bias.data = old_net.encoder2[i].bias.data - for p in self.encoder2[i].parameters(): - p.requires_grad = False - - def unfreeze_weights(self): - """Makes all encoders weights trainable. 
- """ - n_old = len(self.encoder1) - for i in range(1,n_old): - for p in self.encoder1[i].parameters(): - p.requires_grad = True - for p in self.encoder2[i].parameters(): - p.requires_grad = True - - def encode(self,x): - """Pass the data through the encoder to the latent layer. - - Parameters - ---------- - x : torch.cuda.FloatTensor or torch.FloatTensor - Input data for a given sample - - Returns - ------- - lat1 : torch.cuda.FloatTensor or torch.FloatTensor - Latent space vector associated with encoder1 - lat2 : torch.cuda.FloatTensor or torch.FloatTensor - Latent space vector associated with encoder2 - """ - x1 = x[:,self.inds1] - x2 = x[:,self.inds2] - lat1 = self.encoder1[0](x1) - lat2 = self.encoder2[0](x2) - for i in range(1, self.n-1): - lat1 = F.leaky_relu(self.encoder1[i](lat1)) - lat2 = F.leaky_relu(self.encoder2[i](lat2)) - return lat1, lat2 - - def decode(self,latent): - """Pass the latent space vector through the decoder - - Parameters - ---------- - latent : torch.cuda.FloatTensor or torch.FloatTensor - Latent space vector - - Returns - ------- - recon : torch.cuda.FloatTensor or torch.FloatTensor - Reconstruction of the original input data - """ - recon = latent - for i in range(self.n-2): - recon = F.leaky_relu(self.decoder[i](recon)) - recon = self.decoder[-1](recon) - return recon - - def forward(self,x): - """Pass data through the entire network - - Parameters - ---------- - x : torch.cuda.FloatTensor or torch.FloatTensor - Input data for a given sample - - Returns - ------- - recon : torch.cuda.FloatTensor or torch.FloatTensor - Reconstruction of the original input data - latent : torch.cuda.FloatTensor or torch.FloatTensor - Latent space vector - """ - lat1, lat2 = self.encode(x) - latent = torch.cat((lat1,lat2),1) - recon = self.decode(latent) - - return recon, latent, None - -class split_sae(split_ae): - """Supervised autoencoder with split architecture""" - def __init__(self, layer_sizes,inds1,inds2,wm,uwm): - super(split_sae, self).__init__(layer_sizes,inds1,inds2,wm,uwm) - - self.classifier = nn.Linear(self.encoder1[-1].weight.data.shape[0], 1) - - def classify(self, latent): - """Perfom classification task using latent space representation - - Parameters - ---------- - latent : torch.cuda.FloatTensor or torch.FloatTensor - Latent space vector - - Returns - ------- - Value between 0 and 1 - """ - return torch.sigmoid(self.classifier(latent)) - - def forward(self, x): - """Pass data through the entire network - - Parameters - ---------- - x : torch.cuda.FloatTensor or torch.FloatTensor - Input data for a given sample - - Returns - ------- - recon : torch.cuda.FloatTensor or torch.FloatTensor - Reconstruction of the original input data - latent : torch.cuda.FloatTensor or torch.FloatTensor - Latent space vector - label : - Value between 0 and 1 - """ - lat1, lat2 = self.encode(x) - - label = self.classify(lat1) - - latent = torch.cat((lat1,lat2),1) - - recon = self.decode(latent) - return recon, latent, label - - -class ae(nn.Module): - """Unsupervised autoencoder - - Parameters - ---------- - layer_sizes : list - List of integers indicating the size of each layer in the - encoder including the latent layer. First two must be identical. 
-    wm : np.ndarray, shape=(n_inputs,n_inputs)
-        Whitening matrix -- is applied to input data
-    uwm : np.ndarray, shape=(n_inputs,n_inputs)
-        Unwhitening matrix
-    """
-    def __init__(self, layer_sizes,wm,uwm):
-        super(ae, self).__init__()
-        self.sizes = layer_sizes
-        self.n = len(self.sizes)
-        self.wm = wm
-        self.uwm = uwm
-
-        self.encoder = nn.ModuleList()
-        for i in range(self.n-1):
-            self.encoder.append(nn.Linear(self.sizes[i], self.sizes[i+1]))
-
-        self.decoder = nn.ModuleList()
-        for i in range(self.n-1, 0, -1):
-            self.decoder.append(nn.Linear(self.sizes[i], self.sizes[i-1]))
-
-    def freeze_weights(self,old_net=None):
-        """Set the whitening and unwhitening matrices as untrainable layers.
-        Additionally, freezes weights associated with a previously learned
-        encoder layer.
-
-        Parameters
-        ----------
-        old_net : ae object
-            Previously trained network with overlapping architecture. Weights
-            learned in this previous network's encoder will be frozen in the
-            new network.
-        """
-        self.encoder[0].weight.data = Variable(torch.from_numpy(self.wm).type(torch.FloatTensor))
-        self.encoder[0].bias.data = Variable(torch.from_numpy(np.zeros(len(self.wm))).type(torch.FloatTensor))
-        for p in self.encoder[0].parameters():
-            p.requires_grad = False
-        self.decoder[-1].weight.data = Variable(torch.from_numpy(self.uwm).type(torch.FloatTensor))
-        self.decoder[-1].bias.data = Variable(torch.from_numpy(np.zeros(len(self.uwm))).type(torch.FloatTensor))
-        for p in self.decoder[-1].parameters():
-            p.requires_grad = False
-
-        if old_net:
-            n_old = len(old_net.encoder)
-            for i in range(1,n_old):
-                self.encoder[i].weight.data = old_net.encoder[i].weight.data
-                self.encoder[i].bias.data = old_net.encoder[i].bias.data
-                for p in self.encoder[i].parameters():
-                    p.requires_grad = False
-
-    def unfreeze_weights(self):
-        """Makes all encoder weights trainable.
-        """
-        n_old = len(self.encoder)
-        for i in range(1,n_old):
-            for p in self.encoder[i].parameters():
-                p.requires_grad = True
-
-    def encode(self, x):
-        """Pass the data through the encoder to the latent layer.
- - Parameters - ---------- - x : torch.cuda.FloatTensor or torch.FloatTensor - Input data for a given sample - - Returns - ------- - latent : torch.cuda.FloatTensor or torch.FloatTensor - Latent space vector associated with encoder1 - """ - # whiten, without applying non-linearity - latent = self.encoder[0](x) - - # do non-linear layers - for i in range(1, self.n-1): - latent = F.leaky_relu(self.encoder[i](latent)) - - return latent - - def decode(self, x): - """Pass the latent space vector through the decoder - - Parameters - ---------- - x : torch.cuda.FloatTensor or torch.FloatTensor - Latent space vector - - Returns - ------- - recon : torch.cuda.FloatTensor or torch.FloatTensor - Reconstruction of the original input data - """ - # do non-linear layers - recon = x - for i in range(self.n-2): - recon = F.leaky_relu(self.decoder[i](recon)) - - # unwhiten, without applying non-linearity - recon = self.decoder[-1](recon) - - return recon - - def forward(self, x): - """Pass data through the entire network - - Parameters - ---------- - x : torch.cuda.FloatTensor or torch.FloatTensor - Input data for a given sample - - Returns - ------- - recon : torch.cuda.FloatTensor or torch.FloatTensor - Reconstruction of the original input data - latent : torch.cuda.FloatTensor or torch.FloatTensor - Latent space vector - """ - latent = self.encode(x) - recon = self.decode(latent) - # None for labels, so number returns same as sae class - return recon, latent, None - - -# build classifier based on latent representation from an AE -class classify_ae(nn.Module): - """Logistic Regression model - - Parameters - ---------- - n_latent : int - Number of latent variables - """ - def __init__(self, n_latent): - super(classify_ae, self).__init__() - self.n_latent = n_latent - - self.fc1 = nn.Linear(self.n_latent, 1) - - def classify(self, x): - """Perfom classification task using latent space representation - - Parameters - ---------- - x : torch.cuda.FloatTensor or torch.FloatTensor - Latent space vector - - Returns - ------- - Value between 0 and 1 - """ - return torch.sigmoid(self.fc1(x)) - - def forward(self, x): - """Perfom classification task using latent space representation - - Parameters - ---------- - x : torch.cuda.FloatTensor or torch.FloatTensor - Latent space vector - - Returns - ------- - Value between 0 and 1 - """ - return self.classify(x) - - -class sae(ae): - """Supervised autoencoder - - Parameters - ---------- - layer_sizes : list - List of integers indicating the size of each layer in the - encoder including the latent layer. First two must be identical. 
- wm : np.ndarray, shape=(n_inputs,n_inputs) - Whitening matrix -- is applied to input data - uwm : np.ndarray, shape=(n_inputs,n_inputs) - unwhitening matrix - """ - def __init__(self, layer_sizes, wm, uwm): - super(sae, self).__init__(layer_sizes,wm,uwm) - - self.classifier = nn.Linear(self.sizes[-1], 1) - - def classify(self, latent): - """Perfom classification task using latent space representation - - Parameters - ---------- - latent : torch.cuda.FloatTensor or torch.FloatTensor - Latent space vector - - Returns - ------- - Value between 0 and 1 - """ - return torch.sigmoid(self.classifier(latent)) - - def forward(self, x): - """Pass through the entire network - - Parameters - ---------- - x : torch.cuda.FloatTensor or torch.FloatTensor - Latent space vector - - Returns - ------- - recon : torch.cuda.FloatTensor or torch.FloatTensor - Reconstruction of the original input data - latent : torch.cuda.FloatTensor or torch.FloatTensor - Latent space vector - label : - Value between 0 and 1 - """ - latent = self.encode(x) - label = self.classify(latent) - recon = self.decode(latent) - return recon, latent, label - - -class vae(ae): - def __init__(self, layer_sizes): - super(vae, self).__init__(layer_sizes) - - # last layer of encoder is mu, also need logvar of equal size - self.logvar = nn.Linear(self.sizes[-2], self.sizes[-1]) - - def encode(self, x): - # whiten, without applying non-linearity - latent = self.encoder[0](x) - - # do non-linear layers - for i in range(1, self.n-2): - latent = F.leaky_relu(self.encoder[i](latent)) - - mu = F.leaky_relu(self.encoder[-1](latent)) - logvar = F.leaky_relu(self.logvar(latent)) - return mu, logvar - - def reparameterize(self, mu, logvar): - std = torch.exp(0.5*logvar) - eps = torch.randn_like(std) - return eps.mul(std).add_(mu) - - def forward(self, x): - mu, logvar = self.encode(x) - z = self.reparameterize(mu, logvar) - recon = self.decode(z) - # None for labels, so number returns same as sae class - return recon, mu, logvar, None - - -class svae(vae): - def __init__(self, layer_sizes): - super(svae, self).__init__(layer_sizes) - - self.classifier = nn.Linear(self.sizes[-1], 1) - - def classify(self, latent): - return torch.sigmoid(self.classifier(latent)) - - def forward(self, x): - mu, logvar = self.encode(x) - z = self.reparameterize(mu, logvar) - label = self.classify(z) - recon = self.decode(z) - # None for labels, so number returns same as sae class - return recon, mu, logvar, label - - -def my_mse(x, x_recon): - """Calculate mean squared error loss - - Parameters - ---------- - x : torch.cuda.FloatTensor or torch.FloatTensor - Input data - x_recon : torch.cuda.FloatTensor or torch.FloatTensor - Reconstructed input - - Returns - ------- - torch.cuda.FloatTensor or torch.FloatTensor - """ - return torch.mean(torch.pow(x-x_recon, 2)) - - -def my_l1(x, x_recon): - """Calculate l1 loss - - Parameters - ---------- - x : torch.cuda.FloatTensor or torch.FloatTensor - Input data - x_recon : torch.cuda.FloatTensor or torch.FloatTensor - Reconstructed input - - Returns - ------- - torch.cuda.FloatTensor or torch.FloatTensor - """ - return torch.mean(torch.abs(x-x_recon)) - -def chunks(arr, chunk_size): - """Yield successive chunk_size chunks from arr.""" - for i in range(0, len(arr), chunk_size): - yield arr[i:i + chunk_size] - -def split_inds(pdb,resnum,focus_dist): - """Identify indices close and far from a residue of interest. - Each index corresponds to an X,Y, or Z coordinate of an atom - in the pdb. 
- - Parameters - ---------- - pdb : md.Trajectory object - Structure used to find close/far indices. - resnum : integer - The residue number of interest. - focus_dist : float (nannmeters) - All indices within this distance of resnum will be selected - as close indices. - - Returns - ------- - close_xyz_inds : np.ndarray - Indices of x,y,z positions of atoms in pdb that are close - to resnum. - non_close_xyz_inds : np.ndarray - Indices of x,y,z positions of atoms in pdb that are not - close to resnum. - """ - res_atoms = pdb.topology.select("resSeq %s" % resnum) - dist_combos = [res_atoms,np.arange(pdb.n_atoms)] - dist_combos = np.array(list(itertools.product(*dist_combos))) - - dpdb = md.compute_distances(pdb,dist_combos) - ind_loc = np.where(dpdb.flatten() 0.0 - - # Covariance matrix should be symmetric - assert np.allclose(cov, cov.T) - - uwm, wm = w.get_wuw_mats(cov.astype(np.double)) - - # ZCA whitening matrix should be symmteric - assert np.allclose(wm, wm.T) - - # assert that whitening and whitening produce the identity matrix - assert_matrix_is_identity(np.matmul(wm, uwm)) - - # assert that the covariance matrix multiplied by the transpose of - # the whitening matrix multiplied by the whitening matrix is identity - # for detailed explanation see page 3 of https://arxiv.org/pdf/1512.00809.pdf - test = np.matmul(cov, np.matmul(wm.transpose(), wm)) - assert_matrix_is_identity(test) - - diff --git a/pensa/diffnets/tests/test_cli.py b/pensa/diffnets/tests/test_cli.py deleted file mode 100644 index 4687ec4e..00000000 --- a/pensa/diffnets/tests/test_cli.py +++ /dev/null @@ -1,165 +0,0 @@ -import os -import shutil -import subprocess -import tempfile -import mdtraj as md -import numpy as np -from diffnets.utils import get_fns - -CURR_DIR = os.getcwd() -UP_DIR = CURR_DIR[:-len(CURR_DIR.split('/')[-1])] -CLI_DIR = UP_DIR + 'cli' - -def test_preprocess_default_inds(): - curr_dir = os.getcwd() - try: - td = tempfile.mkdtemp(dir=curr_dir) - ftmp = tempfile.NamedTemporaryFile(delete=False) - traj_dirs_tmp = ftmp.name + ".npy" - inp = np.array([os.path.join(curr_dir,"data/traj1"), - os.path.join(curr_dir,"data/traj2")]) - np.save(traj_dirs_tmp, inp, allow_pickle=False) - - ftmp2 = tempfile.NamedTemporaryFile(delete=False) - pdb_fns_tmp = ftmp2.name + ".npy" - inp = np.array([os.path.join(curr_dir,"data/beta-peptide1.pdb"), - os.path.join(curr_dir,"data/beta-peptide2.pdb")]) - np.save(pdb_fns_tmp, inp, allow_pickle=False) - - subprocess.call(['python', CLI_DIR + "/main.py", "process", - traj_dirs_tmp, pdb_fns_tmp, td]) - - assert os.path.exists(os.path.join(td,"wm.npy")) - assert os.path.exists(os.path.join(td,"uwm.npy")) - assert os.path.exists(os.path.join(td,"master.pdb")) - assert os.path.exists(os.path.join(td,"data")) - - xtc_fns = os.path.join(td,"aligned_xtcs") - data_fns = get_fns(xtc_fns,"*.xtc") - ind_fns = os.path.join(td,"indicators") - inds = get_fns(ind_fns,"*.npy") - - print(len(data_fns)) - assert len(data_fns) == len(inds) - - finally: - os.remove(traj_dirs_tmp) - os.remove(pdb_fns_tmp) - shutil.rmtree(td) - -def test_preprocess_custom_inds(): - curr_dir = os.getcwd() - try: - td = tempfile.mkdtemp(dir=curr_dir) - ftmp = tempfile.NamedTemporaryFile(delete=False) - traj_dirs_tmp = ftmp.name + ".npy" - inp = np.array([os.path.join(curr_dir,"data/traj1"), - os.path.join(curr_dir,"data/traj2")]) - np.save(traj_dirs_tmp, inp, allow_pickle=False) - - ftmp2 = tempfile.NamedTemporaryFile(delete=False) - pdb_fns_tmp = ftmp2.name + ".npy" - inp = 
np.array([os.path.join(curr_dir,"data/beta-peptide1.pdb"), - os.path.join(curr_dir,"data/beta-peptide2.pdb")]) - np.save(pdb_fns_tmp, inp, allow_pickle=False) - - ftmp3 = tempfile.NamedTemporaryFile(delete=False) - inds_fn_tmp = ftmp3.name + ".npy" - pdb = md.load(inp[0]) - inds = pdb.top.select("name CA or name N or name CB or name C") - both_inds = np.array([inds,inds]) - np.save(inds_fn_tmp, both_inds, allow_pickle=False) - - subprocess.call(['python', CLI_DIR + "/main.py", "process", - traj_dirs_tmp, pdb_fns_tmp, td, - "-a" + inds_fn_tmp]) - - assert os.path.exists(os.path.join(td,"wm.npy")) - assert os.path.exists(os.path.join(td,"uwm.npy")) - assert os.path.exists(os.path.join(td,"master.pdb")) - assert os.path.exists(os.path.join(td,"data")) - - xtc_fns = os.path.join(td,"aligned_xtcs") - data_fns = get_fns(xtc_fns,"*.xtc") - ind_fns = os.path.join(td,"indicators") - inds = get_fns(ind_fns,"*.npy") - - print(len(data_fns)) - assert len(data_fns) == len(inds) - - finally: - os.remove(traj_dirs_tmp) - os.remove(pdb_fns_tmp) - os.remove(inds_fn_tmp) - shutil.rmtree(td) - -def test_train(): - curr_dir = os.getcwd() - try: - td = tempfile.mkdtemp(dir=curr_dir) - ftmp = tempfile.NamedTemporaryFile(delete=False,mode="w+") - - params =["data_dir: '%s/data/whitened'" % curr_dir, - "n_epochs: 4", - "act_map: [0,1]", - "lr: 0.0001", - "n_latent: 10", - "hidden_layer_sizes: [50]", - "em_bounds: [[0.1,0.3],[0.6,0.9]]", - "do_em: True", - "em_batch_size: 50", - "nntype: 'nnutils.sae'", - "batch_size: 32", - "batch_output_freq: 50", - "epoch_output_freq: 2", - "test_batch_size: 50", - "frac_test: 0.1", - "subsample: 10", - "outdir: %s" % td, - "data_in_mem: False" - ] - - for line in params: - ftmp.write(line) - ftmp.write("\n") - ftmp.close() - - subprocess.call(['python', CLI_DIR + "/main.py", "train", - ftmp.name]) - - assert os.path.exists(os.path.join(td,"nn_best_polish.pkl")) - finally: - os.remove(ftmp.name) - shutil.rmtree(td) - -def test_analyze(): - curr_dir = os.getcwd() - try: - subprocess.call(['python', CLI_DIR + "/main.py", "analyze", - "%s/data/whitened" % curr_dir, - "%s/data/trained_output" % curr_dir, - "-c", "20"]) - - assert os.path.exists(os.path.join("%s/data/trained_output" % curr_dir, - "rescorr-100.pml")) - assert os.path.exists(os.path.join("%s/data/trained_output/rmsd.npy" - % curr_dir)) - assert os.path.exists(os.path.join("%s/data/trained_output/labels" - % curr_dir)) - assert os.path.exists(os.path.join("%s/data/trained_output/encodings" - % curr_dir)) - assert os.path.exists(os.path.join("%s/data/trained_output/cluster_20" - % curr_dir)) - assert os.path.exists(os.path.join("%s/data/trained_output/recon_trajs" - % curr_dir)) - assert os.path.exists(os.path.join("%s/data/trained_output/morph_label" - % curr_dir)) - - finally: - shutil.rmtree(os.path.join("%s/data/trained_output/encodings" % curr_dir)) - shutil.rmtree(os.path.join("%s/data/trained_output/labels" % curr_dir)) - shutil.rmtree(os.path.join("%s/data/trained_output/cluster_20" % curr_dir)) - shutil.rmtree(os.path.join("%s/data/trained_output/recon_trajs" % curr_dir)) - shutil.rmtree(os.path.join("%s/data/trained_output/morph_label" % curr_dir)) - os.remove(os.path.join("%s/data/trained_output/rmsd.npy" % curr_dir)) - os.remove(os.path.join("%s/data/trained_output/rescorr-100.pml" % curr_dir)) diff --git a/pensa/diffnets/tests/test_diffnets.py b/pensa/diffnets/tests/test_diffnets.py deleted file mode 100644 index 307572e5..00000000 --- a/pensa/diffnets/tests/test_diffnets.py +++ /dev/null @@ -1,12 +0,0 @@ 
-""" -Unit and regression test for the diffnets package. -""" - -# Import package, test suite, and other packages as needed -import diffnets -import pytest -import sys - -def test_diffnets_imported(): - """Sample test, will always pass so long as import statement worked""" - assert "diffnets" in sys.modules diff --git a/pensa/diffnets/training.py b/pensa/diffnets/training.py deleted file mode 100644 index bb6216e1..00000000 --- a/pensa/diffnets/training.py +++ /dev/null @@ -1,556 +0,0 @@ -import os -import pickle -import sys -import multiprocessing as mp -import mdtraj as md -import numpy as np -from . import exmax, nnutils, utils, data_processing -import copy -import pickle - -import torch -import torch.nn as nn -import torch.optim as optim -from torch.autograd import Variable -from torch.utils import data as torch_data - -class Dataset(torch_data.Dataset): - 'Characterizes a dataset for PyTorch' - def __init__(self, train_inds, labels, data): - 'Initialization' - self.labels = labels - self.train_inds = train_inds - self.data = data - - def __len__(self): - 'Denotes the total number of samples' - return len(self.train_inds) - - def __getitem__(self, index): - 'Generates one sample of data' - #If data needs to be loaded - ID = self.train_inds[index] - if type(self.data) is str: - # Load data and get label - X = torch.load(self.data + "/ID-%s" % ID + '.pt') - else: - X = torch.from_numpy(self.data[ID]).type(torch.FloatTensor) - y = self.labels[ID] - - return X, y, ID - - - -class Trainer: - - def __init__(self,job): - """Object to train your DiffNet. - - Parameters: - ----------- - job : dict - Dictionary with all training parameters. See training_dict.txt - for all keys. All keys are required. See train_submit.py for an - example. - """ - self.job = job - - def set_training_data(self, job, train_inds, test_inds, labels, data): - """Construct generators out of the dataset for training, validation, - and expectation maximization. - - Parameters - ---------- - job : dict - See training_dict.tx for all keys. 
-        train_inds : np.ndarray
-            Indices in data that are to be trained on
-        test_inds : np.ndarray
-            Indices in data that are to be validated on
-        labels : np.ndarray
-            Classification labels used for training
-        data : np.ndarray, shape=(n_frames,3*n_atoms) OR str to path
-            All data
-        """
-
-        batch_size = job['batch_size']
-        cpu_cores = job['em_n_cores']
-        test_batch_size = job['test_batch_size']
-        em_batch_size = job['em_batch_size']
-        subsample = job['subsample']
-        data_dir = job["data_dir"]
-
-        n_train_inds = len(train_inds)
-        random_inds = np.random.choice(np.arange(n_train_inds),int(n_train_inds/subsample),replace=False)
-        sampler=torch_data.SubsetRandomSampler(random_inds)
-
-        params_t = {'batch_size': batch_size,
-                    'shuffle':False,
-                    'num_workers': cpu_cores,
-                    'sampler': sampler}
-
-        params_v = {'batch_size': test_batch_size,
-                    'shuffle':True,
-                    'num_workers': cpu_cores}
-
-        params_e = {'batch_size': em_batch_size,
-                    'shuffle':True,
-                    'num_workers': cpu_cores}
-
-        n_snapshots = len(train_inds) + len(test_inds)
-
-        training_set = Dataset(train_inds, labels, data)
-        training_generator = torch_data.DataLoader(training_set, **params_t)
-
-        validation_set = Dataset(test_inds, labels, data)
-        validation_generator = torch_data.DataLoader(validation_set, **params_v)
-
-        em_set = Dataset(train_inds, labels, data)
-        em_generator = torch_data.DataLoader(em_set, **params_e)
-
-        return training_generator, validation_generator, em_generator
-
-    def em_parallel(self, net, em_generator, train_inds, em_batch_size,
-                    indicators, em_bounds, em_n_cores, label_str, epoch):
-        """Use expectation maximization to update all training classification
-        labels.
-
-        Parameters
-        ----------
-        net : nnutils neural network object
-            Neural network
-        em_generator : Dataset object
-            Training data
-        train_inds : np.ndarray
-            Indices in data that are to be trained on
-        em_batch_size : int
-            Number of examples that have their classification labels
-            updated in a single round of expectation maximization.
-        indicators : np.ndarray, shape=(len(data),)
-            Value to indicate which variant each data frame came from.
-        em_bounds : np.ndarray, shape=(n_variants,2)
-            A range that sets what fraction of conformations you
-            expect a variant to have the biochemical property. Rank order
-            of variants is more important than the ranges themselves.
-        em_n_cores : int
-            CPU cores to use for expectation maximization calculation
-        label_str : str or int
-            Label used for naming the temporary label files written to outdir.
-        epoch : int
-            Current training epoch, also used for file naming.
-
-        Returns
-        -------
-        new_labels : np.ndarray, shape=(len(data),)
-            Updated classification labels for all training examples
-        """
-        use_cuda = torch.cuda.is_available()
-        device = torch.device("cuda:0" if use_cuda else "cpu")
-
-        n_em = np.ceil(train_inds.shape[0]*1.0/em_batch_size)
-        freq_output = np.floor(n_em/10.0)
-        train_inds = []
-        inputs = []
-        i = 0
-        ##To save DiffNet labels before each EM update
-        pred_labels = -1 * np.ones(indicators.shape[0])
-        for local_batch, local_labels, t_inds in em_generator:
-            t_inds = np.array(t_inds)
-            local_batch, local_labels = local_batch.to(device), local_labels.to(device)
-            if hasattr(net, "decode"):
-                if hasattr(net, "reparameterize"):
-                    x_pred, latent, logvar, class_pred = net(local_batch)
-                else:
-                    x_pred, latent, class_pred = net(local_batch)
-            else:
-                class_pred = net(local_batch)
-            cur_labels = class_pred.cpu().detach().numpy()
-            pred_labels[t_inds] = cur_labels.flatten()
-            inputs.append([cur_labels, indicators[t_inds], em_bounds])
-            if i % freq_output == 0:
-                print(" %d/%d" % (i, n_em))
-            i += 1
-            train_inds.append(t_inds)
-
-        pred_label_fn = os.path.join(self.job['outdir'],"tmp_labels_%s_%s.npy" % (label_str,epoch))
-        np.save(pred_label_fn,pred_labels)
-        pool = mp.Pool(processes=em_n_cores)
-        res = pool.map(self.apply_exmax, inputs)
-        pool.close()
-
-        train_inds = np.concatenate(np.array(train_inds))
-        new_labels = -1 * np.ones((indicators.shape[0], 1))
-        new_labels[train_inds] = np.concatenate(res)
-        return new_labels
-
-    def apply_exmax(self, inputs):
-        """Apply expectation maximization to a batch of data.
-
-        Parameters
-        ----------
-        inputs : list
-            list where the 0th index is a list of current classification
-            labels of length == batch_size. 1st index is a corresponding
-            list of variant simulation indicators. 2nd index is em_bounds.
- - Returns - ------- - Updated labels -- length == batch size - """ - cur_labels, indicators, em_bounds = inputs - n_vars = em_bounds.shape[0] - - for i in range(n_vars): - inds = np.where(indicators == i)[0] - lower = np.int(np.floor(em_bounds[i, 0] * inds.shape[0])) - upper = np.int(np.ceil(em_bounds[i, 1] * inds.shape[0])) - cur_labels[inds] = exmax.expectation_range_CUBIC(cur_labels[inds], lower, upper).reshape(cur_labels[inds].shape) - - bad_inds = np.where(np.isnan(cur_labels)) - cur_labels[bad_inds] = 0 - try: - assert((cur_labels >= 0.).all() and (cur_labels <= 1.).all()) - except AssertionError: - neg_inds = np.where(cur_labels<0)[0] - pos_inds = np.where(cur_labels>1)[0] - bad_inds = neg_inds.tolist() + pos_inds.tolist() - for iis in bad_inds: - print(" ", indicators[iis], cur_labels[iis]) - print(" #bad neg, pos", len(neg_inds), len(pos_inds)) - #np.save("tmp.npy", tmp_labels) - cur_labels[neg_inds] = 0.0 - cur_labels[pos_inds] = 1.0 - #sys.exit(1) - return cur_labels.reshape((cur_labels.shape[0], 1)) - - def train(self, data, training_generator, validation_generator, em_generator, - targets, indicators, train_inds, test_inds,net, label_str, - job, lr_fact=1.0): - """Core method for training - - Parameters - ---------- - data : np.ndarray, shape=(n_frames,3*n_atoms) OR str to path - Training data - training_generator: Dataset object - Generator to sample training data - validation_generator: Dataset object - Generator to sample validation data - em_generator: Dataset object - Generator to sample training data in batches for expectation - maximization - targets : np.ndarray, shape=(len(data),) - classification labels used for training - indicators : np.ndarray, shape=(len(data),) - Value to indicate which variant each data frame came from. - train_inds : np.ndarray - Indices in data that are to be trained on - test_inds : np.ndarray - Indices in data that are to be validated on - net : nnutils neural network object - Neural network - label_str: int - For file naming. Indicates what iteration of training we're - on. Training goes through several iterations where neural net - architecture is progressively built deeper. - job : dict - See training_dict.tx for all keys. - lr_fact : float - Factor to multiply the learning rate by. - - Returns - ------- - best_nn : nnutils neural network object - Neural network that has the lowest reconstruction error - on the validation set. - targets : np.ndarry, shape=(len(data),) - Classification labels after training. 
- """ - job = self.job - do_em = job['do_em'] - n_epochs = job['n_epochs'] - lr = job['lr'] * lr_fact - subsample = job['subsample'] - batch_size = job['batch_size'] - batch_output_freq = job['batch_output_freq'] - epoch_output_freq = job['epoch_output_freq'] - test_batch_size = job['test_batch_size'] - em_bounds = job['em_bounds'] - nntype = job['nntype'] - em_batch_size = job['em_batch_size'] - em_n_cores = job['em_n_cores'] - outdir = job['outdir'] - - use_cuda = torch.cuda.is_available() - device = torch.device("cuda:0" if use_cuda else "cpu") - - n_test = test_inds.shape[0] - lam_cls = 1.0 - lam_corr = 1.0 - - n_batch = np.ceil(train_inds.shape[0]*1.0/subsample/batch_size) - - optimizer = optim.Adam(net.parameters(), lr=lr) - bce = nn.BCELoss() - training_loss_full = [] - test_loss_full = [] - epoch_test_loss = [] - best_loss = np.inf - best_nn = None - for epoch in range(n_epochs): - # go through mini batches - running_loss = 0 - i = 0 - for local_batch, local_labels, _ in training_generator: - if use_cuda: - local_labels = local_labels.type(torch.cuda.FloatTensor) - else: - local_labels = local_labels.type(torch.FloatTensor) - local_batch, local_labels = local_batch.to(device), local_labels.to(device) - - optimizer.zero_grad() - x_pred, latent, class_pred = net(local_batch) - loss = nnutils.my_mse(local_batch, x_pred) - loss += nnutils.my_l1(local_batch, x_pred) - if class_pred is not None: - loss += bce(class_pred, local_labels).mul_(lam_cls) - #Minimize correlation between latent variables - n_feat = net.sizes[-1] - my_c00 = torch.einsum('bi,bo->io', (latent, latent)).mul(1.0/local_batch.shape[0]) - my_mean = torch.mean(latent, 0) - my_mean = torch.einsum('i,o->io', (my_mean, my_mean)) - ide = np.identity(n_feat) - if use_cuda: - ide = torch.from_numpy(ide).type(torch.cuda.FloatTensor) - else: - ide = torch.from_numpy(ide).type(torch.FloatTensor) - #ide = Variable(ide) - #ide = torch.from_numpy(np.identity(n_feat)) - #ide = ide.to(device) - zero_inds = np.where(1-ide.cpu().numpy()>0) - corr_penalty = nnutils.my_mse(ide[zero_inds], my_c00[zero_inds]-my_mean[zero_inds]) - loss += corr_penalty - loss.backward() - optimizer.step() - running_loss += loss.item() - - if i%batch_output_freq == 0: - train_loss = running_loss - if i != 0: - train_loss /= batch_output_freq - training_loss_full.append(train_loss) - - test_loss = 0 - for local_batch, local_labels, _ in validation_generator: - local_batch, local_labels = local_batch.to(device), local_labels.to(device) - x_pred, latent, class_pred = net(local_batch) - loss = nnutils.my_mse(local_batch,x_pred) - test_loss += loss.item() * local_batch.shape[0] # mult for averaging across samples, as in train_loss - #print(" ", test_loss) - test_loss /= n_test # division averages across samples, as in train_loss - test_loss_full.append(test_loss) - print(" [%s %d, %5d/%d] train loss: %0.6f test loss: %0.6f" % (label_str, epoch, i, n_batch, train_loss, test_loss)) - running_loss = 0 - - if test_loss < best_loss: - best_loss = test_loss - best_nn = copy.deepcopy(net) - i += 1 - - if do_em and hasattr(nntype, "classify"): - print(" Doing EM") - targets = self.em_parallel(net, em_generator, train_inds, - em_batch_size, indicators, em_bounds, - em_n_cores, label_str, epoch) - training_generator, validation_generator, em_generator = \ - self.set_training_data(job, train_inds, test_inds, targets, data) - - if epoch % epoch_output_freq == 0: - print("my_l1", nnutils.my_l1(local_batch, x_pred)) - print("corr penalty",corr_penalty) - print("classify", 
bce(class_pred, local_labels).mul_(lam_cls)) - print("my_mse", nnutils.my_mse(local_batch, x_pred)) - epoch_test_loss.append(test_loss) - out_fn = os.path.join(outdir, "epoch_test_loss_%s.npy" % label_str) - np.save(out_fn, epoch_test_loss) - out_fn = os.path.join(outdir, "training_loss_%s.npy" % label_str) - np.save(out_fn, training_loss_full) - out_fn = os.path.join(outdir, "test_loss_%s.npy" % label_str) - np.save(out_fn, test_loss_full) - # nets need be on cpu to load multiple in parallel, e.g. with multiprocessing - net.cpu() - out_fn = os.path.join(outdir, "nn_%s_e%d.pkl" % (label_str, epoch)) - pickle.dump(net, open(out_fn, 'wb')) - if use_cuda: - net.cuda() - if hasattr(nntype, "classify"): - out_fn = os.path.join(outdir, "tmp_targets_%s_%s.npy" % (label_str,epoch)) - np.save(out_fn, targets) - - # save best net every epoch - best_nn.cpu() - out_fn = os.path.join(outdir, "nn_best_%s.pkl" % label_str) - pickle.dump(best_nn, open(out_fn, 'wb')) - if use_cuda: - best_nn.cuda() - return best_nn, targets - - def get_targets(self,act_map,indicators,label_spread=None): - """Convert variant indicators into classification labels. - - Parameters - ---------- - act_map : np.ndarray, shape=(n_variants,) - Initial classification labels to give each variant. - indicators : np.ndarray, shape=(len(data),) - Value to indicate which variant each data frame came from. - - Returns - ------- - targets : np.ndarry, shape=(len(data),) - Classification labels for training. - """ - targets = np.zeros((len(indicators), 1)) - print(targets.shape) - if label_spread == 'gaussian': - targets = np.array([np.random.normal(act_map[i],0.1) for i in indicators]) - zero_inds = np.where(targets < 0)[0] - targets[zero_inds] = 0 - one_inds = np.where(targets > 1)[0] - targets[one_inds] = 1 - elif label_spread == 'uniform': - targets = np.vstack([np.random.uniform() for i in targets]) - elif label_spread == 'bimodal': - targets = np.array([np.random.normal(0.8, 0.1) if np.random.uniform() < act_map[i] - else np.random.normal(0.2, 0.1) for i in indicators]) - zero_inds = np.where(targets < 0)[0] - targets[zero_inds] = 0 - one_inds = np.where(targets > 1)[0] - targets[one_inds] = 1 - else: - targets[:, 0] = act_map[indicators] - return targets - - def split_test_train(self,n,frac_test): - """Split data into training and validation sets. - - Parameters - ---------- - n : int - number of data points - frac_test : float between 0 and 1 - Fraction of dataset to reserve for validation set - - Returns - ------- - train_inds : np.ndarray - Indices in data that are to be trained on - test_inds : np.ndarray - Indices in data that are to be validated on - """ - n_test = int(n*frac_test) - - inds = np.arange(n) - np.random.shuffle(inds) - train_inds = inds[:-n_test] - test_inds = inds[-n_test:] - - return train_inds, test_inds - - def run(self, data_in_mem=False): - """Wrapper for running the training code - - Parameters - ---------- - data_in_mem: boolean - If true, load all training data into memory. Training faster this way. - - Returns - ------- - net : nnutils neural network object - Trained DiffNet - """ - job = self.job - data_dir = job['data_dir'] - outdir = job['outdir'] - n_latent = job['n_latent'] - layer_sizes = job['layer_sizes'] - nntype = job['nntype'] - frac_test = job['frac_test'] - act_map = job['act_map'] - - use_cuda = torch.cuda.is_available() - print("Using cuda? 
%s" % use_cuda) - - indicator_dir = os.path.join(data_dir, "indicators") - indicators = utils.load_npy_dir(indicator_dir, "*.npy") - indicators = np.array(indicators, dtype=int) - - if 'label_spreading' in job.keys(): - targets = self.get_targets(act_map,indicators, - label_spread=job['label_spreading']) - else: - targets = self.get_targets(act_map,indicators) - n_snapshots = len(indicators) - np.save(os.path.join(outdir, 'initial_targets.npy'), targets) - - train_inds, test_inds = self.split_test_train(n_snapshots,frac_test) - if data_in_mem: - xtc_dir = os.path.join(data_dir,"aligned_xtcs") - top_fn = os.path.join(data_dir, "master.pdb") - master = md.load(top_fn) - data = utils.load_traj_coords_dir(xtc_dir, "*.xtc", master.top) - else: - data = os.path.join(data_dir, "data") - - training_generator, validation_generator, em_generator = \ - self.set_training_data(job, train_inds, test_inds, targets, data) - - print(" data generators created") - - print("# of examples", targets.shape) - - wm_fn = os.path.join(data_dir, "wm.npy") - uwm_fn = os.path.join(data_dir, "uwm.npy") - cm_fn = os.path.join(data_dir, "cm.npy") - wm = np.load(wm_fn) - uwm = np.load(uwm_fn) - cm = np.load(cm_fn).flatten() - - n_train = train_inds.shape[0] - n_test = test_inds.shape[0] - out_fn = os.path.join(outdir, "train_inds.npy") - np.save(out_fn, train_inds) - out_fn = os.path.join(outdir, "test_inds.npy") - np.save(out_fn, test_inds) - print(" n train/test", n_train, n_test) - - if hasattr(nntype, 'split_inds'): - inds1 = job['inds1'] - inds2 = job['inds2'] - old_net = nntype(layer_sizes[0:2],inds1,inds2,wm,uwm) - else: - old_net = nntype(layer_sizes[0:2],wm,uwm) - old_net.freeze_weights() - - for cur_layer in range(2,len(layer_sizes)): - if hasattr(nntype, 'split_inds'): - net = nntype(layer_sizes[0:cur_layer+1],inds1,inds2,wm,uwm) - else: - net = nntype(layer_sizes[0:cur_layer+1],wm,uwm) - net.freeze_weights(old_net) - if use_cuda: - net.cuda() - net, targets = self.train(data, training_generator, - validation_generator, em_generator, - targets, indicators, train_inds, - test_inds, net, str(cur_layer), job) - #Might make sense to make this optional - training_generator, validation_generator, em_generator = \ - self.set_training_data(job, train_inds, test_inds, targets, data) - old_net = net - - #Polishing - net.unfreeze_weights() - if use_cuda: - net.cuda() - net, targets = self.train(data, training_generator, validation_generator, - em_generator, targets, indicators, train_inds, - test_inds, net, "polish", job, lr_fact=0.1) - return net diff --git a/pensa/diffnets/utils.py b/pensa/diffnets/utils.py deleted file mode 100644 index 26590b94..00000000 --- a/pensa/diffnets/utils.py +++ /dev/null @@ -1,33 +0,0 @@ -import os -import numpy as np -import glob -import mdtraj as md - -def mkdir(dir_name): - if not os.path.exists(dir_name): - os.mkdir(dir_name) - -def get_fns(dir_name,pattern): - return np.sort(glob.glob(os.path.join(dir_name, pattern))) - -def load_traj_coords_dir(dir_name,pattern,top): - fns = get_fns(dir_name, pattern) - all_d = [] - for fn in fns: - t = md.load(fn, top=top) - d = t.xyz.reshape((len(t), 3*top.n_atoms)) - all_d.append(d) - all_d = np.vstack(all_d) - return all_d - -def load_npy_dir(dir_name,pattern): - fns = get_fns(dir_name, pattern) - all_d = [] - for fn in fns: - d = np.load(fn) - all_d.append(d) - if len(d.shape) == 1: - all_d = np.hstack(all_d) - else: - all_d = np.vstack(all_d) - return all_d