diff --git a/.dockerignore b/.dockerignore index 91b832e..8ad355b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,3 +3,6 @@ tests/*/output/* tests/*/temp.fasta tests/hrefpkg-build/hrefpkg *.ssi +*-env +deenurp.egg-info +build diff --git a/.travis.yml b/.travis.yml index b5b9a2f..2d12726 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ -sudo: false language: python -python: 2.7 +python: + - 3.8 + - 3.9 cache: # must specify pip dir @@ -11,13 +12,6 @@ cache: env: global: - # These two environment variables could be set by Travis itself, or Travis - # could configure itself in /etc/, ~/, or inside of the virtual - # environments. In any case if these two values get configured then end - # users only need to enable the pip cache and manually run pip wheel before - # running pip install. - - PIP_WHEEL_DIR=$HOME/.cache/pip/wheels - - PIP_FIND_LINKS=file://$HOME/.cache/pip/wheels - VIRTUAL_ENV=deenurp-env addons: @@ -32,7 +26,7 @@ install: - "bin/bootstrap.sh $VIRTUAL_ENV" script: - - python setup.py test + - python3 -m deenurp.test - tests/run.sh notifications: diff --git a/CHANGES.rst b/CHANGES.rst index 4513c7d..528aa69 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,11 @@ Changes for deenurp ===================== +0.3.0 +===== + +* Python 3 support, dependency upgrades, and bug fixes + 0.2.7 ===== diff --git a/Dockerfile b/Dockerfile index eb2188a..850f01e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,30 +1,21 @@ -FROM ubuntu:18.04 -MAINTAINER sminot@fredhutch.org +FROM python:3.9 +LABEL org.opencontainers.image.authors="sminot@fredhutch.org,nhoffman@uw.edu,crosenth@uw.edu" # Install prerequisites -RUN apt-get update && apt-get install --assume-yes --no-install-recommends \ - build-essential \ - gfortran \ - git \ - liblapack-dev \ - libopenblas-dev \ - make \ - python-dev \ - python-pip \ - python2.7 \ - unzip \ - wget +RUN apt-get update && \ +apt-get upgrade --assume-yes && \ +apt-get install --assume-yes --no-install-recommends git wget # Add files RUN mkdir /usr/local/share/deenurp/ ADD bin /usr/local/share/deenurp/bin ADD tests /usr/local/share/deenurp/tests ADD deenurp /usr/local/share/deenurp/deenurp -ADD deenurp.py setup.py requirements.txt MANIFEST.in /usr/local/share/deenurp/ +ADD deenurp.py setup.py requirements.txt /usr/local/share/deenurp/ # Install deenurp and dependencies RUN cd /usr/local/share/deenurp/ && \ - PYTHON=/usr/bin/python2.7 \ + PYTHON=/usr/local/bin/python3 \ DEENURP=/usr/local/share/deenurp/ \ bin/bootstrap.sh /usr/local/ @@ -32,17 +23,12 @@ RUN cd /usr/local/share/deenurp/ && \ RUN rm -rf /var/lib/apt/lists/* && \ rm -rf /root/.cache/pip && \ rm -rf /usr/local/share/deenurp/src && \ - apt-get purge -y --auto-remove \ - build-essential \ - unzip \ - git \ - python-dev \ - make + apt-get purge -y --auto-remove git # create some mount points RUN mkdir -p /app /fh /mnt /run/shm # Run tests -RUN python -m deenurp.test && \ +RUN python3 -m deenurp.test && \ cd /usr/local/share/deenurp && \ tests/run.sh diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 793ab0b..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,4 +0,0 @@ -include requirements.txt -include README.rst -include deenurp/data/* -include deenurp/test/data/* diff --git a/README.rst b/README.rst index 7889048..1740b69 100644 --- a/README.rst +++ b/README.rst @@ -12,9 +12,13 @@ The Easy Way * confirm availability of necessary libraries to compile dependencies (on Ubuntu: ``sudo apt-get install gfortran libopenblas-dev liblapack-dev``) -* Install Python 2.7 -* run 
`bin/bootstrap.sh` -* run `source bin-env/bin/activate` +* Install Python >= 3.8 and create a virtual environment +:: + + % python3 -m venv bin-env + % source bin-env/bin/activate + % bin/bootstrap.sh + the `deenurp` executable should now be on your `$PATH` @@ -25,7 +29,7 @@ See required system libraries above. First, install binary dependencies: -* Python 2.7 +* Python 3 - pip, for installing python dependencies (http://www.pip-installer.org/) - Python packages: @@ -46,6 +50,12 @@ Finally, install:: python setup.py install +The Docker Way +============== + +Deenurp can be run from a Docker image, which can be built locally from the Dockerfile +or pulled from Docker Hub: ``docker pull nghoffman/deenurp:v0.3.0`` + De-novo reference set creation ============================== diff --git a/Singularity b/Singularity deleted file mode 100644 index 67950dd..0000000 --- a/Singularity +++ /dev/null @@ -1,41 +0,0 @@ -Bootstrap:docker -From:ubuntu:18.04 - -%setup - mkdir ${SINGULARITY_ROOTFS}/src - -%files - bin /src/bin - tests /src/tests - deenurp /src/deenurp - setup.py /src/ - requirements.txt /src/ - MANIFEST.in /src/ - -%post - apt-get update && apt-get install --assume-yes --no-install-recommends \ - build-essential \ - gfortran \ - git \ - liblapack-dev \ - libopenblas-dev \ - make \ - python-dev \ - python-pip \ - python2.7 \ - unzip \ - wget - cd /src/ && \ - PYTHON=/usr/bin/python2.7 \ - DEENURP=/src/ \ - bin/bootstrap.sh /usr/local/ - rm -rf /var/lib/apt/lists/* - rm -rf /root/.cache/pip - rm -rf /src/src - apt-get purge -y --auto-remove \ - build-essential \ - unzip \ - git \ - python-dev \ - make - mkdir -p /fh /app /mnt # create bind points diff --git a/bin/bootstrap.sh b/bin/bootstrap.sh index b7ca66a..53dc22a 100755 --- a/bin/bootstrap.sh +++ b/bin/bootstrap.sh @@ -14,23 +14,9 @@ set -e # installs deenurp and dependencies to $VIRTUAL_ENV if defined; # otherwise creates a virtualenv locally. -# Will attempt to install python packages from wheels if $PIP_FIND_LINKS is defined -# and pip --use-wheel is specified - -# set $PIP_WHEEL_DIR and $PIP_FIND_LINKS in the parent environment if -# desired - -# Will attempt to create wheels if $PIP_WHEEL_DIR is defined -# see https://pip.pypa.io/en/latest/user_guide.html#environment-variables - - mkdir -p src SRCDIR=$(readlink -f src) -if [[ -n "$PIP_WHEEL_DIR" ]]; then - mkdir -p "$PIP_WHEEL_DIR" -fi - srcdir(){ tar -tf $1 | head -1 } @@ -44,7 +30,7 @@ else fi if [[ -z $PYTHON ]]; then - PYTHON=$(which python2) + PYTHON=$(which python3) fi # Defines the default source directory for deenurp as the parent of @@ -55,65 +41,24 @@ fi VENV_VERSION=15.1.0 PPLACER_BUILD=1.1.alpha19 -INFERNAL_VERSION=1.1.2 +INFERNAL_VERSION=1.1.4 RAXML_VERSION=8.0.5 MUSCLE_VERSION=3.8.31 VSEARCH_VERSION=2.6.2 -check_version(){ - # usage: check_version module version-string - "$PYTHON" <<EOF &> /dev/null -import $1 -from distutils.version import LooseVersion -assert LooseVersion($1.__version__) >= LooseVersion("$2") -EOF -} - -# create virtualenv if necessary, downloading source if available -# version is not up to date. -VENV_URL="https://github.com/pypa/virtualenv/archive/${VENV_VERSION}" +# create virtualenv if [[ ! -f "${venv:?}/bin/activate" ]]; then - # if the system virtualenv is up to date, use it - if check_version virtualenv $VENV_VERSION; then - echo "using $(which virtualenv) (version $(virtualenv --version))" - virtualenv "$venv" - else - echo "downloading virtualenv version $VENV_VERSION" - if [[ ! 
-f src/virtualenv-${VENV_VERSION}/virtualenv.py ]]; then - mkdir -p src - (cd src && \ - wget --quiet -nc ${VENV_URL}.tar.gz && \ - tar -xf ${VENV_VERSION}.tar.gz) - fi - "$PYTHON" src/virtualenv-${VENV_VERSION}/virtualenv.py "$venv" - fi + $PYTHON -m venv $venv else echo "virtualenv $venv already exists" fi source $venv/bin/activate - # full path; set by activate venv=$VIRTUAL_ENV - -# Preserve the order of installation. The requirements are sorted so -# that secondary (and higher-order) dependencies appear first. See -# bin/pipdeptree2requirements.py. We use --no-deps to prevent various -# packages from being repeatedly installed, uninstalled, reinstalled, -# etc. Also, enfoprcing the order of installation ensures that -# install-time dependencies are met (`pip install -r requirements.txt` -# fails due to a install-time dependency that cogent has for numpy) -pip2 install -U pip - -# install pysqlite and updated sqlite3 libraries -wget --quiet -O - \ - https://raw.githubusercontent.com/fhcrc/taxtastic/master/dev/install_pysqlite.sh | bash - -while read pkg; do - pip2 install "$pkg" --no-deps --upgrade -done < <(/bin/grep -v -E '^#|^$' "$DEENURP/requirements.txt") - -pip2 install "$DEENURP" +pip install -U pip wheel +pip install -r "$DEENURP/requirements.txt" +pip install "$DEENURP" # install pplacer and accompanying python scripts PPLACER_DIR=pplacer-Linux-v${PPLACER_BUILD} @@ -128,11 +73,12 @@ if pplacer_is_installed; then $venv/bin/pplacer --version else mkdir -p src && \ - (cd src && \ - wget -nc --quiet https://github.com/matsen/pplacer/releases/download/v$PPLACER_BUILD/$PPLACER_ZIP && \ - unzip -o $PPLACER_ZIP && \ - cp $PPLACER_DIR/{pplacer,guppy,rppr} $venv/bin && \ - pip2 install -U $PPLACER_DIR/scripts) + (cd src \ + && wget -nc --quiet https://github.com/matsen/pplacer/releases/download/v$PPLACER_BUILD/$PPLACER_ZIP \ + && unzip -o $PPLACER_ZIP \ + && cp $PPLACER_DIR/{pplacer,guppy,rppr} $venv/bin \ + # && pip2 install -U $PPLACER_DIR/scripts \ + ) # confirm that we have installed the requested build if ! pplacer_is_installed; then echo -n "Error: you requested pplacer build $PPLACER_BUILD " @@ -218,4 +164,3 @@ else cd muscle${MUSCLE_VERSION}/src && \ ./mk && cp muscle $venv/bin) fi - diff --git a/bin/pipdeptree2requirements.py b/bin/pipdeptree2requirements.py index 960de26..65c7ef5 100755 --- a/bin/pipdeptree2requirements.py +++ b/bin/pipdeptree2requirements.py @@ -21,5 +21,5 @@ deps = set() for __, pkg in sorted(lines): if pkg not in deps: - print pkg + print(pkg) deps.add(pkg) diff --git a/deenurp/__init__.py b/deenurp/__init__.py index ddbaf5c..d61df57 100644 --- a/deenurp/__init__.py +++ b/deenurp/__init__.py @@ -24,7 +24,7 @@ import os import pkgutil import sys -import util +from . import util log = logging.getLogger(__name__) @@ -89,9 +89,8 @@ def setup_logging(namespace): '%(funcName)s %(lineno)s %(message)s') datefmt = '%Y-%m-%d %H:%M:%S' - logging.basicConfig(stream=namespace.log, format=log_format, - level=loglevel, log_format=log_format, - datefmt=datefmt) + logging.basicConfig( + stream=namespace.log, format=log_format, level=loglevel, datefmt=datefmt) def parse_version(parser): @@ -105,7 +104,7 @@ def parse_args(parser): parser.add_argument('-l', '--log', metavar='FILE', default=sys.stdout, - type=util.file_opener('a', buffering=0), # append + type=util.file_opener('a'), # append help='Send logging to a file') parser.add_argument('-v', '--verbose', @@ -128,7 +127,7 @@ def parse_subcommands(parser, argv): """ Setup all sub-commands """ - import subcommands + from . 
import subcommands subparsers = parser.add_subparsers(dest='subparser_name') @@ -158,7 +157,7 @@ try: imp = '{}.{}'.format(subcommands.__name__, name) mod = importlib.import_module(imp) - except Exception, e: + except Exception as e: log.error('error importing subcommand {}'.format(name)) log.error(e) continue diff --git a/deenurp/outliers.py b/deenurp/outliers.py index 8654f52..3928d5c 100644 --- a/deenurp/outliers.py +++ b/deenurp/outliers.py @@ -33,7 +33,7 @@ spl = line.split() assert len(spl) == N + 1 taxa.append(spl.pop(0)) - distmat[row, :] = map(float, spl) + distmat[row, :] = list(map(float, spl)) return taxa, distmat @@ -48,7 +48,7 @@ cmd = ['FastTree', '-nt', '-makematrix', fasta] - with tempfile.TemporaryFile('rw') as stdout, open(os.devnull) as devnull: + with tempfile.TemporaryFile('w+') as stdout, open(os.devnull) as devnull: proc = subprocess.Popen(cmd, stdout=stdout, stderr=devnull) proc.communicate() stdout.flush() @@ -98,14 +98,20 @@ """ + # A previous implementation used a masked_array to guard against na + # values, but apparently this results in undefined behavior: + # https://github.com/numpy/numpy/issues/14716 + # use a masked array in case there are any nan - ma = np.ma.masked_array(distmat, np.isnan(distmat)) + # ma = np.ma.masked_array(distmat, np.isnan(distmat)) # index of most central element. - medoid = find_medoid(ma) + # medoid = find_medoid(ma) + medoid = find_medoid(distmat) # distance from each element to most central element - dists = ma[medoid, :] + # dists = ma[medoid, :] + dists = distmat[medoid, :] to_prune = dists > radius return medoid, dists, to_prune @@ -147,12 +153,13 @@ def outliers_by_cluster(distmat, t, D, min_size=1, cluster_type='single', **kwar # all of the sequences log.warning('no clusters were found') - medoids = pd.DataFrame.from_items([ - ('cluster', [-1]), - ('count', [len(clusters)]), - ('medoid', [find_medoid(distmat)]), - ('dist', [None]) - ]) + medoids = pd.DataFrame({ + 'cluster': [-1], + 'count': [len(clusters)], + 'medoid': [find_medoid(distmat)], + 'dist': [None], + }) + to_prune = pd.Series([False for x in clusters]) else: medoids = find_cluster_medoids(distmat, clusters) @@ -192,7 +199,7 @@ def scipy_cluster(X, module, t, **kwargs): Z = fun(y) clusters = scipy.cluster.hierarchy.fcluster(Z, t, **args) title = 'scipy.cluster.hierarchy.{} {}'.format( - module, ' '.join('%s=%s' % item for item in args.items())) + module, ' '.join('%s=%s' % item for item in list(args.items()))) return clusters, title @@ -237,7 +244,7 @@ def find_cluster_medoids(X, clusters): zip([0 if c == -1 else 1 for c in uclusters], counts, uclusters), reverse=True) - __, counts, uclusters = zip(*tallies) + __, counts, uclusters = list(zip(*tallies)) medoids = [(None if cluster == -1 else find_medoid(X, clusters == cluster)) for _, _, cluster in tallies] @@ -245,12 +252,12 @@ # measure distances from the medoid of the first (largest) cluster dists = [None if medoid is None else X[medoids[0], medoid] for medoid in medoids] - return pd.DataFrame.from_items([ - ('cluster', uclusters), - ('count', counts), - ('medoid', medoids), - ('dist', dists) - ]) + return pd.DataFrame({ + 'cluster': uclusters, + 'count': counts, + 'medoid': medoids, + 'dist': dists, + }) def choose_clusters(df, min_size, max_dist): @@ -317,11 +324,11 @@ def mds(X, taxa, n_jobs=1): n_jobs=n_jobs) if np.all(X == 0): - df = 
pd.DataFrame.from_items([ - ('seqname', taxa), - ('x', np.zeros(n)), - ('y', np.zeros(n)) - ]) + df = pd.DataFrame({ + 'seqname': taxa, + 'x': np.zeros(n), + 'y': np.zeros(n), + }) else: mds_fit = mds.fit_transform(X) df = pd.DataFrame(mds_fit, columns=['x', 'y']) diff --git a/deenurp/search.py b/deenurp/search.py index a0668b0..4b5d939 100644 --- a/deenurp/search.py +++ b/deenurp/search.py @@ -13,9 +13,7 @@ from deenurp import uclust from Bio import SeqIO -from .util import SingletonDefaultDict, memoize - -_ntf = tempfile.NamedTemporaryFile +from .util import SingletonDefaultDict, memoize, ntf SELECT_THRESHOLD = 0.05 SEARCH_THRESHOLD = 0.90 @@ -123,7 +121,7 @@ def _search(con, quiet=True, select_threshold=SELECT_THRESHOLD, cursor = con.cursor() count = 0 ref_name = p['ref_fasta'] - with open(p['ref_meta']) as fp: + with open(p['ref_meta'], 'r') as fp: cluster_info = _load_cluster_info(fp, p['group_field']) @memoize @@ -140,7 +138,8 @@ def get_seq_id(name): cursor.execute(sql, [name]) return cursor.fetchone()[0] - with _ntf(prefix='usearch') as uc_fp: + with ntf(prefix='usearch') as uc_fp: + uc_fp.close() uclust.search( ref_name, p['fasta_file'], @@ -153,16 +152,16 @@ def get_seq_id(name): # import shutil # shutil.copy(uc_fp.name, '.') - records = uclust.parse_uclust_out(uc_fp) - records = (i for i in records if i.type == - 'H' and i.pct_id >= p['search_identity'] * 100.0) + records = uclust.parse_uclust_out(uc_fp.name) + records = (i for i in records + if i.type == 'H' and i.pct_id >= p['search_identity'] * 100.0) by_seq = uclust.hits_by_sequence(records) by_seq = select_hits(by_seq, select_threshold) sql = """ -INSERT INTO best_hits (sequence_id, hit_idx, ref_id, pct_id) -VALUES (?, ?, ?, ?) -""" + INSERT INTO best_hits (sequence_id, hit_idx, ref_id, pct_id) + VALUES (?, ?, ?, ?) + """ for _, hits in by_seq: # Drop clusters from blacklist hits = ( @@ -225,7 +224,7 @@ def get_sample_id(sample_name): seq_count += 1 if sequence.id not in weights: continue - for sample, weight in weights[sequence.id].items(): + for sample, weight in list(weights[sequence.id].items()): sample_id = get_sample_id(sample) cursor.execute("""INSERT INTO sequences_samples (sequence_id, sample_id, weight) @@ -244,11 +243,13 @@ def _create_tables( search_identity=SEARCH_IDENTITY, quiet=True, group_field='cluster'): + schema = os.path.join(os.path.dirname(__file__), 'data', 'search.schema') cursor = con.cursor() cursor.executescript(open(schema).read().strip()) + # Save parameters - rows = [(k, locals().get(k)) for k in _PARAMS.keys()] + rows = [(k, v) for k, v in locals().items() if k in _PARAMS] cursor.executemany("INSERT INTO params VALUES (?, ?)", rows) diff --git a/deenurp/select.py b/deenurp/select.py index 5903de4..6f74b73 100644 --- a/deenurp/select.py +++ b/deenurp/select.py @@ -19,7 +19,7 @@ from . 
import util, wrap from .config import DEFAULT_THREADS -from .util import as_fasta, tempdir +from .util import as_fasta, tempdir, ntf from .wrap import (cmalign, as_refpkg, redupfile_of_seqs, rppr_min_adcl, guppy_redup, pplacer, esl_sfetch) @@ -64,12 +64,10 @@ def _cluster(sequences, threshold=CLUSTER_THRESHOLD): """ sequences = list(sequences) assert sequences - with as_fasta(sequences) as fasta_name, \ - tempfile.NamedTemporaryFile(prefix='uc-') as ntf: - - uclust.cluster(fasta_name, ntf.name, pct_id=threshold, quiet=True) - ntf.seek(0) - r = list(uclust.cluster_seeds(fasta_name, ntf)) + with as_fasta(sequences) as fasta_name, ntf(prefix='uc-') as uc: + uc.close() + uclust.cluster(fasta_name, uc.name, pct_id=threshold, quiet=True) + r = list(uclust.cluster_seeds(fasta_name, uc.name)) logging.debug("Clustered %d to %d", len(sequences), len(r)) return r @@ -115,7 +113,7 @@ def select_sequences_for_cluster( # the operation below assumes unique identifiers for the set of # ref and query seqs, so ensure that this is the case for seq in query_seqs: - seq.id = seq.id + hashlib.md5(seq.id).hexdigest()[:8] + seq.id = seq.id + hashlib.md5(seq.id.encode('utf-8')).hexdigest()[:8] c = itertools.chain(ref_seqs, query_seqs) @@ -127,10 +125,10 @@ def select_sequences_for_cluster( redupfile_of_seqs(query_seqs) as redup_path: jplace = pplacer(rp.path, fasta, out_dir=placedir(), threads=1) + # Redup guppy_redup(jplace, redup_path, placedir('redup.jplace')) - prune_leaves = set( - rppr_min_adcl(placedir('redup.jplace'), keep_leaves)) + prune_leaves = set(rppr_min_adcl(placedir('redup.jplace'), keep_leaves)) result = frozenset(i.id for i in ref_seqs) - prune_leaves assert len(result) == keep_leaves @@ -223,10 +221,13 @@ def sequences_hitting_cluster(con, cluster_name): def esl_sfetch_seqs(sequence_file, sequence_names, fa_idx): """ """ - with tempfile.NamedTemporaryFile(prefix='esl', suffix='.fasta') as tf: + with ntf('wb', prefix='esl', suffix='.fasta') as tf: + # esl_sfetch() writes binary data, so we close and reopen the + # file to access the sequence data in text mode esl_sfetch(sequence_file, sequence_names, tf, fa_idx) - tf.seek(0) - return list(SeqIO.parse(tf, 'fasta')) + tf.close() + with open(tf.name, 'r') as seqs: + return list(SeqIO.parse(seqs, 'fasta')) def get_total_weight_per_sample(con): @@ -306,11 +307,11 @@ def choose_references( sample_weights = get_sample_weights(deenurp_db, cluster_seq_names) norm_sw = dict() - for k, v in sample_weights.items(): + for k, v in list(sample_weights.items()): norm_sw[k] = v / sample_total_weights[k] max_sample, max_weight = max( - norm_sw.items(), key=operator.itemgetter(1)) + list(norm_sw.items()), key=operator.itemgetter(1)) logging.info( 'Cluster %s: Max hit by %s: %.3f%%, %d hits', diff --git a/deenurp/subcommands/add_reps.py b/deenurp/subcommands/add_reps.py index 9b82a2e..fbbdb70 100644 --- a/deenurp/subcommands/add_reps.py +++ b/deenurp/subcommands/add_reps.py @@ -37,7 +37,7 @@ def action(args): tax = taxonomy.Taxonomy(create_engine('sqlite:///{0}'.format(args.tax_db)), ncbi.ranks) - sequence_ids = set(k for k, v in tax_map.items() + sequence_ids = set(k for k, v in list(tax_map.items()) if v and tax.lineage(tax_id=v).get(args.rank) in tax_ids) # Fetch @@ -47,4 +47,4 @@ def action(args): if r.id in sequence_ids: args.outfile.write('{}\n{}\n'.format(r.description, r.seq)) count += 1 - print 'selected', count, 'sequences' + print('selected', count, 'sequences') diff --git a/deenurp/subcommands/cluster_refs.py b/deenurp/subcommands/cluster_refs.py index 
effb725..1fe0a8e 100644 --- a/deenurp/subcommands/cluster_refs.py +++ b/deenurp/subcommands/cluster_refs.py @@ -28,6 +28,7 @@ def build_parser(p): [default: %(default).3f]""") p.add_argument('-i', '--cluster-id', default=0.985, type=float, help="""Cluster ID [default: %(default).3f]""") + def cluster_identify_redundant(named_sequence_file, named_ids, to_cluster, threshold=0.97): with util.ntf(suffix='.uc', prefix='to_cluster') as tf: @@ -38,12 +39,13 @@ def cluster_identify_redundant(named_sequence_file, named_ids, to_cluster, maxrejects=100) # Uclust.search renames to tf, need a new handle. - records = uclust.parse_uclust_out(tf) + records = uclust.parse_uclust_out(tf.name) hits = (i.query_label for i in records if i.type == 'H' and i.pct_id >= threshold * 100.0) return frozenset(hits) + def taxonomic_clustered(taxonomy, cluster_rank): """ Generate tax_id, sequence_id_set tuples for each tax_id at cluster_rank @@ -52,6 +54,7 @@ def taxonomic_clustered(taxonomy, cluster_rank): return ((node.tax_id, frozenset(node.subtree_sequence_ids())) for node in nodes) + def identify_otus_unnamed(seq_file, cluster_similarity): """ Generates sequence ids in a cluster @@ -64,10 +67,11 @@ def identify_otus_unnamed(seq_file, cluster_similarity): # Sort and cluster uclust.cluster( seq_file, tf.name, pct_id=cluster_similarity, quiet=True) - clusters = uclust.sequences_by_cluster(uclust.parse_uclust_out(tf)) + clusters = uclust.sequences_by_cluster(uclust.parse_uclust_out(tf.name)) for _, sequences in clusters: yield [i.query_label for i in sequences] + def action(a): # index fasta file fa_idx = wrap.read_seq_file(a.named_sequence_file) @@ -154,7 +158,7 @@ def add_cluster(i): seqinfo_records = (seqinfo.get(i, {'seqname': i}) for i in done) seqinfo_records = (add_cluster(i) for i in seqinfo_records) - fields = list(seqinfo.values()[0].keys()) + fields = list(list(seqinfo.values())[0].keys()) fields.append('cluster') w = csv.DictWriter(fp, fields, quoting=csv.QUOTE_NONNUMERIC, lineterminator='\n') diff --git a/deenurp/subcommands/expand_named.py b/deenurp/subcommands/expand_named.py index 2e8663b..83cc3f2 100644 --- a/deenurp/subcommands/expand_named.py +++ b/deenurp/subcommands/expand_named.py @@ -136,8 +136,8 @@ def action(a): w.writerows(i for i in r if i['seqname'] not in overlap) if 'cluster' in fn: rows = ({'seqname': k, 'tax_id': v, 'inferred_tax_id': 'yes', 'cluster': v} - for k, v in update_hits.items()) + for k, v in list(update_hits.items())) else: rows = ({'seqname': k, 'tax_id': v, 'inferred_tax_id': 'yes'} - for k, v in update_hits.items()) + for k, v in list(update_hits.items())) w.writerows(rows) diff --git a/deenurp/subcommands/fill_lonely.py b/deenurp/subcommands/fill_lonely.py index 2ab3558..308d18a 100644 --- a/deenurp/subcommands/fill_lonely.py +++ b/deenurp/subcommands/fill_lonely.py @@ -102,7 +102,7 @@ def build_parser(p): p.add_argument('output', help="""Output file (fasta)""", - type=argparse.FileType('w')) + type=argparse.FileType('wb')) p.add_argument('output_seqinfo', help="""Destination to write seqinfo for new representatives""", @@ -209,7 +209,7 @@ def action(args): additional_reps.add(s) logging.info("%d additional references", len(additional_reps)) - with open(args.chosen_fasta) as fp, args.output as ofp: + with open(args.chosen_fasta, 'rb') as fp, args.output as ofp: shutil.copyfileobj(fp, ofp) wrap.esl_sfetch(args.search_fasta, additional_reps, ofp, fa_idx) diff --git a/deenurp/subcommands/filter_outliers.py b/deenurp/subcommands/filter_outliers.py index ec8fc75..0da6d2c 
100644 --- a/deenurp/subcommands/filter_outliers.py +++ b/deenurp/subcommands/filter_outliers.py @@ -135,7 +135,7 @@ def build_parser(p): output_group.add_argument( '--output-seqs', help="""REQUIRED destination for sequences""", required=True, - type=argparse.FileType('w'), metavar='FILE') + type=argparse.FileType('wb'), metavar='FILE') output_group.add_argument( '--filtered-seqinfo', type=argparse.FileType('w'), metavar='FILE', help="""Path to write filtered sequence info""") @@ -233,22 +230,19 @@ def distmat_muscle(sequence_file, prefix, maxiters=wrap.MUSCLE_MAXITERS): with util.ntf(prefix=prefix, suffix='.fasta') as a_fasta: wrap.muscle_files(sequence_file, a_fasta.name, maxiters=maxiters) a_fasta.flush() - taxa, distmat = outliers.fasttree_dists(a_fasta.name) return taxa, distmat -def distmat_cmalign( - sequence_file, - prefix, - cpu=wrap.CMALIGN_THREADS, - min_bitscore=10): +def distmat_cmalign(sequence_file, prefix, cpu=wrap.CMALIGN_THREADS, + min_bitscore=10): - with util.ntf(prefix=prefix, suffix='.aln') as a_sto, \ - util.ntf(prefix=prefix, suffix='.fasta') as a_fasta: + with util.ntf('w+', prefix=prefix, suffix='.aln') as a_sto, \ + util.ntf('w+', prefix=prefix, suffix='.fasta') as a_fasta: scores = wrap.cmalign_files(sequence_file, a_sto.name, cpu=cpu) + a_sto.seek(0) low_scores = scores['bit_sc'] < min_bitscore if low_scores.any(): @@ -294,8 +291,10 @@ def parse_usearch_allpairs(filename, seqnames): nseqs = len(seqnames) distmat = numpy.repeat(0.0, nseqs ** 2) distmat.shape = (nseqs, nseqs) - ii = pd.match(data['query'], seqnames) - jj = pd.match(data['target'], seqnames) + + idx = dict(zip(seqnames, range(nseqs))) + ii = [idx[name] for name in data['query']] + jj = [idx[name] for name in data['target']] # usearch_allpairs_files returns comparisons corresponding to a # triangular matrix, whereas vsearch_allpairs_files returns all @@ -428,7 +427,7 @@ def mock_filter(seqs, keep): empty = numpy.repeat(numpy.nan, len(seqs)) return pd.DataFrame({ - 'seqname': seqs, + 'seqname': list(seqs), 'centroid': empty, 'dist': empty, 'is_out': numpy.repeat(not keep, len(seqs))}) @@ -492,7 +491,7 @@ def filter_worker(tax_id, def action(a): # itemize sequences provided in the input file fa_idx = wrap.read_seq_file(a.sequence_file) - seqnames = fa_idx.keys() + seqnames = list(fa_idx.keys()) # Load taxonomy with a.taxonomy as fp: @@ -618,7 +617,7 @@ def action(a): log.exception( "Error in child process: %s", exception) executor.shutdown(wait=False) - traceback.print_tb(f._traceback) + traceback.print_tb(exception.__traceback__) raise exception info = futs.pop(f) diff --git a/deenurp/subcommands/hrefpkg_build.py b/deenurp/subcommands/hrefpkg_build.py index a374c32..5d1dcfb 100644 --- a/deenurp/subcommands/hrefpkg_build.py +++ b/deenurp/subcommands/hrefpkg_build.py @@ -106,8 +106,8 @@ def action(a): hrefpkgs = [] futs = {} with open(j('index.csv'), 'w') as fp, \ - open(j('train.fasta'), 'w') as train_fp, \ - open(j('test.fasta'), 'w') as test_fp, \ + open(j('train.fasta'), 'wb') as train_fp, \ + open(j('test.fasta'), 'wb') as test_fp, \ futures.ThreadPoolExecutor(a.threads) as executor: def log_hrefpkg(tax_id): path = j(tax_id + '.refpkg') @@ -125,8 +125,8 @@ def log_hrefpkg(tax_id): continue f = executor.submit(tax_id_refpkg, node.tax_id, taxonomy, seqinfo, - a.sequence_file, fa_idx, output_dir=a.output_dir, test_file=test_fp, - train_file=train_fp) + a.sequence_file, fa_idx, output_dir=a.output_dir, + test_file=test_fp, train_file=train_fp) futs[f] = node.tax_id, node.name while futs: @@ -135,7 +135,8 
@@ def log_hrefpkg(tax_id): tax_id, name = futs.pop(f) r = f.result() if r: - logging.info("Finished refpkg for %s (%s) [%d remaining]", name, tax_id, len(pending)) + logging.info( + 'Finished refpkg for %s (%s) [%d remaining]', name, tax_id, len(pending)) log_hrefpkg(tax_id) assert len(futs) == len(pending) @@ -158,7 +159,7 @@ def find_nodes(taxonomy, index_rank, want_rank='species'): moving up a rank if no species-level nodes with sequences exist. """ ranks = taxonomy.ranks - rdict = dict(zip(ranks, xrange(len(ranks)))) + rdict = dict(list(zip(ranks, list(range(len(ranks)))))) assert index_rank in rdict assert want_rank in rdict @@ -229,14 +230,14 @@ def sequence_names(f): sequence_ids = frozenset(taxonomy.subtree_sequence_ids()) with util.ntf(prefix='aln_fasta', suffix='.fasta') as tf, \ - util.ntf(prefix='seq_info', suffix='.csv') as seq_info_fp, \ - util.ntf(prefix='taxonomy', suffix='.csv') as tax_fp: + util.ntf('w', prefix='seq_info', suffix='.csv') as seq_info_fp, \ + util.ntf('w', prefix='taxonomy', suffix='.csv') as tax_fp: wrap.esl_sfetch(sequence_file, sequence_ids, tf, fa_idx) tf.close() # Seqinfo file r = (i for i in seqinfo if i['seqname'] in sequence_ids) - w = csv.DictWriter(seq_info_fp, seqinfo[0].keys(), lineterminator='\n', + w = csv.DictWriter(seq_info_fp, list(seqinfo[0].keys()), lineterminator='\n', quoting=csv.QUOTE_NONNUMERIC) w.writeheader() w.writerows(r) @@ -252,7 +253,7 @@ def sequence_names(f): rp.update_file('taxonomy', tax_fp.name) rp.update_file('profile', wrap.CM) - for k, v in meta.items(): + for k, v in list(meta.items()): rp.update_metadata(k, v) rp.commit_transaction() @@ -280,12 +281,12 @@ def tax_id_refpkg(tax_id, full_tax, seqinfo, sequence_file, fa_idx, Build a reference package containing all descendants of tax_id from an index reference package. 
""" - with util.ntf(prefix='taxonomy', suffix='.csv') as tax_fp, \ - util.ntf(prefix='aln_sto', suffix='.sto') as sto_fp, \ - util.ntf(prefix='aln_fasta', suffix='.fasta') as fasta_fp, \ + with util.ntf('w', prefix='taxonomy', suffix='.csv') as tax_fp, \ + util.ntf('w+', prefix='aln_sto', suffix='.sto') as sto_fp, \ + util.ntf('w', prefix='aln_fasta', suffix='.fasta') as fasta_fp, \ util.ntf(prefix='tree', suffix='.tre') as tree_fp, \ util.ntf(prefix='tree', suffix='.stats') as stats_fp, \ - util.ntf(prefix='seq_info', suffix='.csv') as seq_info_fp: + util.ntf('w', prefix='seq_info', suffix='.csv') as seq_info_fp: # Subset taxonomy n = full_tax.get_node(tax_id) @@ -295,7 +296,7 @@ def tax_id_refpkg(tax_id, full_tax, seqinfo, sequence_file, fa_idx, tax_fp.close() # Subset seq_info - w = csv.DictWriter(seq_info_fp, seqinfo[0].keys(), + w = csv.DictWriter(seq_info_fp, list(seqinfo[0].keys()), quoting=csv.QUOTE_NONNUMERIC) w.writeheader() rows = [i for i in seqinfo if i['tax_id'] in descendants] @@ -311,8 +312,8 @@ def tax_id_refpkg(tax_id, full_tax, seqinfo, sequence_file, fa_idx, keep_seq_ids |= frozenset(keep) l = len(rest) if l >= 2 * PER_TAXON: - train_seq_ids |= frozenset(rest[:l / 2]) - test_seq_ids |= frozenset(rest[l / 2:]) + train_seq_ids |= frozenset(rest[:l // 2]) + test_seq_ids |= frozenset(rest[l // 2:]) # Picked rows = [sinfo[i] for i in keep_seq_ids] @@ -320,12 +321,13 @@ def tax_id_refpkg(tax_id, full_tax, seqinfo, sequence_file, fa_idx, seq_info_fp.close() # Fetch sequences - with tempfile.NamedTemporaryFile() as tf: - wrap.esl_sfetch(sequence_file, - keep_seq_ids, tf, fa_idx) - # Rewind - tf.seek(0) - sequences = list(SeqIO.parse(tf, 'fasta')) + with util.ntf() as tf: + wrap.esl_sfetch(sequence_file, keep_seq_ids, tf, fa_idx) + tf.close() + # reopen in text mode and read extracted sequences + with open(tf.name) as seqfile: + sequences = list(SeqIO.parse(seqfile, 'fasta')) + logging.info("Tax id %s: %d sequences", tax_id, len(sequences)) if len(set(str(i.seq) for i in sequences)) == 1: @@ -349,8 +351,9 @@ def tax_id_refpkg(tax_id, full_tax, seqinfo, sequence_file, fa_idx, aligned = wrap.cmalign(sequences, output=sto_fp) aligned = list(aligned) assert aligned + # Tree - wrap.fasttree(aligned, log_path=stats_fp.name, output_fp=tree_fp, threads=1, gtr=True) + wrap.fasttree(aligned, log_path=stats_fp.name, output_fp=tree_fp.name, threads=1, gtr=True) tree_fp.close() sto_fp.close() SeqIO.write(aligned, fasta_fp, 'fasta') @@ -366,7 +369,7 @@ def tax_id_refpkg(tax_id, full_tax, seqinfo, sequence_file, fa_idx, try: rp.update_phylo_model('FastTree', stats_fp.name) except: - print >> sys.stderr, stats_fp.read() + print(stats_fp.read(), file=sys.stderr) raise rp.update_file('profile', wrap.CM) rp.commit_transaction() @@ -403,7 +406,7 @@ def w(*args): partition_count = int(partition_prop * child_count) logging.info("Pruning %d/%d from %s-%s", partition_count, child_count, node.tax_id, node.name) - prune = set(random.sample(range(len(children)), partition_count)) + prune = set(random.sample(list(range(len(children))), partition_count)) # Lists of taxa to prune from the individual partitions p1_prune = [n.tax_id for i, n in enumerate(children) if i in prune] diff --git a/deenurp/subcommands/rdp_sequence_filter.py b/deenurp/subcommands/rdp_sequence_filter.py index e010f57..26314a2 100644 --- a/deenurp/subcommands/rdp_sequence_filter.py +++ b/deenurp/subcommands/rdp_sequence_filter.py @@ -65,7 +65,7 @@ def action(a): accepted = 0 rejected = 0 - for sequence, info in itertools.izip(sequences, 
reader): + for sequence, info in zip(sequences, reader): assert sequence.id == info['seqname'] # Check quality diff --git a/deenurp/subcommands/transfer_names.py b/deenurp/subcommands/transfer_names.py index f892bea..479855f 100644 --- a/deenurp/subcommands/transfer_names.py +++ b/deenurp/subcommands/transfer_names.py @@ -147,7 +147,7 @@ def action(args): w = csv.DictWriter(new_seq_info, ref_seq_info_reader.fieldnames) w.writeheader() - w.writerows(ref_seq_info.values()) + w.writerows(list(ref_seq_info.values())) new_seq_info.close() args.refpkg.start_transaction() diff --git a/deenurp/test/__main__.py b/deenurp/test/__main__.py index 3a6806a..ec55a73 100644 --- a/deenurp/test/__main__.py +++ b/deenurp/test/__main__.py @@ -9,13 +9,13 @@ suite = test.suite() outcome = suite.run(result) if outcome.wasSuccessful(): - print('ok: ' + str(outcome)) + print(('ok: ' + str(outcome))) else: - print('--> {} failures:'.format(len(outcome.failures))) + print(('--> {} failures:'.format(len(outcome.failures)))) for testcase, tb in outcome.failures: msg = str(testcase) - print('=' * len(msg)) - print(msg + '\n') - print(tb.strip()) - print('=' * len(msg)) + print(('=' * len(msg))) + print((msg + '\n')) + print((tb.strip())) + print(('=' * len(msg))) sys.exit(1) diff --git a/deenurp/test/test_outliers.py b/deenurp/test/test_outliers.py index efbed44..ab8efc6 100644 --- a/deenurp/test/test_outliers.py +++ b/deenurp/test/test_outliers.py @@ -8,7 +8,7 @@ try: import numpy as np import pandas as pd -except ImportError, err: +except ImportError as err: # prefer errors within tests over failure at the time the test # suites are assembled print(err) @@ -133,7 +133,7 @@ def test_mds_02(self): try: wrap.require_executable(wrap.VSEARCH) -except MissingDependencyError, e: +except MissingDependencyError as e: vsearch_available = False else: vsearch_available = True diff --git a/deenurp/test/test_search.py b/deenurp/test/test_search.py index a1472fe..5d6993c 100644 --- a/deenurp/test/test_search.py +++ b/deenurp/test/test_search.py @@ -1,6 +1,6 @@ import collections import os.path -from cStringIO import StringIO +from io import StringIO import unittest from deenurp import search @@ -58,5 +58,5 @@ def test_basic(self): expected = [ ('seq1', [TestHit('seq1', 't2', 99.9)]), ('seq2', [TestHit('seq2', 't6', 98.4)])] - self.assertItemsEqual(expected, r) + self.assertCountEqual(expected, r) diff --git a/deenurp/test/test_subcommand_extract_genbank.py b/deenurp/test/test_subcommand_extract_genbank.py index cac7c1c..a6e7db8 100644 --- a/deenurp/test/test_subcommand_extract_genbank.py +++ b/deenurp/test/test_subcommand_extract_genbank.py @@ -7,7 +7,7 @@ import os import unittest -from cStringIO import StringIO +from io import StringIO from deenurp.subcommands import ncbi_extract_genbank diff --git a/deenurp/test/test_subcommand_filter_outliers.py b/deenurp/test/test_subcommand_filter_outliers.py index a4106d7..93cd15f 100644 --- a/deenurp/test/test_subcommand_filter_outliers.py +++ b/deenurp/test/test_subcommand_filter_outliers.py @@ -1,5 +1,6 @@ import unittest +import pandas as pd from Bio import SeqIO from deenurp import wrap @@ -22,6 +23,14 @@ def test_parse_usearch_allpairs(self): distmat = filter_outliers.parse_usearch_allpairs(filename, seqnames) self.assertEqual(len(seqnames), distmat.shape[0]) + # confirm pairwise comparisons in the file + tab = pd.read_table(filename, header=None, names=filter_outliers.BLAST6NAMES) + for __, row in tab.iterrows(): + dist = distmat[seqnames.index(row['query']), 
seqnames.index(row['target'])] + self.assertAlmostEqual(dist, 1 - (row['pct_id'] / 100.0)) + + @unittest.skipUnless(which(wrap.VSEARCH), "{} not found.".format(wrap.VSEARCH)) def test_distmat_pairwise_vsearch(self): infile = util.data_path('e_faecalis.head.fasta') diff --git a/deenurp/test/test_util.py b/deenurp/test/test_util.py index 3cbbc2e..25a7295 100644 --- a/deenurp/test/test_util.py +++ b/deenurp/test/test_util.py @@ -15,7 +15,7 @@ def test_nokey(self): def test_key(self): keys = ('n', 's') v = [(1, 'test'), (2, 'test'), (2, 'other')] - l = [dict(zip(keys, i)) for i in v] + l = [dict(list(zip(keys, i))) for i in v] expected1 = [{'n': 1, 's': 'test'}, {'n': 2, 's': 'test'}] actual1 = util.unique(l, key=operator.itemgetter('n')) diff --git a/deenurp/test/test_wrap.py b/deenurp/test/test_wrap.py index fe019ee..3f79064 100644 --- a/deenurp/test/test_wrap.py +++ b/deenurp/test/test_wrap.py @@ -45,14 +45,15 @@ def test_as_refpkg(self): self.assertTrue(os.path.isdir(refpkg.path)) if which('rppr'): - out = subprocess.check_output(['rppr', 'check', '-c', refpkg.path]) - self.assertTrue('OK!' in out, out) + job = subprocess.run(['rppr', 'check', '-c', refpkg.path], + capture_output=True, text=True) + self.assertTrue('OK!' in job.stdout) @unittest.skipUnless(which('rppr'), "rppr not found") class RpprMinAdclTreeTestCase(unittest.TestCase): def setUp(self): - self.tf = tempfile.NamedTemporaryFile(prefix='adcl', suffix='.tre') + self.tf = tempfile.NamedTemporaryFile('w+', prefix='adcl', suffix='.tre') self.tf.write("((C000721552:0.20692,C002038857:0.00015)0.844:0.01031,C002038856:0.00014,((C002963332:0.08558,(C001550734:0.06763,((C000004779:0.03889,C002963310:0.04622)0.633:0.00151,(C002963318:0.00014,C002963266:0.00014)0.697:0.00016)0.992:0.15253)0.889:0.07332)0.924:0.07668,C002038858:0.01032)0.907:0.00014);\n") self.tf.flush() @@ -70,7 +71,7 @@ def test_min_adcl_prune7(self): try: wrap.require_executable(wrap.VSEARCH) -except MissingDependencyError, e: +except MissingDependencyError as e: vsearch_available = False else: vsearch_available = True @@ -89,7 +90,7 @@ def test_vsearch_version_fail(self): wrap._require_vsearch_version, version='5.0') def test_vsearch_allpairs_files(self): - with deenurp.util.ntf(suffix='.blast6out', mode='rw') as outfile: + with deenurp.util.ntf(suffix='.blast6out', mode='w+') as outfile: wrap.vsearch_allpairs_files(self.sequencefile, outfile.name) self.assertTrue(os.path.exists(outfile.name)) outfile.flush() diff --git a/deenurp/uclust.py b/deenurp/uclust.py index e33a3dd..a610aa1 100644 --- a/deenurp/uclust.py +++ b/deenurp/uclust.py @@ -28,8 +28,9 @@ DEFAULT_PCT_ID = 0.99 # For parsing .uc format -UCLUST_HEADERS = ['type', 'cluster_number', 'size', 'pct_id', 'strand', - 'query_start', 'seed_start', 'alignment', 'query_label', 'target_label'] +UCLUST_HEADERS = ['type', 'cluster_number', 'size', 'pct_id', + 'strand', 'query_start', 'seed_start', 'alignment', + 'query_label', 'target_label'] UCLUST_TYPES = {'cluster_number': int, 'pct_id': float, 'query_start': int, 'seed_start': int, 'size': int} @@ -44,10 +45,11 @@ def _handle(s, *args, **kwargs): If s is a string, opens s and yields the open file. - Otherwise, has no effect. + Otherwise, raises ValueError: pass a file name, not an open handle. """ - if isinstance(s, basestring): + if isinstance(s, str): with open(s, *args, **kwargs) as fp: yield fp else: - yield s + raise ValueError('try passing in a string instead') @@ -65,7 +67,7 @@ def _check_call(cmd, **kwargs): Log and run command. 
Additional arguments are passed to ``subprocess.check_call`` """ - cmd = map(str, cmd) + cmd = list(map(str, cmd)) logging.debug(' '.join(cmd)) subprocess.check_call(cmd, **kwargs) @@ -88,8 +90,9 @@ """ Parse the results of running UCLUST, returning UClustRecords. - ucout_fp can be file name or file handle. + ucout_fp must be a file name. """ + with _handle(ucout_fp) as fp: # Skip comments rows = (i.rstrip() for i in fp if not i.startswith('#')) @@ -101,10 +104,12 @@ def parse_uclust_as_df(ucout_fp): dtype = {'type': str, 'query_label': str, 'target_label': str, 'alignment': str} - df = pd.read_csv(ucout_fp, sep='\t', na_values='*', names=UCLUST_HEADERS, dtype=dtype) + df = pd.read_csv( + ucout_fp, sep='\t', na_values='*', names=UCLUST_HEADERS, dtype=dtype) # define target_label as query_label for seed sequences - df['target_label'] = np.where(df['type'] == 'S', df['query_label'], df['target_label']) + df['target_label'] = np.where( + df['type'] == 'S', df['query_label'], df['target_label']) return df @@ -303,6 +308,6 @@ def guppy_redup_from_uclust(uclust_records, sample_map=None): clusters[number][sample].count += 1 rows = [(seeds[num], dedup_seq.id, dedup_seq.count) - for num, samples in clusters.items() - for dedup_seq in samples.values()] + for num, samples in list(clusters.items()) + for dedup_seq in list(samples.values())] return rows diff --git a/deenurp/util.py b/deenurp/util.py index f760208..a90355e 100644 --- a/deenurp/util.py +++ b/deenurp/util.py @@ -23,7 +23,7 @@ def apply_df_status(func, df, msg=''): """ tmp_column = 'index_number' row_count = float(len(df)) - df[tmp_column] = xrange(int(row_count)) + df[tmp_column] = range(int(row_count)) msg += ' {:.0%}\r' def apply_func(item, msg): @@ -120,13 +120,13 @@ @contextlib.contextmanager -def ntf(**kwargs): +def ntf(*args, **kwargs): """ Near-clone of tempfile.NamedTemporaryFile, but the file is deleted when the context manager exits, rather than when it's closed. 
""" kwargs['delete'] = False - tf = tempfile.NamedTemporaryFile(**kwargs) + tf = tempfile.NamedTemporaryFile(*args, **kwargs) try: with tf: yield tf @@ -135,17 +135,16 @@ def ntf(**kwargs): @contextlib.contextmanager -def tempcopy(path, **kwargs): - """ - Create a temporary copy of ``path``, available for the duration of the - context manager +def tempcopy(path): + """Create a temporary copy of ``path``, available for the duration of + the context manager + """ + prefix, suffix = os.path.splitext(os.path.basename(path)) - a = {'prefix': prefix, 'suffix': suffix} - a.update(kwargs) - with open(path) as fp, ntf(**a) as tf: - shutil.copyfileobj(fp, tf) + with ntf(prefix=prefix, suffix=suffix) as tf: tf.close() + shutil.copyfile(path, tf.name) yield tf.name @@ -179,7 +178,7 @@ def as_fasta(sequences, **kwargs): """ if 'suffix' not in kwargs: kwargs['suffix'] = '.fasta' - with ntf(**kwargs) as tf: + with ntf('w+', **kwargs) as tf: SeqIO.write(sequences, tf, 'fasta') tf.flush() tf.close() diff --git a/deenurp/wrap.py b/deenurp/wrap.py index 6082c35..57226ee 100644 --- a/deenurp/wrap.py +++ b/deenurp/wrap.py @@ -10,7 +10,7 @@ import subprocess import re from distutils.version import LooseVersion -from cStringIO import StringIO +from io import StringIO import pandas as pd @@ -52,10 +52,9 @@ def as_refpkg(sequences, name='temp.refpkg', threads=FASTTREE_THREADS): tempdir(prefix='refpkg') as refpkg_dir: log_fp.close() - - fasttree(sequences, log_path=log_fp.name, output_fp=tree_fp, gtr=True, - threads=threads) tree_fp.close() + fasttree(sequences, log_path=log_fp.name, output_fp=tree_fp.name, + gtr=True, threads=threads) rp = Refpkg(refpkg_dir(name), create=True) rp.update_metadata('locus', '') @@ -63,11 +62,11 @@ def as_refpkg(sequences, name='temp.refpkg', threads=FASTTREE_THREADS): rp.update_file('tree', tree_fp.name) # FASTA and Stockholm alignment - with ntf(suffix='.fasta') as f: + with ntf('w', suffix='.fasta') as f: SeqIO.write(sequences, f, 'fasta') f.close() rp.update_file('aln_fasta', f.name) - with ntf(suffix='.sto') as f: + with ntf('w', suffix='.sto') as f: SeqIO.write(sequences, f, 'stockholm') f.close() rp.update_file('aln_sto', f.name) @@ -76,8 +75,8 @@ def as_refpkg(sequences, name='temp.refpkg', threads=FASTTREE_THREADS): @contextlib.contextmanager -def redupfile_of_seqs(sequences, **kwargs): - with ntf(**kwargs) as tf: +def redupfile_of_seqs(sequences): + with ntf('w') as tf: writer = csv.writer(tf, lineterminator='\n') rows = ((s.id, s.id, s.annotations.get('weight', 1.0)) for s in sequences) writer.writerows(rows) @@ -89,9 +88,10 @@ def redupfile_of_seqs(sequences, **kwargs): def fasttree(sequences, output_fp, log_path=None, quiet=True, gtr=False, gamma=False, threads=FASTTREE_THREADS, prefix=None): - if len(sequences) < 3: + nseqs = len(sequences) + if nseqs < 3: raise ValueError( - 'at least 3 sequences are required but {} were provided'.format(len(sequences))) + f'at least 3 sequences are required but {nseqs} were provided') executable = 'FastTreeMP' if threads and threads > 1 else 'FastTree' if executable == 'FastTreeMP' and not which('FastTreeMP'): @@ -102,27 +102,25 @@ def fasttree(sequences, output_fp, log_path=None, quiet=True, env = os.environ.copy() if threads: env['OMP_NUM_THREADS'] = str(threads) - cmd = (prefix or []) + [executable, '-nt'] - for k, v in (('-gtr', gtr), ('-gamma', gamma), ('-quiet', quiet)): - if v: - cmd.append(k) - if log_path is not None: - cmd.extend(['-log', log_path]) - logging.debug(' '.join(cmd)) + with ntf('w', suffix='.fasta') as fasta: + 
assert SeqIO.write(sequences, fasta, 'fasta') + fasta.flush() - with ntf() as stderr: - p = subprocess.Popen(cmd, stdout=output_fp, stdin=subprocess.PIPE, - stderr=stderr, env=env) + cmd = (prefix or []) + [executable] + opts = [('-gtr', gtr), ('-gamma', gamma), ('-quiet', quiet)] + cmd.extend([k for k, v in opts if v]) - count = SeqIO.write(sequences, p.stdin, 'fasta') - assert count - p.stdin.close() - p.wait() - if not p.returncode == 0: - stderr.seek(0) - logging.error(stderr.read()) - raise subprocess.CalledProcessError(p.returncode, cmd) + if log_path: + cmd.extend(['-log', log_path]) + + cmd.extend(['-out', output_fp, '-nt', fasta.name]) + logging.debug(' '.join(cmd)) + + job = subprocess.run(cmd, capture_output=True, text=True, env=env) + if job.returncode != 0: + logging.error(job.stderr) + raise subprocess.CalledProcessError(job.returncode, cmd) def guppy_redup(placefile, redup_file, output): @@ -175,8 +173,8 @@ def rppr_min_adcl(jplace, leaves, algorithm='pam', posterior_prob=False, if always_include: cmd.extend(('--always-include', always_include)) logging.debug(' '.join(cmd)) - output = subprocess.check_output(cmd) - return output.splitlines() + job = subprocess.run(cmd, capture_output=True, text=True, check=True) + return job.stdout.strip().splitlines() def rppr_min_adcl_tree(newick_file, leaves, algorithm='pam', always_include=None): @@ -200,9 +198,8 @@ def _require_cmalign_11(cmalign='cmalign'): """ Check for cmalign version 1.1, raising an error if not found """ version_str = 'INFERNAL 1.1' cmd = [cmalign, '-h'] - o = subprocess.check_output(cmd) - if version_str not in o: + o = subprocess.run(cmd, capture_output=True, text=True) + if version_str not in o.stdout: msg = ('cmalign 1.1 not found. ' 'Expected {0} in output of "{1}", got:\n{2}').format( - version_str, ' '.join(cmd), o) + version_str, ' '.join(cmd), o.stdout) @@ -213,56 +210,64 @@ def cmalign_scores(text): """ Parse stdout of cmalign into a data.frame """ + dtypes = { "idx": int, "seq_name": str, "length": int, "cm_from": int, "cm_to": int, "trunc": str, "bit_sc": float, "avg_pp": str, "band_calc": float, "alignment": float, "total": float, "mem": float } + return pd.read_csv( StringIO(text), comment="#", delim_whitespace=True, dtype=dtypes, index_col='seq_name', names=list(dtypes.keys()) ) - header_rexp = re.compile(r'^#\s+idx') - lines = [] - for line in text.splitlines(): - if header_rexp.search(line): - line = ' ' + line[1:].replace(' (Mb)', '') - # replace single spaces - line = re.sub(r'(?= `version` """ - cmd = [vsearch, '--version'] - p = subprocess.Popen( - cmd, - stderr=subprocess.PIPE, - stdout=open(os.devnull, 'w')) - __, stderr = p.communicate() - vsearch = re.search(r'^vsearch v(?P<vstr>\d+\.\d+\.[^_]+)', stderr) + output = subprocess.run([vsearch, '--version'], capture_output=True, text=True) + vsearch = re.search(r'^vsearch v(?P<vstr>\d+\.\d+\.[^_]+)', output.stderr) ver = vsearch.groupdict()['vstr'] if LooseVersion(ver) < LooseVersion(version): @@ -305,32 +305,31 @@ def vsearch_allpairs_files(input_file, output_file, executable=VSEARCH, '--blast6out', output_file] logging.info(' '.join(cmd)) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - logging.debug(p.stdout.read().strip()) - error = p.stderr.read().strip() - if p.wait() != 0: + job = subprocess.run(cmd, capture_output=True, text=True) + logging.debug(job.stdout) + + if job.returncode != 0: # TODO: preserve output files (input_file, output_file) - raise subprocess.CalledProcessError(p.returncode, error) + raise 
subprocess.CalledProcessError(job.returncode, job.stderr) def muscle_files(input_file, output_file, maxiters=MUSCLE_MAXITERS): - cmd = ['muscle'] + cmd = [ + 'muscle', + '-in', input_file, + '-out', output_file, + # TODO: set value based on number of sequences? + '-maxiters', str(maxiters), + ] + logging.debug(' '.join(cmd)) require_executable(cmd[0]) - cmd.extend(['-in', input_file]) - cmd.extend(['-out', output_file]) - - # TODO: set value based on number of sequences? - cmd.extend(['-maxiters', str(maxiters)]) + job = subprocess.run(cmd, capture_output=True, text=True) + logging.debug(job.stdout) - logging.debug(' '.join(cmd)) - p = subprocess.Popen(cmd, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - logging.debug(p.stdout.read().strip()) - error = p.stderr.read().strip() - if p.wait() != 0: + if job.returncode != 0: # TODO: preserve output files (input_file, output_file) - raise subprocess.CalledProcessError(p.returncode, error) + raise subprocess.CalledProcessError(job.returncode, job.stderr) def read_seq_file(sequence_file): @@ -358,7 +357,7 @@ def esl_sfetch(sequence_file, name_iter, output_fp, fa_idx): """ Fetch sequences named in name_iter from sequence_file, indexing if - necessary, writing to output_fp. + necessary, writing binary data to open file object output_fp. """ count = 0 with open(sequence_file, 'rb') as fi: diff --git a/distribute_setup.py b/distribute_setup.py deleted file mode 100644 index a1cc2a1..0000000 --- a/distribute_setup.py +++ /dev/null @@ -1,546 +0,0 @@ -#!python -"""Bootstrap distribute installation - -If you want to use setuptools in your package's setup.py, just include this -file in the same directory with it, and add this to the top of your setup.py:: - - from distribute_setup import use_setuptools - use_setuptools() - -If you want to require a specific version of setuptools, set a download -mirror, or use an alternate download directory, you can do so by supplying -the appropriate options to ``use_setuptools()``. - -This file can also be run as a script to install or upgrade setuptools. 
-""" -import os -import shutil -import sys -import time -import fnmatch -import tempfile -import tarfile -import optparse - -from distutils import log - -try: - from site import USER_SITE -except ImportError: - USER_SITE = None - -try: - import subprocess - - def _python_cmd(*args): - args = (sys.executable,) + args - return subprocess.call(args) == 0 - -except ImportError: - # will be used for python 2.3 - def _python_cmd(*args): - args = (sys.executable,) + args - # quoting arguments if windows - if sys.platform == 'win32': - def quote(arg): - if ' ' in arg: - return '"%s"' % arg - return arg - args = [quote(arg) for arg in args] - return os.spawnl(os.P_WAIT, sys.executable, *args) == 0 - -DEFAULT_VERSION = "0.6.34" -DEFAULT_URL = "http://pypi.python.org/packages/source/d/distribute/" -SETUPTOOLS_FAKED_VERSION = "0.6c11" - -SETUPTOOLS_PKG_INFO = """\ -Metadata-Version: 1.0 -Name: setuptools -Version: %s -Summary: xxxx -Home-page: xxx -Author: xxx -Author-email: xxx -License: xxx -Description: xxx -""" % SETUPTOOLS_FAKED_VERSION - - -def _install(tarball, install_args=()): - # extracting the tarball - tmpdir = tempfile.mkdtemp() - log.warn('Extracting in %s', tmpdir) - old_wd = os.getcwd() - try: - os.chdir(tmpdir) - tar = tarfile.open(tarball) - _extractall(tar) - tar.close() - - # going in the directory - subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) - os.chdir(subdir) - log.warn('Now working in %s', subdir) - - # installing - log.warn('Installing Distribute') - if not _python_cmd('setup.py', 'install', *install_args): - log.warn('Something went wrong during the installation.') - log.warn('See the error message above.') - # exitcode will be 2 - return 2 - finally: - os.chdir(old_wd) - shutil.rmtree(tmpdir) - - -def _build_egg(egg, tarball, to_dir): - # extracting the tarball - tmpdir = tempfile.mkdtemp() - log.warn('Extracting in %s', tmpdir) - old_wd = os.getcwd() - try: - os.chdir(tmpdir) - tar = tarfile.open(tarball) - _extractall(tar) - tar.close() - - # going in the directory - subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) - os.chdir(subdir) - log.warn('Now working in %s', subdir) - - # building an egg - log.warn('Building a Distribute egg in %s', to_dir) - _python_cmd('setup.py', '-q', 'bdist_egg', '--dist-dir', to_dir) - - finally: - os.chdir(old_wd) - shutil.rmtree(tmpdir) - # returning the result - log.warn(egg) - if not os.path.exists(egg): - raise IOError('Could not build the egg.') - - -def _do_download(version, download_base, to_dir, download_delay): - egg = os.path.join(to_dir, 'distribute-%s-py%d.%d.egg' - % (version, sys.version_info[0], sys.version_info[1])) - if not os.path.exists(egg): - tarball = download_setuptools(version, download_base, - to_dir, download_delay) - _build_egg(egg, tarball, to_dir) - sys.path.insert(0, egg) - import setuptools - setuptools.bootstrap_install_from = egg - - -def use_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, - to_dir=os.curdir, download_delay=15, no_fake=True): - # making sure we use the absolute path - to_dir = os.path.abspath(to_dir) - was_imported = 'pkg_resources' in sys.modules or \ - 'setuptools' in sys.modules - try: - try: - import pkg_resources - if not hasattr(pkg_resources, '_distribute'): - if not no_fake: - _fake_setuptools() - raise ImportError - except ImportError: - return _do_download(version, download_base, to_dir, download_delay) - try: - pkg_resources.require("distribute>=" + version) - return - except pkg_resources.VersionConflict: - e = sys.exc_info()[1] - if was_imported: - 
sys.stderr.write( - "The required version of distribute (>=%s) is not available,\n" - "and can't be installed while this script is running. Please\n" - "install a more recent version first, using\n" - "'easy_install -U distribute'." - "\n\n(Currently using %r)\n" % (version, e.args[0])) - sys.exit(2) - else: - del pkg_resources, sys.modules['pkg_resources'] # reload ok - return _do_download(version, download_base, to_dir, - download_delay) - except pkg_resources.DistributionNotFound: - return _do_download(version, download_base, to_dir, - download_delay) - finally: - if not no_fake: - _create_fake_setuptools_pkg_info(to_dir) - - -def download_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, - to_dir=os.curdir, delay=15): - """Download distribute from a specified location and return its filename - - `version` should be a valid distribute version number that is available - as an egg for download under the `download_base` URL (which should end - with a '/'). `to_dir` is the directory where the egg will be downloaded. - `delay` is the number of seconds to pause before an actual download - attempt. - """ - # making sure we use the absolute path - to_dir = os.path.abspath(to_dir) - try: - from urllib.request import urlopen - except ImportError: - from urllib2 import urlopen - tgz_name = "distribute-%s.tar.gz" % version - url = download_base + tgz_name - saveto = os.path.join(to_dir, tgz_name) - src = dst = None - if not os.path.exists(saveto): # Avoid repeated downloads - try: - log.warn("Downloading %s", url) - src = urlopen(url) - # Read/write all in one block, so we don't create a corrupt file - # if the download is interrupted. - data = src.read() - dst = open(saveto, "wb") - dst.write(data) - finally: - if src: - src.close() - if dst: - dst.close() - return os.path.realpath(saveto) - - -def _no_sandbox(function): - def __no_sandbox(*args, **kw): - try: - from setuptools.sandbox import DirectorySandbox - if not hasattr(DirectorySandbox, '_old'): - def violation(*args): - pass - DirectorySandbox._old = DirectorySandbox._violation - DirectorySandbox._violation = violation - patched = True - else: - patched = False - except ImportError: - patched = False - - try: - return function(*args, **kw) - finally: - if patched: - DirectorySandbox._violation = DirectorySandbox._old - del DirectorySandbox._old - - return __no_sandbox - - -def _patch_file(path, content): - """Will backup the file then patch it""" - f = open(path) - existing_content = f.read() - f.close() - if existing_content == content: - # already patched - log.warn('Already patched.') - return False - log.warn('Patching...') - _rename_path(path) - f = open(path, 'w') - try: - f.write(content) - finally: - f.close() - return True - -_patch_file = _no_sandbox(_patch_file) - - -def _same_content(path, content): - f = open(path) - existing_content = f.read() - f.close() - return existing_content == content - - -def _rename_path(path): - new_name = path + '.OLD.%s' % time.time() - log.warn('Renaming %s to %s', path, new_name) - os.rename(path, new_name) - return new_name - - -def _remove_flat_installation(placeholder): - if not os.path.isdir(placeholder): - log.warn('Unkown installation at %s', placeholder) - return False - found = False - for file in os.listdir(placeholder): - if fnmatch.fnmatch(file, 'setuptools*.egg-info'): - found = True - break - if not found: - log.warn('Could not locate setuptools*.egg-info') - return - - log.warn('Moving elements out of the way...') - pkg_info = os.path.join(placeholder, file) - if 
os.path.isdir(pkg_info): - patched = _patch_egg_dir(pkg_info) - else: - patched = _patch_file(pkg_info, SETUPTOOLS_PKG_INFO) - - if not patched: - log.warn('%s already patched.', pkg_info) - return False - # now let's move the files out of the way - for element in ('setuptools', 'pkg_resources.py', 'site.py'): - element = os.path.join(placeholder, element) - if os.path.exists(element): - _rename_path(element) - else: - log.warn('Could not find the %s element of the ' - 'Setuptools distribution', element) - return True - -_remove_flat_installation = _no_sandbox(_remove_flat_installation) - - -def _after_install(dist): - log.warn('After install bootstrap.') - placeholder = dist.get_command_obj('install').install_purelib - _create_fake_setuptools_pkg_info(placeholder) - - -def _create_fake_setuptools_pkg_info(placeholder): - if not placeholder or not os.path.exists(placeholder): - log.warn('Could not find the install location') - return - pyver = '%s.%s' % (sys.version_info[0], sys.version_info[1]) - setuptools_file = 'setuptools-%s-py%s.egg-info' % \ - (SETUPTOOLS_FAKED_VERSION, pyver) - pkg_info = os.path.join(placeholder, setuptools_file) - if os.path.exists(pkg_info): - log.warn('%s already exists', pkg_info) - return - - log.warn('Creating %s', pkg_info) - try: - f = open(pkg_info, 'w') - except EnvironmentError: - log.warn("Don't have permissions to write %s, skipping", pkg_info) - return - try: - f.write(SETUPTOOLS_PKG_INFO) - finally: - f.close() - - pth_file = os.path.join(placeholder, 'setuptools.pth') - log.warn('Creating %s', pth_file) - f = open(pth_file, 'w') - try: - f.write(os.path.join(os.curdir, setuptools_file)) - finally: - f.close() - -_create_fake_setuptools_pkg_info = _no_sandbox( - _create_fake_setuptools_pkg_info -) - - -def _patch_egg_dir(path): - # let's check if it's already patched - pkg_info = os.path.join(path, 'EGG-INFO', 'PKG-INFO') - if os.path.exists(pkg_info): - if _same_content(pkg_info, SETUPTOOLS_PKG_INFO): - log.warn('%s already patched.', pkg_info) - return False - _rename_path(path) - os.mkdir(path) - os.mkdir(os.path.join(path, 'EGG-INFO')) - pkg_info = os.path.join(path, 'EGG-INFO', 'PKG-INFO') - f = open(pkg_info, 'w') - try: - f.write(SETUPTOOLS_PKG_INFO) - finally: - f.close() - return True - -_patch_egg_dir = _no_sandbox(_patch_egg_dir) - - -def _before_install(): - log.warn('Before install bootstrap.') - _fake_setuptools() - - -def _under_prefix(location): - if 'install' not in sys.argv: - return True - args = sys.argv[sys.argv.index('install') + 1:] - for index, arg in enumerate(args): - for option in ('--root', '--prefix'): - if arg.startswith('%s=' % option): - top_dir = arg.split('root=')[-1] - return location.startswith(top_dir) - elif arg == option: - if len(args) > index: - top_dir = args[index + 1] - return location.startswith(top_dir) - if arg == '--user' and USER_SITE is not None: - return location.startswith(USER_SITE) - return True - - -def _fake_setuptools(): - log.warn('Scanning installed packages') - try: - import pkg_resources - except ImportError: - # we're cool - log.warn('Setuptools or Distribute does not seem to be installed.') - return - ws = pkg_resources.working_set - try: - setuptools_dist = ws.find( - pkg_resources.Requirement.parse('setuptools', replacement=False) - ) - except TypeError: - # old distribute API - setuptools_dist = ws.find( - pkg_resources.Requirement.parse('setuptools') - ) - - if setuptools_dist is None: - log.warn('No setuptools distribution found') - return - # detecting if it was already faked - 
setuptools_location = setuptools_dist.location - log.warn('Setuptools installation detected at %s', setuptools_location) - - # if --root or --preix was provided, and if - # setuptools is not located in them, we don't patch it - if not _under_prefix(setuptools_location): - log.warn('Not patching, --root or --prefix is installing Distribute' - ' in another location') - return - - # let's see if its an egg - if not setuptools_location.endswith('.egg'): - log.warn('Non-egg installation') - res = _remove_flat_installation(setuptools_location) - if not res: - return - else: - log.warn('Egg installation') - pkg_info = os.path.join(setuptools_location, 'EGG-INFO', 'PKG-INFO') - if (os.path.exists(pkg_info) and - _same_content(pkg_info, SETUPTOOLS_PKG_INFO)): - log.warn('Already patched.') - return - log.warn('Patching...') - # let's create a fake egg replacing setuptools one - res = _patch_egg_dir(setuptools_location) - if not res: - return - log.warn('Patching complete.') - _relaunch() - - -def _relaunch(): - log.warn('Relaunching...') - # we have to relaunch the process - # pip marker to avoid a relaunch bug - _cmd1 = ['-c', 'install', '--single-version-externally-managed'] - _cmd2 = ['-c', 'install', '--record'] - if sys.argv[:3] == _cmd1 or sys.argv[:3] == _cmd2: - sys.argv[0] = 'setup.py' - args = [sys.executable] + sys.argv - sys.exit(subprocess.call(args)) - - -def _extractall(self, path=".", members=None): - """Extract all members from the archive to the current working - directory and set owner, modification time and permissions on - directories afterwards. `path' specifies a different directory - to extract to. `members' is optional and must be a subset of the - list returned by getmembers(). - """ - import copy - import operator - from tarfile import ExtractError - directories = [] - - if members is None: - members = self - - for tarinfo in members: - if tarinfo.isdir(): - # Extract directories with a safe mode. - directories.append(tarinfo) - tarinfo = copy.copy(tarinfo) - tarinfo.mode = 448 # decimal for oct 0700 - self.extract(tarinfo, path) - - # Reverse sort directories. - if sys.version_info < (2, 4): - def sorter(dir1, dir2): - return cmp(dir1.name, dir2.name) - directories.sort(sorter) - directories.reverse() - else: - directories.sort(key=operator.attrgetter('name'), reverse=True) - - # Set correct owner, mtime and filemode on directories. 
- for tarinfo in directories: - dirpath = os.path.join(path, tarinfo.name) - try: - self.chown(tarinfo, dirpath) - self.utime(tarinfo, dirpath) - self.chmod(tarinfo, dirpath) - except ExtractError: - e = sys.exc_info()[1] - if self.errorlevel > 1: - raise - else: - self._dbg(1, "tarfile: %s" % e) - - -def _build_install_args(options): - """ - Build the arguments to 'python setup.py install' on the distribute package - """ - install_args = [] - if options.user_install: - if sys.version_info < (2, 6): - log.warn("--user requires Python 2.6 or later") - raise SystemExit(1) - install_args.append('--user') - return install_args - -def _parse_args(): - """ - Parse the command line for options - """ - parser = optparse.OptionParser() - parser.add_option( - '--user', dest='user_install', action='store_true', default=False, - help='install in user site package (requires Python 2.6 or later)') - parser.add_option( - '--download-base', dest='download_base', metavar="URL", - default=DEFAULT_URL, - help='alternative URL from where to download the distribute package') - options, args = parser.parse_args() - # positional arguments are ignored - return options - -def main(version=DEFAULT_VERSION): - """Install or upgrade setuptools and EasyInstall""" - options = _parse_args() - tarball = download_setuptools(download_base=options.download_base) - return _install(tarball, _build_install_args(options)) - -if __name__ == '__main__': - sys.exit(main()) diff --git a/requirements.in b/requirements.in deleted file mode 100644 index d078113..0000000 --- a/requirements.in +++ /dev/null @@ -1,10 +0,0 @@ -numpy -cython -pandas -scipy -scikit-learn -hdbscan -biopython -taxtastic -futures -seqmagick diff --git a/requirements.txt b/requirements.txt index 2ad65b0..512b913 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,32 +1,24 @@ -# this file is order-dependent! 
-
-# to add new dependencies:
-# - create a virtualenv
-# - install packages: pip install -r requirements.in
-# - install pipdeptree
-# - run `pipdeptree -f --nowarn | bin/pipdeptree2requirements.py` to generate a new requirements.txt
-# - manually replace any references to github repos
-# - correct any incompatibilities (eg, seqmagick running under python2 requires biopython <= 1.66
-
-MarkupSafe==1.0
-numpy==1.14.0
-setuptools==38.4.0
-six==1.11.0
-DendroPy==4.3.0
-Jinja2==2.10
-PyYAML==3.12
-SQLAlchemy==1.2.0
-biopython==1.66
-decorator==4.1.2
+biopython==1.79
+Cython==0.29.28
+decorator==5.1.1
+DendroPy==4.5.2
 fastalite==0.3
-psycopg2==2.7.3.2
-python-dateutil==2.6.1
-pytz==2017.3
-scikit-learn==0.19.1
-Cython==0.27.3
-futures==3.2.0
-hdbscan==0.8.11
-pandas==0.22.0
-scipy==1.0.0
-seqmagick==0.6.2
-taxtastic==0.8.5
+greenlet==1.1.2
+hdbscan==0.8.28
+Jinja2==3.0.3
+joblib==1.1.0
+MarkupSafe==2.1.0
+numpy==1.22.2
+pandas==1.4.1
+psycopg2-binary==2.9.3
+pygtrie==2.4.2
+python-dateutil==2.8.2
+pytz==2021.3
+PyYAML==6.0
+scikit-learn==1.0.2
+scipy==1.8.0
+seqmagick==0.8.4
+six==1.16.0
+SQLAlchemy==1.4.32
+taxtastic==0.9.2
+threadpoolctl==3.1.0
diff --git a/setup.py b/setup.py
index 22682be..7bbe388 100644
--- a/setup.py
+++ b/setup.py
@@ -1,27 +1,44 @@
 import os
 import sys
 import subprocess
+
 from setuptools import setup, find_packages, Command
 
-# Fix for `setup.py test`
-# See http://bugs.python.org/issue15881
-try:
-    import multiprocessing
-    from concurrent import futures
-except ImportError:
-    pass
+datadir = 'deenurp/data'
+version_file = f'{datadir}/version.txt'
 
 subprocess.call(
-    ('mkdir -p {data} && '
-     'git describe --tags --dirty > {data}/{file}.tmp '
-     '&& mv {data}/{file}.tmp {data}/{file} '
-     '|| rm -f {data}/{file}.tmp').format(data='deenurp/data', file='version.txt'),
+    (f'mkdir -p {datadir} && '
+     f'git describe --tags --dirty > {version_file}.tmp '
+     f'&& mv {version_file}.tmp {version_file} '
+     f'|| rm -f {version_file}.tmp'),
     shell=True, stderr=open(os.devnull, "w"))
 
 # import must follow 'git describe' command above to update version
 from deenurp import __version__
 
 
+class CheckVersion(Command):
+    description = 'Confirm that the stored package version is correct'
+    user_options = []
+
+    def initialize_options(self):
+        pass
+
+    def finalize_options(self):
+        pass
+
+    def run(self):
+        with open(version_file) as f:
+            stored_version = f.read().strip()
+
+        # text=True decodes the output so the comparison below is str vs str
+        git_version = subprocess.check_output(
+            ['git', 'describe', '--tags', '--dirty'], text=True).strip()
+
+        assert stored_version == git_version
+        print('the current version is', stored_version)
+
+
 class run_audit(Command):
     """Audits source code using PyFlakes for following issues:
@@ -41,7 +58,7 @@ def run(self):
         try:
             import pyflakes.scripts.pyflakes as flakes
         except ImportError:
-            print "Audit requires PyFlakes installed in your system."
+            print("Audit requires PyFlakes installed in your system.")
             sys.exit(-1)
 
         warns = 0
@@ -53,17 +70,30 @@ def run(self):
             if file != '__init__.py' and file.endswith('.py'):
                 warns += flakes.checkPath(os.path.join(root, file))
         if warns > 0:
-            print "Audit finished with total %d warnings." % warns
+            print("Audit finished with total %d warnings." % warns)
         else:
-            print "No problems found in sourcecode."
-
-
-setup(name='deenurp',
-      version=__version__,
-      package_data={'deenurp': ['data/*', 'test/data/*']},
-      entry_points={
-          'console_scripts': {'deenurp = deenurp:main'}},
-      cmdclass={'audit': run_audit},
-      test_suite='deenurp.test.suite',
-      packages=find_packages(exclude=['tests'])
-      )
+            print("No problems found in source code.")
+
+
+setup(
+    name='deenurp',
+    version=__version__,
+    package_data={'deenurp': ['data/*', 'test/data/*']},
+    entry_points={
+        'console_scripts': {'deenurp = deenurp:main'}},
+    cmdclass={'audit': run_audit, 'check_version': CheckVersion},
+    test_suite='deenurp.test.suite',
+    packages=find_packages(exclude=['tests']),
+    python_requires='>=3.8',
+    install_requires=[
+        'numpy',
+        'cython',
+        'pandas',
+        'scipy',
+        'scikit-learn',
+        'hdbscan',
+        'biopython',
+        'taxtastic',
+        'seqmagick',
+    ],
+)
diff --git a/tests/hrefpkg-build/run.sh b/tests/hrefpkg-build/run.sh
index 324385f..96d734b 100755
--- a/tests/hrefpkg-build/run.sh
+++ b/tests/hrefpkg-build/run.sh
@@ -6,4 +6,5 @@ BASE=../rdp_10_30_named1200bp_subset
 rm -rf hrefpkg
 mkdir hrefpkg
 DEENURP=${DEENURP-../../deenurp.py}
-$DEENURP hrefpkg_build --index-rank=family $BASE.fasta $BASE.seqinfo.csv $BASE.taxonomy.csv --output-dir hrefpkg
+$DEENURP hrefpkg_build --index-rank=family \
+    $BASE.fasta $BASE.seqinfo.csv $BASE.taxonomy.csv --output-dir hrefpkg
diff --git a/tests/run.sh b/tests/run.sh
index b88ee54..69d668e 100755
--- a/tests/run.sh
+++ b/tests/run.sh
@@ -10,6 +10,7 @@ while read subdir; do
    if echo $subdir | grep -qv -E '^#'; then
        echo $subdir
        (cd $TESTS_DIR/$subdir && ./run.sh)
+        # (cd $TESTS_DIR/$subdir && bash -v ./run.sh)
    fi
 done <
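
# Usage sketch (illustrative, appended for reference): the new `check_version`
# setup.py command compares the version stamp written to deenurp/data/version.txt
# by the `git describe` call at the top of setup.py against the live
# `git describe --tags --dirty` output, and fails on any mismatch.
# Assuming a tagged git checkout:
#
#     python3 setup.py check_version
#     # the current version is 0.3.0
#
# The printed tag ("0.3.0") is an example, not guaranteed output.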