diff --git a/examples/constrained/branin_con.py b/examples/constrained/branin_con.py index 919147e..eb0fb76 100755 --- a/examples/constrained/branin_con.py +++ b/examples/constrained/branin_con.py @@ -1,3 +1,4 @@ +from __future__ import print_function import math import numpy as np @@ -6,7 +7,7 @@ def evaluate(job_id, params): x = params['X'] y = params['Y'] - print 'Evaluating at (%f, %f)' % (x, y) + print('Evaluating at (%f, %f)' % (x, y)) if x < 0 or x > 5.0 or y > 5.0: return np.nan @@ -30,6 +31,6 @@ def main(job_id, params): try: return evaluate(job_id, params) except Exception as ex: - print ex - print 'An error occurred in branin_con.py' + print(ex) + print('An error occurred in branin_con.py') return np.nan diff --git a/examples/distributed/branin.py b/examples/distributed/branin.py index e553505..07d3e36 100755 --- a/examples/distributed/branin.py +++ b/examples/distributed/branin.py @@ -1,3 +1,4 @@ +from __future__ import print_function import numpy as np import sys import math @@ -13,12 +14,12 @@ def branin(x, y): #if np.random.rand > 0.75: # raise Exception('Blah!') - print 'Result = %f' % result + print('Result = %f' % result) time.sleep(np.random.randint(30)) return {'branin' : result} # Write a function like this called 'main' def main(job_id, params): - print 'Anything printed here will end up in the output directory for job #%d' % job_id - print params + print('Anything printed here will end up in the output directory for job #%d' % job_id) + print(params) return branin(params['x'], params['y']) diff --git a/examples/noisy/branin_noisy.py b/examples/noisy/branin_noisy.py index 748570b..85a2ccf 100755 --- a/examples/noisy/branin_noisy.py +++ b/examples/noisy/branin_noisy.py @@ -1,3 +1,4 @@ +from __future__ import print_function import numpy as np import math @@ -9,12 +10,12 @@ def branin(x, y): result = float(result) noise = np.random.normal() * 50. 
- print 'Result = %f, noise %f, total %f' % (result, noise, result+noise) + print('Result = %f, noise %f, total %f' % (result, noise, result+noise)) #time.sleep(np.random.randint(60)) return result + noise # Write a function like this called 'main' def main(job_id, params): - print 'Anything printed here will end up in the output directory for job #%d' % job_id - print params + print('Anything printed here will end up in the output directory for job #%d' % job_id) + print(params) return branin(params['x'], params['y']) diff --git a/examples/simple/branin.py b/examples/simple/branin.py index e26df65..19a46ed 100755 --- a/examples/simple/branin.py +++ b/examples/simple/branin.py @@ -1,3 +1,4 @@ +from __future__ import print_function import numpy as np import math @@ -8,12 +9,12 @@ def branin(x, y): result = float(result) - print 'Result = %f' % result + print('Result = %f' % result) #time.sleep(np.random.randint(60)) return result # Write a function like this called 'main' def main(job_id, params): - print 'Anything printed here will end up in the output directory for job #%d' % job_id - print params + print('Anything printed here will end up in the output directory for job #%d' % job_id) + print(params) return branin(params['x'], params['y']) diff --git a/examples/simple/make_plots.py b/examples/simple/make_plots.py index 70200b1..912b390 100644 --- a/examples/simple/make_plots.py +++ b/examples/simple/make_plots.py @@ -1,6 +1,12 @@ +from __future__ import print_function import importlib import sys -from itertools import izip + +if sys.version<'3': + from itertools import izip +else: + izip = zip + import numpy as np import matplotlib.pyplot as plt @@ -8,18 +14,19 @@ from spearmint.utils.database.mongodb import MongoDB +from spearmint.utils.fixes import items from spearmint.main import get_options, parse_resources_from_config, load_jobs, remove_broken_jobs, \ load_task_group, load_hypers def print_dict(d, level=1): if isinstance(d, dict): - if level > 1: print "" - for k, v in d.iteritems(): - print " " * level, k, + if level > 1: print("") + for k, v in items(d): + print(" " * level, k,) print_dict(v, level=level+1) else: - print d + print(d) def main(): """ @@ -32,7 +39,7 @@ def main(): unstandardized) """ options, expt_dir = get_options() - print "options:" + print("options:") print_dict(options) # reduce the grid size @@ -43,7 +50,7 @@ def main(): # Load up the chooser. chooser_module = importlib.import_module('spearmint.choosers.' 
+ options['chooser']) chooser = chooser_module.init(options) - print "chooser", chooser + print("chooser", chooser) experiment_name = options.get("experiment-name", 'unnamed-experiment') # Connect to the database @@ -55,38 +62,42 @@ def main(): jobs = load_jobs(db, experiment_name) remove_broken_jobs(db, jobs, experiment_name, resources) - print "resources:", resources + print("resources:", resources) print_dict(resources) - resource = resources.itervalues().next() + + if sys.version < '3': + resource = resources.itervalues().next() + else: + resource = list(resources.values())[0] # ugly code task_options = { task: options["tasks"][task] for task in resource.tasks } - print "task_options:" + print("task_options:") print_dict(task_options) # {'main': {'likelihood': u'NOISELESS', 'type': 'OBJECTIVE'}} task_group = load_task_group(db, options, resource.tasks) - print "task_group", task_group # TaskGroup - print "tasks:" + print("task_group", task_group) # TaskGroup + print("tasks:") print_dict(task_group.tasks) # {'main': } hypers = load_hypers(db, experiment_name) - print "loaded hypers", hypers # from GP.to_dict() + print("loaded hypers", hypers) # from GP.to_dict() hypers = chooser.fit(task_group, hypers, task_options) - print "\nfitted hypers:" + print("\nfitted hypers:") print_dict(hypers) lp, x = chooser.best() x = x.flatten() - print "best", lp, x + print("best", lp, x) bestp = task_group.paramify(task_group.from_unit(x)) - print "expected best position", bestp + print("expected best position", bestp) # get the grid of points grid = chooser.grid # print "chooser objectives:", # print_dict(chooser.objective) - print "chooser models:", chooser.models + print("chooser models:", chooser.models) print_dict(chooser.models) obj_model = chooser.models[chooser.objective['name']] obj_mean, obj_var = obj_model.function_over_hypers(obj_model.predict, grid) @@ -105,10 +116,10 @@ def main(): xymv = [(xy[0], xy[1], m, v) for xy, m, v in izip(grid, obj_mean, obj_std)]# if .2 < xy[0] < .25] - x = map(lambda x:x[0], xymv) - y = map(lambda x:x[1], xymv) - m = map(lambda x:x[2], xymv) - sig = map(lambda x:x[3], xymv) + x = list(map(lambda x:x[0], xymv)) + y = list(map(lambda x:x[1], xymv)) + m = list(map(lambda x:x[2], xymv)) + sig = list(map(lambda x:x[3], xymv)) # print y fig = plt.figure(dpi=100) @@ -124,7 +135,7 @@ def main(): task = task_group.tasks['main'] idata = task.valid_normalized_data_dict xy = idata["inputs"] - xy = map(task_group.from_unit, xy) + xy = list(map(task_group.from_unit, xy)) xy = np.array(xy) vals = idata["values"] vals = [obj_task.unstandardize_mean(obj_task.unstandardize_variance(v)) for v in vals] diff --git a/spearmint/choosers/acquisition_functions.py b/spearmint/choosers/acquisition_functions.py index ddd319d..7216ce4 100755 --- a/spearmint/choosers/acquisition_functions.py +++ b/spearmint/choosers/acquisition_functions.py @@ -183,6 +183,7 @@ # its Institution. 
+import sys import os import tempfile import copy @@ -191,10 +192,14 @@ import scipy.linalg as spla import scipy.stats as sps import scipy.optimize as spo -import cPickle import multiprocessing import ast +if sys.version < '3': + import cPickle +else: + import pickle as cPickle + def compute_ei(model, pred, ei_target=None, compute_grad=True): # TODO: use ei_target if pred.ndim == 1: diff --git a/spearmint/choosers/default_chooser.py b/spearmint/choosers/default_chooser.py index 26bf585..f3c07a6 100755 --- a/spearmint/choosers/default_chooser.py +++ b/spearmint/choosers/default_chooser.py @@ -193,9 +193,11 @@ from .acquisition_functions import compute_ei from ..utils.grad_check import check_grad +from ..utils.fixes import items, xrange from ..grids import sobol_grid from ..models.abstract_model import function_over_hypers from .. import models +from functools import reduce DEFAULT_GRIDSIZE = 20000 DEFAULT_GRIDSEED = 0 @@ -260,7 +262,7 @@ def fit(self, task_group, hypers=None, options=None): grid_seed=self.grid_seed) # A useful hack: add previously visited points to the grid - for task_name, task in task_group.tasks.iteritems(): + for task_name, task in items(task_group.tasks): if task.has_valid_inputs(): self.grid = np.append(self.grid, task.valid_normalized_data_dict['inputs'], axis=0) if task.has_pending(): @@ -274,7 +276,7 @@ def fit(self, task_group, hypers=None, options=None): # print 'Fittings tasks: %s' % str(task_group.tasks.keys()) - for task_name, task in task_group.tasks.iteritems(): + for task_name, task in items(task_group.tasks): if task.type.lower() == 'objective': data_dict = self.objective # confusing: this is how self.objective gets populated elif task.type.lower() == 'constraint': @@ -299,7 +301,7 @@ def fit(self, task_group, hypers=None, options=None): self.models[task_name] = getattr(models, model_class)(task_group.num_dims, **task.options) - vals = data_dict['values'] if data_dict.has_key('values') else data_dict['counts'] + vals = data_dict['values'] if 'values' in data_dict else data_dict['counts'] sys.stderr.write('Fitting %s for %s task...\n' % (model_class, task_name)) new_hypers[task_name] = self.models[task_name].fit( @@ -351,7 +353,7 @@ def suggest(self): best_grid_ei = grid_ei[best_grid_ind] if VERBOSE: - print 'Best EI before optimization: %f' % best_grid_ei + print('Best EI before optimization: %f' % best_grid_ei) if self.check_grad: check_grad(lambda x: self.acq_optimize_wrapper(x, current_best, True), @@ -387,8 +389,8 @@ def suggest(self): # Optimization should always be better unless the optimization # breaks in some way. 
if VERBOSE: - print 'Best EI after optimization: %f' % best_opt_ei - print 'Suggested input %s' % cand[best_opt_ind] + print('Best EI after optimization: %f' % best_opt_ei) + print('Suggested input %s' % cand[best_opt_ind]) if best_opt_ei >= best_grid_ei: suggestion = cand[best_opt_ind] @@ -520,7 +522,7 @@ def probabilistic_constraint(self, pred): np.ones(pred.shape[0], dtype=bool)) def acquisition_function_over_hypers(self, *args, **kwargs): - return function_over_hypers(self.models.values(), self.acquisition_function, *args, **kwargs) + return function_over_hypers(self.models.values(), self.acquisition_function, *args, **kwargs) #TODO might need to use list(.values()) def acquisition_function(self, cand, current_best, compute_grad=True): obj_model = self.models[self.objective['name']] diff --git a/spearmint/grids/sobol.py b/spearmint/grids/sobol.py index 558e663..7275c4f 100755 --- a/spearmint/grids/sobol.py +++ b/spearmint/grids/sobol.py @@ -182,8 +182,15 @@ # to enter into this License and Terms of Use on behalf of itself and # its Institution. +import sys import numpy as np -import cPickle as pickle + +from spearmint.utils.fixes import xrange + +if sys.version < '3': + import cPickle as pickle +else: + import pickle # Numba autojit might be nice. Currently asplodes. def sobol(num_points, num_dims): @@ -236,11 +243,12 @@ def sobol(num_points, num_dims): return Z def to_binary(X, bits): - return 1 & (X[:,np.newaxis]/2**np.arange(bits-1,-1,-1, dtype=np.uint32)) + temp = X[:,np.newaxis]//2**np.arange(bits-1,-1,-1, dtype=np.uint32) # freaking integer division required here! + return np.ones_like(temp) & temp # These are the parameters for the Sobol sequence. # This is hilarious. -params = """(lp1 +params = b"""(lp1 (dp2 S'a' I0 diff --git a/spearmint/kernels/__init__.py b/spearmint/kernels/__init__.py index e79df88..25e96a0 100755 --- a/spearmint/kernels/__init__.py +++ b/spearmint/kernels/__init__.py @@ -1,8 +1,8 @@ -from matern import Matern52 -from sum_kernel import SumKernel -from product_kernel import ProductKernel -from noise import Noise -from scale import Scale -from transform_kernel import TransformKernel +from spearmint.kernels.matern import Matern52 +from spearmint.kernels.sum_kernel import SumKernel +from spearmint.kernels.product_kernel import ProductKernel +from spearmint.kernels.noise import Noise +from spearmint.kernels.scale import Scale +from spearmint.kernels.transform_kernel import TransformKernel __all__ = ["Matern52", "SumKernel", "ProductKernel", "Noise", "Scale", "TransformKernel"] diff --git a/spearmint/kernels/abstract_kernel.py b/spearmint/kernels/abstract_kernel.py index 65bc6c1..e1b83b0 100755 --- a/spearmint/kernels/abstract_kernel.py +++ b/spearmint/kernels/abstract_kernel.py @@ -186,7 +186,7 @@ from abc import ABCMeta, abstractmethod -class AbstractKernel(object): +class AbstractKernel(object): # metaclass is an additional parameter in py3: AbstractKernel(object, metaclass=...) 
__metaclass__ = ABCMeta @property diff --git a/spearmint/kernels/kernel_utils.py b/spearmint/kernels/kernel_utils.py index c84248d..a8d9565 100755 --- a/spearmint/kernels/kernel_utils.py +++ b/spearmint/kernels/kernel_utils.py @@ -184,7 +184,13 @@ import numpy as np -import scipy.weave + +try: + import scipy.weave +except ImportError: + pass + +from spearmint.utils.fixes import xrange from scipy.spatial.distance import cdist def dist2(ls, x1, x2=None): diff --git a/spearmint/kernels/matern.py b/spearmint/kernels/matern.py index 19f3836..16bf1e3 100755 --- a/spearmint/kernels/matern.py +++ b/spearmint/kernels/matern.py @@ -184,7 +184,7 @@ import numpy as np -import kernel_utils +from spearmint.kernels import kernel_utils from .abstract_kernel import AbstractKernel from ..utils import priors diff --git a/spearmint/kernels/product.py b/spearmint/kernels/product.py index 22e037c..e042f59 100755 --- a/spearmint/kernels/product.py +++ b/spearmint/kernels/product.py @@ -188,14 +188,16 @@ # of multiple different covariance functions. import sys import numpy as np -import priors -import kernel_utils +import spearmint.utils.priors +from . import kernel_utils import scipy.stats as sps import warnings import scipy.special as spe import logging #warnings.filterwarnings("ignore", category=RuntimeWarning) +from spearmint.utils.fixes import xrange + class productCov: def __init__(self, num_dimensions, **kwargs): # The sub-covariances of which this is the elementwise product diff --git a/spearmint/kernels/product_kernel.py b/spearmint/kernels/product_kernel.py index 35f8205..e6f165a 100755 --- a/spearmint/kernels/product_kernel.py +++ b/spearmint/kernels/product_kernel.py @@ -186,7 +186,7 @@ import numpy as np from .abstract_kernel import AbstractKernel - +from functools import reduce class ProductKernel(AbstractKernel): # TODO: If all kernel values are positive then we can do things in log-space diff --git a/spearmint/kernels/sum_kernel.py b/spearmint/kernels/sum_kernel.py index 2e25dab..62cab8f 100755 --- a/spearmint/kernels/sum_kernel.py +++ b/spearmint/kernels/sum_kernel.py @@ -184,6 +184,7 @@ from .abstract_kernel import AbstractKernel +from functools import reduce class SumKernel(AbstractKernel): diff --git a/spearmint/launcher.py b/spearmint/launcher.py index b2ce21f..7777fee 100755 --- a/spearmint/launcher.py +++ b/spearmint/launcher.py @@ -190,6 +190,7 @@ import numpy as np from spearmint.utils.database.mongodb import MongoDB +from spearmint.utils.fixes import items def main(): parser = optparse.OptionParser(usage="usage: %prog [options]") @@ -264,14 +265,14 @@ def launch(db_address, experiment_name, job_id): result = {'main' : result} if set(result.keys()) != set(job['tasks']): - raise Exception("Result task names %s did not match job task names %s." % (result.keys(), job['tasks'])) + raise Exception("Result task names %s did not match job task names %s." % (list(result.keys()), job['tasks'])) success = True except: import traceback traceback.print_exc() sys.stderr.write("Problem executing the function\n") - print sys.exc_info() + print(sys.exc_info()) end_time = time.time() @@ -305,7 +306,7 @@ def python_launcher(job): # Convert the JSON object into useful parameters. 
params = {} - for name, param in job['params'].iteritems(): + for name, param in items(job['params']): vals = param['values'] if param['type'].lower() == 'float': @@ -351,7 +352,7 @@ def matlab_launcher(job): session.run("cd('%s')" % os.path.realpath(job['expt_dir'])) session.run('params = struct()') - for name, param in job['params'].iteritems(): + for name, param in items(job['params']): vals = param['values'] # sys.stderr.write('%s = %s\n' % (param['name'], str(vals))) @@ -400,7 +401,7 @@ def mcr_launcher(job): # Change into the directory. os.chdir(job['expt_dir']) - if os.environ.has_key('MATLAB'): + if 'MATLAB' in os.environ: mcr_loc = os.environ['MATLAB'] else: raise Exception("Please set the MATLAB environment variable") diff --git a/spearmint/main.py b/spearmint/main.py index 052bea6..7e4d689 100755 --- a/spearmint/main.py +++ b/spearmint/main.py @@ -196,6 +196,7 @@ from collections import OrderedDict from spearmint.utils.database.mongodb import MongoDB +from spearmint.utils.fixes import items from spearmint.tasks.task_group import TaskGroup from spearmint.resources.resource import parse_resources_from_config @@ -203,6 +204,7 @@ from spearmint.utils.parsing import parse_db_address + def get_options(): parser = optparse.OptionParser(usage="usage: %prog [options] directory") @@ -262,7 +264,7 @@ def main(): while True: - for resource_name, resource in resources.iteritems(): + for resource_name, resource in items(resources): jobs = load_jobs(db, experiment_name) # resource.printStatus(jobs) @@ -301,7 +303,7 @@ def main(): # Print out the status of the resources # resource.printStatus(jobs) - print_resources_status(resources.values(), jobs) + print_resources_status(list(resources.values()), jobs) # If no resources are accepting jobs, sleep # (they might be accepting if suggest takes a while and so some jobs already finished by the time this point is reached) @@ -313,7 +315,7 @@ def tired(db, experiment_name, resources): return True if no resources are accepting jobs """ jobs = load_jobs(db, experiment_name) - for resource_name, resource in resources.iteritems(): + for resource_name, resource in items(resources): if resource.acceptingJobs(jobs): return False return True @@ -431,7 +433,7 @@ def save_job(job, db, experiment_name): def load_task_group(db, options, task_names=None): if task_names is None: - task_names = options['tasks'].keys() + task_names = list(options['tasks'].keys()) task_options = { task: options["tasks"][task] for task in task_names } jobs = load_jobs(db, options['experiment-name']) diff --git a/spearmint/models/__init__.py b/spearmint/models/__init__.py index abd0926..26b67f6 100755 --- a/spearmint/models/__init__.py +++ b/spearmint/models/__init__.py @@ -1,4 +1,4 @@ -from gp import GP -from gp_classifier import GPClassifier +from spearmint.models.gp import GP +from spearmint.models.gp_classifier import GPClassifier __all__ = ["GP", "GPClassifier"] diff --git a/spearmint/models/abstract_model.py b/spearmint/models/abstract_model.py index 588379b..496974c 100755 --- a/spearmint/models/abstract_model.py +++ b/spearmint/models/abstract_model.py @@ -186,6 +186,9 @@ from abc import ABCMeta, abstractmethod +from spearmint.utils.fixes import xrange +from functools import reduce + class AbstractModel(object): __metaclass__ = ABCMeta diff --git a/spearmint/models/gp.py b/spearmint/models/gp.py index 3d41dee..f4c2230 100755 --- a/spearmint/models/gp.py +++ b/spearmint/models/gp.py @@ -194,6 +194,7 @@ from ..kernels import Matern52, Noise, Scale, SumKernel, TransformKernel from 
..sampling.slice_sampler import SliceSampler from ..utils import priors +from ..utils.fixes import items, xrange from ..transformations import BetaWarp, Transformer try: @@ -201,7 +202,7 @@ log = logging.getLogger(module) except: log = logging.getLogger() - print 'Not running from main.' + print('Not running from main.') DEFAULT_MCMC_ITERS = 10 DEFAULT_BURNIN = 100 @@ -274,7 +275,7 @@ def _set_likelihood(self, options): self.noiseless = False def _set_params_from_dict(self, hypers_dict): - for name, hyper in self.params.iteritems(): + for name, hyper in items(self.params): self.params[name].value = hypers_dict[name] def _reset_params(self): @@ -463,7 +464,7 @@ def set_state(self, state): def to_dict(self): """return a dictionary that saves the values of the hypers and the chain length""" gp_dict = {'hypers' : {}} - for name, hyper in self.params.iteritems(): + for name, hyper in items(self.params): gp_dict['hypers'][name] = hyper.value gp_dict['chain length'] = self.chain_length diff --git a/spearmint/models/gp_classifier.py b/spearmint/models/gp_classifier.py index 32ce8ad..6676d56 100755 --- a/spearmint/models/gp_classifier.py +++ b/spearmint/models/gp_classifier.py @@ -191,11 +191,11 @@ import scipy.optimize as spo import scipy.io as sio import scipy.stats as sps -import scipy.weave from .gp import GP from ..utils.param import Param as Hyperparameter +from ..utils.fixes import items, xrange from ..kernels import Matern52, Noise, Scale, SumKernel, TransformKernel from ..sampling.slice_sampler import SliceSampler from ..sampling.whitened_prior_slice_sampler import WhitenedPriorSliceSampler @@ -208,7 +208,7 @@ log = logging.getLogger(module) except: log = logging.getLogger() - print 'Not running from main.' + print('Not running from main.') class GPClassifier(GP): def __init__(self, num_dims, **options): @@ -476,7 +476,7 @@ def to_dict(self): gp_dict = {} gp_dict['hypers'] = {} - for name, hyper in self.params.iteritems(): + for name, hyper in items(self.params): gp_dict['hypers'][name] = hyper.value # Save the latent values as a dict with keys as hashes of the data diff --git a/spearmint/resources/resource.py b/spearmint/resources/resource.py index e693bf1..a79efa9 100755 --- a/spearmint/resources/resource.py +++ b/spearmint/resources/resource.py @@ -189,6 +189,9 @@ from operator import add import numpy as np import sys +from functools import reduce + +from spearmint.utils.fixes import items def parse_resources_from_config(config): """Parse the config dict and return a dictionary of resource objects keyed by resource name""" @@ -202,7 +205,7 @@ def parse_resources_from_config(config): # If resources are specified else: resources = dict() - for resource_name, resource_opts in config["resources"].iteritems(): + for resource_name, resource_opts in items(config["resources"]): task_names = parse_tasks_in_resource_from_config(config, resource_name) resources[resource_name] = resource_factory(resource_name, task_names, resource_opts) return resources @@ -217,7 +220,7 @@ def parse_tasks_in_resource_from_config(config, resource_name): return ['main'] else: tasks = list() - for task_name, task_config in config["tasks"].iteritems(): + for task_name, task_config in items(config["tasks"]): # If the user specified tasks but not specific resources for those tasks, # We have to assume the tasks run on all resources... 
if "resources" not in task_config: diff --git a/spearmint/sampling/__init__.py b/spearmint/sampling/__init__.py index 861ffd1..7a04b80 100755 --- a/spearmint/sampling/__init__.py +++ b/spearmint/sampling/__init__.py @@ -1,6 +1,6 @@ -from abstract_sampler import AbstractSampler -from slice_sampler import SliceSampler -from whitened_prior_slice_sampler import WhitenedPriorSliceSampler -from elliptical_slice_sampler import EllipticalSliceSampler +from spearmint.sampling.abstract_sampler import AbstractSampler +from spearmint.sampling.slice_sampler import SliceSampler +from spearmint.sampling.whitened_prior_slice_sampler import WhitenedPriorSliceSampler +from spearmint.sampling.elliptical_slice_sampler import EllipticalSliceSampler __all__ = ["AbstractSampler", "SliceSampler", "WhitenedPriorSliceSampler", "EllipticalSliceSampler"] \ No newline at end of file diff --git a/spearmint/sampling/abstract_sampler.py b/spearmint/sampling/abstract_sampler.py index e2419b1..c561dac 100755 --- a/spearmint/sampling/abstract_sampler.py +++ b/spearmint/sampling/abstract_sampler.py @@ -199,7 +199,7 @@ def __init__(self, *params_to_sample, **sampler_options): # Note: thinning is currently implemented such that each sampler does its thinning # We could also do a different type of thinning, implemented in SamplerCollection, # where all samplers produce a sample, and then you thin (ABABAB rather than AAABBB) - self.thinning_overrideable = not sampler_options.has_key('thinning') # Thinning can be overrided if True + self.thinning_overrideable = 'thinning' not in sampler_options # Thinning can be overrided if True self.thinning = sampler_options.get('thinning', 0) @abstractmethod diff --git a/spearmint/sampling/elliptical_slice_sampler.py b/spearmint/sampling/elliptical_slice_sampler.py index 98d0a4a..38a078d 100755 --- a/spearmint/sampling/elliptical_slice_sampler.py +++ b/spearmint/sampling/elliptical_slice_sampler.py @@ -192,6 +192,7 @@ from .abstract_sampler import AbstractSampler from ..utils import param as hyperparameter_utils +from ..utils.fixes import xrange class EllipticalSliceSampler(AbstractSampler): @@ -292,7 +293,7 @@ def elliptical_slice(xx, log_like_fn, prior_chol, prior_mean, *log_like_args, ** from utils import priors import time - print '2D Gaussian:' + print('2D Gaussian:') n = 1000000 @@ -312,20 +313,20 @@ def elliptical_slice(xx, log_like_fn, prior_chol, prior_mean, *log_like_args, ** like_cov = np.dot(like_L, like_L.T) like = priors.MultivariateNormal(mu=like_mu, cov=like_cov) - print 'Prior cov:' - print prior_cov - print 'Like cov:' - print like_cov + print('Prior cov:') + print(prior_cov) + print('Like cov:') + print(like_cov) current_time = time.time() cur_ll = None for i in xrange(n): if i % 1000 == 0: - print 'Elliptical Slice Sample %d/%d' % (i,n) + print('Elliptical Slice Sample %d/%d' % (i,n)) x, cur_ll = elliptical_slice(x, like.logprob, prior_L, prior_mu, cur_log_like=cur_ll) x_samples[:,i] = x.copy() - print 'Elliptical slice sampling took %f seconds' % (time.time() - current_time) + print('Elliptical slice sampling took %f seconds' % (time.time() - current_time)) # Formula for the actual mean and covariance matrix below came from # the wikipedia page on conjugate priors @@ -334,13 +335,13 @@ def elliptical_slice(xx, log_like_fn, prior_chol, prior_mean, *log_like_args, ** B = spla.cho_solve((like_L, True), like_mu) actual_mean = np.dot(actual_cov, A+B) - print 'Actual mean: %s' % actual_mean - print 'Mean of ESS samples: %s' % np.mean(x_samples,axis=1) + print('Actual mean: %s' 
% actual_mean) + print('Mean of ESS samples: %s' % np.mean(x_samples,axis=1)) - print 'Actual Cov:' - print actual_cov - print 'Cov of ESS samples:' - print np.cov(x_samples) + print('Actual Cov:') + print(actual_cov) + print('Cov of ESS samples:') + print(np.cov(x_samples)) # below: also compare with regular slice sampling (slower) diff --git a/spearmint/sampling/mcmc.py b/spearmint/sampling/mcmc.py index fa57e74..06fc0a1 100755 --- a/spearmint/sampling/mcmc.py +++ b/spearmint/sampling/mcmc.py @@ -185,6 +185,9 @@ import numpy as np import numpy.random as npr +import sys + +from spearmint.utils.fixes import xrange def elliptical_slice(xx, chol_Sigma, log_like_fn, *log_like_fn_args): D = xx.size @@ -316,7 +319,7 @@ def acceptable(z, llh_s, L, U): new_z = (upper - lower)*npr.rand() + lower new_llh = dir_logprob(new_z) if np.isnan(new_llh): - print new_z, direction*new_z + init_x, new_llh, llh_s, init_x, logprob(init_x, *logprob_args) + print(new_z, direction*new_z + init_x, new_llh, llh_s, init_x, logprob(init_x, *logprob_args)) raise Exception("Slice sampler got a NaN") if new_llh > llh_s and acceptable(new_z, llh_s, start_lower, start_upper): break @@ -328,7 +331,7 @@ def acceptable(z, llh_s, L, U): raise Exception("Slice sampler shrank to zero!") if verbose: - print "Steps Out:", l_steps_out, u_steps_out, " Steps In:", steps_in + print("Steps Out:", l_steps_out, u_steps_out, " Steps In:", steps_in) return new_z*direction + init_x, new_llh @@ -340,7 +343,7 @@ def acceptable(z, llh_s, L, U): dims = init_x.shape[0] if compwise: - ordering = range(dims) + ordering = list(range(dims)) npr.shuffle(ordering) new_x = init_x.copy() for d in ordering: @@ -377,7 +380,7 @@ def dir_logprob(z): # logprob of the proposed point (x + dir*z) where z must be try: return logprob(direction*z + init_x, *logprob_args) except: - print 'ERROR: Logprob failed at input %s' % str(direction*z + init_x) + print('ERROR: Logprob failed at input %s' % str(direction*z + init_x)) raise @@ -407,7 +410,7 @@ def dir_logprob(z): # logprob of the proposed point (x + dir*z) where z must be new_z = (upper - lower)*npr.rand() + lower # uniformly sample between upper and lower new_llh = dir_logprob(new_z) # new current logprob if np.isnan(new_llh): - print new_z, direction*new_z + init_x, new_llh, llh_s, init_x, logprob(init_x) + print(new_z, direction*new_z + init_x, new_llh, llh_s, init_x, logprob(init_x)) raise Exception("Slice sampler got a NaN logprob") if new_llh > llh_s: # this is the termination condition break # it says, if you got to a better place than you started, you're done @@ -422,7 +425,7 @@ def dir_logprob(z): # logprob of the proposed point (x + dir*z) where z must be raise Exception("Slice sampler shrank to zero!") if verbose: - print "Steps Out:", l_steps_out, u_steps_out, " Steps In:", steps_in, "Final logprob:", new_llh + print("Steps Out:", l_steps_out, u_steps_out, " Steps In:", steps_in, "Final logprob:", new_llh) # return new the point return new_z*direction + init_x, new_llh @@ -443,7 +446,7 @@ def dir_logprob(z): # logprob of the proposed point (x + dir*z) where z must be dims = init_x.shape[0] if compwise: # if component-wise (independent) sampling - ordering = range(dims) + ordering = list(range(dims)) npr.shuffle(ordering) cur_x = init_x.copy() for d in ordering: diff --git a/spearmint/sampling/slice_sampler.py b/spearmint/sampling/slice_sampler.py index cc2328a..8d86f9c 100755 --- a/spearmint/sampling/slice_sampler.py +++ b/spearmint/sampling/slice_sampler.py @@ -191,6 +191,7 @@ # from .mcmc import 
slice_sample_simple as slice_sample from .abstract_sampler import AbstractSampler from ..utils import param as hyperparameter_utils +from ..utils.fixes import xrange class SliceSampler(AbstractSampler): @@ -228,9 +229,9 @@ def logprob(self, x, model): lp += param.prior_logprob() if np.isnan(lp): # Positive infinity should be ok, right? - print 'Param diagnostics:' + print('Param diagnostics:') param.print_diagnostics() - print 'Prior logprob: %f' % param.prior_logprob() + print('Prior logprob: %f' % param.prior_logprob()) raise Exception("Prior returned %f logprob" % lp) if not np.isfinite(lp): @@ -282,13 +283,13 @@ def sample(self, model): for i in xrange(n): if i % 1000 == 0: - print 'Sample %d/%d' % (i,n) + print('Sample %d/%d' % (i,n)) x, cur_ll = slice_sample(x, gsn. logprob) x_samples[i] = x.copy() - print '1D Gaussian actual mean: %f, mean of samples: %f' % (-1, np.mean(x_samples)) - print '1D Gaussian actual sigma: %f, std of samples: %f' % (4, np.std(x_samples)) + print('1D Gaussian actual mean: %f, mean of samples: %f' % (-1, np.mean(x_samples))) + print('1D Gaussian actual sigma: %f, std of samples: %f' % (4, np.std(x_samples))) plt.figure(1) @@ -307,19 +308,19 @@ def sample(self, model): for i in xrange(n): if i % 1000 == 0: - print 'Sample %d/%d' % (i,n) + print('Sample %d/%d' % (i,n)) x, cur_ll = slice_sample(x, mvn.logprob) x_samples[:,i] = x.copy() mu_samp = np.mean(x_samples,axis=1) - print '2D Gaussian:' - print 'Actual mean: [%f,%f]' % (mu[0], mu[1]) - print 'Mean of samples: [%f,%f]' % (mu_samp[0], mu_samp[1]) - print 'Actual Cov:' - print str(cov) - print 'Cov of samples' - print str(np.cov(x_samples)) + print('2D Gaussian:') + print('Actual mean: [%f,%f]' % (mu[0], mu[1])) + print('Mean of samples: [%f,%f]' % (mu_samp[0], mu_samp[1])) + print('Actual Cov:') + print(str(cov)) + print('Cov of samples') + print(str(np.cov(x_samples))) # plt.figure(1) # plt.clf() diff --git a/spearmint/sampling/whitened_prior_slice_sampler.py b/spearmint/sampling/whitened_prior_slice_sampler.py index 6a7c770..61b12c0 100755 --- a/spearmint/sampling/whitened_prior_slice_sampler.py +++ b/spearmint/sampling/whitened_prior_slice_sampler.py @@ -192,6 +192,7 @@ # from .mcmc import slice_sample_simple as slice_sample from .abstract_sampler import AbstractSampler from ..utils import param as hyperparameter_utils +from ..utils.fixes import xrange class WhitenedPriorSliceSampler(AbstractSampler): @@ -268,13 +269,13 @@ def sample_fun(self, model, **sampler_options): for i in xrange(n): if i % 1000 == 0: - print 'Sample %d/%d' % (i,n) + print('Sample %d/%d' % (i,n)) x, cur_ll = slice_sample(x, gsn. 
logprob) x_samples[i] = x.copy() - print '1D Gaussian actual mean: %f, mean of samples: %f' % (-1, np.mean(x_samples)) - print '1D Gaussian actual sigma: %f, std of samples: %f' % (4, np.std(x_samples)) + print('1D Gaussian actual mean: %f, mean of samples: %f' % (-1, np.mean(x_samples))) + print('1D Gaussian actual sigma: %f, std of samples: %f' % (4, np.std(x_samples))) plt.figure(1) @@ -293,19 +294,19 @@ def sample_fun(self, model, **sampler_options): for i in xrange(n): if i % 1000 == 0: - print 'Sample %d/%d' % (i,n) + print('Sample %d/%d' % (i,n)) x, cur_ll = slice_sample(x, mvn.logprob) x_samples[:,i] = x.copy() mu_samp = np.mean(x_samples,axis=1) - print '2D Gaussian:' - print 'Actual mean: [%f,%f]' % (mu[0], mu[1]) - print 'Mean of samples: [%f,%f]' % (mu_samp[0], mu_samp[1]) - print 'Actual Cov:' - print str(cov) - print 'Cov of samples' - print str(np.cov(x_samples)) + print('2D Gaussian:') + print('Actual mean: [%f,%f]' % (mu[0], mu[1])) + print('Mean of samples: [%f,%f]' % (mu_samp[0], mu_samp[1])) + print('Actual Cov:') + print(str(cov)) + print('Cov of samples') + print(str(np.cov(x_samples))) # plt.figure(1) # plt.clf() diff --git a/spearmint/schedulers/PBS.py b/spearmint/schedulers/PBS.py index c86f589..6dbcbf4 100755 --- a/spearmint/schedulers/PBS.py +++ b/spearmint/schedulers/PBS.py @@ -184,7 +184,7 @@ import sys import spearmint -from cluster_scheduler import AbstractClusterScheduler +from .cluster_scheduler import AbstractClusterScheduler # Torque PBS scheduler python code from: https://oss.trac.surfsara.nl/pbs_python/ import pbs from PBSQuery import PBSQuery diff --git a/spearmint/schedulers/SGE.py b/spearmint/schedulers/SGE.py index 40297fa..c8cb819 100755 --- a/spearmint/schedulers/SGE.py +++ b/spearmint/schedulers/SGE.py @@ -182,7 +182,7 @@ # to enter into this License and Terms of Use on behalf of itself and # its Institution. -from cluster_scheduler import AbstractClusterScheduler +from .cluster_scheduler import AbstractClusterScheduler def init(*args, **kwargs): return SGEScheduler(*args, **kwargs) diff --git a/spearmint/schedulers/SLURM.py b/spearmint/schedulers/SLURM.py index 3c9ff78..ebcfa44 100755 --- a/spearmint/schedulers/SLURM.py +++ b/spearmint/schedulers/SLURM.py @@ -183,7 +183,7 @@ # its Institution. import spearmint -from cluster_scheduler import AbstractClusterScheduler +from .cluster_scheduler import AbstractClusterScheduler def init(*args, **kwargs): return SLURMScheduler(*args, **kwargs) diff --git a/spearmint/schedulers/local.py b/spearmint/schedulers/local.py index 04072d9..4059dc2 100755 --- a/spearmint/schedulers/local.py +++ b/spearmint/schedulers/local.py @@ -183,7 +183,7 @@ # its Institution. 
import spearmint -from abstract_scheduler import AbstractScheduler +from spearmint.schedulers.abstract_scheduler import AbstractScheduler import os import subprocess import sys diff --git a/spearmint/tasks/base_task.py b/spearmint/tasks/base_task.py index 34c183a..6903654 100755 --- a/spearmint/tasks/base_task.py +++ b/spearmint/tasks/base_task.py @@ -187,6 +187,7 @@ import numpy as np from collections import OrderedDict +from spearmint.utils.fixes import items, xrange class BaseTask(object): """ @@ -207,7 +208,7 @@ def variables_config_to_meta(self, variables_config): cardinality = 0 # The number of distinct variables num_dims = 0 # The number of dimensions in the matrix representation - for name, variable in variables_config.iteritems(): + for name, variable in items(variables_config): cardinality += variable['size'] vdict = {'type' : variable['type'].lower(), 'indices' : []} # indices stores a mapping from these variable(s) to their matrix column(s) @@ -250,7 +251,7 @@ def paramify_and_print(self, data_vector, left_indent=0, indent_top_row=False): sys.stderr.write(indentation) sys.stderr.write('---- ---- -----\n') - for param_name, param in params.iteritems(): + for param_name, param in items(params): if param['type'] == 'float': format_str = '%s%-12.12s %-9.9s %-12f\n' @@ -271,7 +272,7 @@ def paramify(self, data_vector): raise Exception('Input to paramify must be a 1-D array.') params = {} - for name, vdict in self.variables_meta.iteritems(): + for name, vdict in items(self.variables_meta): indices = vdict['indices'] params[name] = {} params[name]['type'] = vdict['type'] @@ -290,7 +291,7 @@ def paramify(self, data_vector): # Converts a dict of params to the corresponding vector in puts space def vectorify(self, params): v = np.zeros(self.num_dims) - for name, param in params.iteritems(): + for name, param in items(params): indices = self.variables_meta[name]['indices'] if param['type'] == 'int' or param['type'] == 'float': @@ -316,7 +317,7 @@ def to_unit(self, V): squeeze = False U = np.zeros(V.shape) - for name, variable in self.variables_meta.iteritems(): + for name, variable in items(self.variables_meta): indices = variable['indices'] if variable['type'] == 'int': vals = V[:,indices] @@ -346,7 +347,7 @@ def from_unit(self, U): squeeze = False V = np.zeros(U.shape) - for name, variable in self.variables_meta.iteritems(): + for name, variable in items(self.variables_meta): indices = variable['indices'] if variable['type'] == 'int': vals = U[:,indices] diff --git a/spearmint/tasks/task_group.py b/spearmint/tasks/task_group.py index 1ed01a1..b807d57 100755 --- a/spearmint/tasks/task_group.py +++ b/spearmint/tasks/task_group.py @@ -185,11 +185,12 @@ import copy import numpy as np +import sys from collections import OrderedDict from .task import Task - +from spearmint.utils.fixes import items class TaskGroup(object): """ @@ -203,7 +204,7 @@ class TaskGroup(object): def __init__(self, tasks_config, variables_config): self.tasks = {} - for task_name, task_options in tasks_config.iteritems(): + for task_name, task_options in items(tasks_config): self.tasks[task_name] = Task(task_name, task_options, variables_config) @@ -245,7 +246,7 @@ def pending(self, pending): @property def values(self): """return a dictionary of the task values keyed by task name""" - return {task_name : task.values for task_name, task in self.tasks.iteritems()} + return {task_name : task.values for task_name, task in items(self.tasks)} @values.setter def values(self, values): diff --git 
a/spearmint/tests/kernels/test_matern.py b/spearmint/tests/kernels/test_matern.py index 4a131da..8e2d29b 100755 --- a/spearmint/tests/kernels/test_matern.py +++ b/spearmint/tests/kernels/test_matern.py @@ -186,6 +186,7 @@ import numpy.random as npr from spearmint.kernels import Matern52 +from spearmint.utils.fixes import xrange def test_matern_grad(): npr.seed(1) diff --git a/spearmint/tests/kernels/test_product_kernel.py b/spearmint/tests/kernels/test_product_kernel.py index 95826b1..131ab01 100755 --- a/spearmint/tests/kernels/test_product_kernel.py +++ b/spearmint/tests/kernels/test_product_kernel.py @@ -186,6 +186,7 @@ import numpy.random as npr from spearmint.kernels import Matern52, ProductKernel +from spearmint.utils.fixes import xrange def test_product_kernel_grad(): npr.seed(1) diff --git a/spearmint/tests/kernels/test_scale.py b/spearmint/tests/kernels/test_scale.py index ace7495..2a56b02 100755 --- a/spearmint/tests/kernels/test_scale.py +++ b/spearmint/tests/kernels/test_scale.py @@ -186,6 +186,7 @@ import numpy.random as npr from spearmint.kernels import Matern52, Scale +from spearmint.utils.fixes import xrange def test_grad(): npr.seed(1) diff --git a/spearmint/tests/kernels/test_subset.py b/spearmint/tests/kernels/test_subset.py index b254c81..ff0b428 100755 --- a/spearmint/tests/kernels/test_subset.py +++ b/spearmint/tests/kernels/test_subset.py @@ -186,6 +186,7 @@ import numpy.random as npr from spearmint.kernels import Matern52, Subset +from spearmint.utils.fixes import xrange def test_grad(): npr.seed(1) @@ -214,8 +215,8 @@ def test_grad(): data2[i,j] += eps dloss_est[i,j] = ((loss_1 - loss_2) / (2*eps)) - print 'Subset kernel grad using indices %s:' % inds - print dloss + print('Subset kernel grad using indices %s:' % inds) + print(dloss) assert np.linalg.norm(dloss - dloss_est) < 1e-6 diff --git a/spearmint/tests/kernels/test_sum_kernel.py b/spearmint/tests/kernels/test_sum_kernel.py index d40cd10..ad9bd55 100755 --- a/spearmint/tests/kernels/test_sum_kernel.py +++ b/spearmint/tests/kernels/test_sum_kernel.py @@ -186,6 +186,7 @@ import numpy.random as npr from spearmint.kernels import Matern52, SumKernel +from spearmint.utils.fixes import xrange def test_sum_kernel_grad(): npr.seed(1) diff --git a/spearmint/tests/kernels/test_transform_kernel.py b/spearmint/tests/kernels/test_transform_kernel.py index 344cf75..bb19793 100755 --- a/spearmint/tests/kernels/test_transform_kernel.py +++ b/spearmint/tests/kernels/test_transform_kernel.py @@ -187,6 +187,7 @@ from spearmint.kernels import Matern52, TransformKernel from spearmint.transformations import BetaWarp, Normalization, Linear, Transformer +from spearmint.utils.fixes import xrange def test_grad(): npr.seed(1) diff --git a/spearmint/tests/models/in_progress/gp.py b/spearmint/tests/models/in_progress/gp.py index fda1a2c..d95199e 100755 --- a/spearmint/tests/models/in_progress/gp.py +++ b/spearmint/tests/models/in_progress/gp.py @@ -181,7 +181,7 @@ # 13. End User represents and warrants that it has the legal authority # to enter into this License and Terms of Use on behalf of itself and # its Institution. 
- +from __future__ import print_function import sys import os import ast @@ -195,6 +195,7 @@ from spearmint.kernels import Matern, Noise, Scale, SumKernel, TransformKernel from spearmint.sampling.slice_sampler import SliceSampler from spearmint.utils import priors +from spearmint.utils.fixes import xrange from spearmint.transformations import BetaWarp, IgnoreDims, Linear, Normalization, Transformer import spearmint.utils.param as param_util @@ -221,7 +222,7 @@ def observation_model(self, y): # https://hips.seas.harvard.edu/blog/2013/06/10/testing-mcmc-code-part-2-integration-tests/ # This test uses an arbitrary statistic of the data (outputs). Here we use the sum. def geweke_correctness_test(self): - print 'Initiating Geweke Correctness test' + print('Initiating Geweke Correctness test') # Note: the horseshoe prior on the noise will make the line slightly not straight # because we don't have the actual log pdf @@ -230,7 +231,7 @@ def geweke_correctness_test(self): # First, check that all priors and models can be sampled from for param in self.hypers: if not hasattr(param.prior, 'sample'): - print 'Prior of param %s cannot be sampled from. Cannot perform the Geweke correctness test.' % param.name + print('Prior of param %s cannot be sampled from. Cannot perform the Geweke correctness test.' % param.name) return n = 10000 # number of samples # n = self.mcmc_iters @@ -244,7 +245,7 @@ def geweke_correctness_test(self): caseA = np.zeros(n) for i in xrange(n): if i % 1000 == 0: - print 'Geweke Part A Sample %d/%d' % (i,n) + print('Geweke Part A Sample %d/%d' % (i,n)) for param in self.hypers: param.sample_from_prior() latent_y = self.sample_from_prior_given_hypers(self.data) # only inputs used @@ -263,7 +264,7 @@ def geweke_correctness_test(self): caseB = np.zeros(n) for i in xrange(n): if i % 1000 == 0: - print 'Geweke Part B Sample %d/%d' % (i,n) + print('Geweke Part B Sample %d/%d' % (i, n)) # Take MCMC step on theta given data self.sampler.generate_sample() # data['inputs'] and data['values'] used @@ -277,10 +278,10 @@ def geweke_correctness_test(self): caseB[i] = statistic_of_interest(self.data['values']) - print np.mean(caseA) - print np.std(caseA) - print np.mean(caseB) - print np.std(caseB) + print(np.mean(caseA)) + print(np.std(caseA)) + print(np.mean(caseB)) + print(np.std(caseB)) # Then, sort the sets A and B. 
caseA = np.sort(caseA) diff --git a/spearmint/tests/models/test_gp.py b/spearmint/tests/models/test_gp.py index 14f711d..3f51f2b 100755 --- a/spearmint/tests/models/test_gp.py +++ b/spearmint/tests/models/test_gp.py @@ -184,7 +184,7 @@ import numpy as np import numpy.random as npr - +from spearmint.utils.fixes import xrange from spearmint.models import GP def test_gp_init(): diff --git a/spearmint/tests/models/test_gp_classifier.py b/spearmint/tests/models/test_gp_classifier.py index c92bcbb..a80c530 100755 --- a/spearmint/tests/models/test_gp_classifier.py +++ b/spearmint/tests/models/test_gp_classifier.py @@ -186,6 +186,7 @@ import numpy.random as npr from spearmint.models import GPClassifier +from spearmint.utils.fixes import xrange def test_gp_init(): gp = GPClassifier(5) diff --git a/spearmint/tests/tasks/test_task.py b/spearmint/tests/tasks/test_task.py index 5920d7b..908de3c 100755 --- a/spearmint/tests/tasks/test_task.py +++ b/spearmint/tests/tasks/test_task.py @@ -186,6 +186,7 @@ from collections import OrderedDict from spearmint.tasks.task import Task +from spearmint.utils.fixes import items, xrange def create_task(): task_name = "mytask" @@ -211,7 +212,7 @@ def create_task(): # Create a set of inputs that satisfies the constraints of each variable X = np.zeros((10,num_dims)) for i in xrange(10): - for name, variable in variables_meta.iteritems(): + for name, variable in items(variables_meta): indices = variable['indices'] if variable['type'] == 'int': X[i,indices] = np.random.randint(variable['min'], variable['max']+1, len(indices)) diff --git a/spearmint/tests/transformations/test_beta_warp.py b/spearmint/tests/transformations/test_beta_warp.py index b72e4eb..a466186 100755 --- a/spearmint/tests/transformations/test_beta_warp.py +++ b/spearmint/tests/transformations/test_beta_warp.py @@ -191,6 +191,7 @@ from spearmint.transformations import BetaWarp from spearmint.utils import priors +from spearmint.utils.fixes import xrange from spearmint.utils.param import Param as Hyperparameter def test_validation(): diff --git a/spearmint/tests/transformations/test_kumar_warp.py b/spearmint/tests/transformations/test_kumar_warp.py index 40b5fa9..d382962 100755 --- a/spearmint/tests/transformations/test_kumar_warp.py +++ b/spearmint/tests/transformations/test_kumar_warp.py @@ -191,6 +191,7 @@ from spearmint.transformations import KumarWarp from spearmint.utils import priors +from spearmint.utils.fixes import xrange from spearmint.utils.param import Param as Hyperparameter def test_validation(): diff --git a/spearmint/tests/transformations/test_linear.py b/spearmint/tests/transformations/test_linear.py index 89ba002..86dab42 100755 --- a/spearmint/tests/transformations/test_linear.py +++ b/spearmint/tests/transformations/test_linear.py @@ -189,6 +189,7 @@ from spearmint.transformations import Linear from spearmint.utils import priors +from spearmint.utils.fixes import xrange from spearmint.utils.param import Param as Hyperparameter def test_backward_pass(): diff --git a/spearmint/tests/transformations/test_norm_lin.py b/spearmint/tests/transformations/test_norm_lin.py index 2c69c17..705391d 100755 --- a/spearmint/tests/transformations/test_norm_lin.py +++ b/spearmint/tests/transformations/test_norm_lin.py @@ -189,6 +189,7 @@ from spearmint.transformations import NormLin from spearmint.utils import priors +from spearmint.utils.fixes import xrange from spearmint.utils.param import Param as Hyperparameter def test_backward_pass(): diff --git 
a/spearmint/tests/transformations/test_normalization.py b/spearmint/tests/transformations/test_normalization.py index 50f58d4..1d7d5ce 100755 --- a/spearmint/tests/transformations/test_normalization.py +++ b/spearmint/tests/transformations/test_normalization.py @@ -189,6 +189,7 @@ from spearmint.transformations import Normalization from spearmint.utils import priors +from spearmint.utils.fixes import xrange from spearmint.utils.param import Param as Hyperparameter def test_backward_pass(): diff --git a/spearmint/tests/transformations/test_transformer.py b/spearmint/tests/transformations/test_transformer.py index ae266d8..2f70f42 100755 --- a/spearmint/tests/transformations/test_transformer.py +++ b/spearmint/tests/transformations/test_transformer.py @@ -189,6 +189,7 @@ from spearmint.transformations import Transformer, BetaWarp, Normalization, Linear from spearmint.transformations.abstract_transformation import AbstractTransformation +from spearmint.utils.fixes import xrange class SimpleTransformation(AbstractTransformation): def __init__(self, num_dims): @@ -405,5 +406,5 @@ def test_add_layer(): output_inds = t.add_layer(st3) assert len(t.layer_transformations) == 2 - assert output_inds == range(10) + assert output_inds == list(range(10)) diff --git a/spearmint/transformations/__init__.py b/spearmint/transformations/__init__.py index 52f774c..6742a91 100755 --- a/spearmint/transformations/__init__.py +++ b/spearmint/transformations/__init__.py @@ -1,9 +1,9 @@ -from beta_warp import BetaWarp -from ignore_dims import IgnoreDims -from kumar_warp import KumarWarp -from normalization import Normalization -from linear import Linear -from transformer import Transformer -from norm_lin import NormLin +from spearmint.transformations.beta_warp import BetaWarp +from spearmint.transformations.ignore_dims import IgnoreDims +from spearmint.transformations.kumar_warp import KumarWarp +from spearmint.transformations.normalization import Normalization +from spearmint.transformations.linear import Linear +from spearmint.transformations.transformer import Transformer +from spearmint.transformations.norm_lin import NormLin __all__ = ["BetaWarp", "IgnoreDims", "KumarWarp", "Normalization", "Linear", "Transformer", "NormLin"] diff --git a/spearmint/transformations/transformer.py b/spearmint/transformations/transformer.py index e237b23..80834ce 100755 --- a/spearmint/transformations/transformer.py +++ b/spearmint/transformations/transformer.py @@ -209,7 +209,7 @@ def add_layer(self, *layer_transformations): if len(layer_transformations) == 1 and isinstance(layer_transformations[0], AbstractTransformation): assert layer_transformations[0].num_dims == num_input_dims, 'Transformation must have the same number of input dimensions as the transformer layer.' transformations = layer_transformations - t_inds = [range(num_input_dims)] + t_inds = [list(range(num_input_dims))] # 2to3: not so sure here about the list, gives a double list: [[1,2,3]] ; but same happens in py2 else: transformations, t_inds = zip(*layer_transformations) @@ -243,7 +243,7 @@ def validate_layer(self, layer_inds): for i in inds: counts[i] += 1 - assert np.array(counts.keys()).max() < self.num_dims, 'Maximum index exceeds number of dimensions.' + assert np.array(list(counts.keys())).max() < self.num_dims, 'Maximum index exceeds number of dimensions.' assert all([count == 1 for count in counts.values()]), 'Each index may only be used once.' 
def forward_pass(self, inputs): @@ -270,11 +270,11 @@ def forward_pass(self, inputs): def backward_pass(self, V): assert self.layer_transformations, 'Transformer should contain transformations.' - for transformations, t_inds, remaining_inds, output_num_dims in zip( + for transformations, t_inds, remaining_inds, output_num_dims in reversed(list(zip( # bit ugly here, but reversed(zip(...)) doesn't work in Python 3: zip() now returns a non-reversible iterator, hence the list() self.layer_transformations, self.layer_inds, self.layer_remaining_inds, - self.layer_output_dims)[::-1]: + self.layer_output_dims))): JV = np.zeros(list(V.shape[:-1])+[len([i for inds in t_inds for i in inds]) + len(remaining_inds)]) i = 0 diff --git a/spearmint/utils/cleanup.py b/spearmint/utils/cleanup.py index 71660c3..f388594 100755 --- a/spearmint/utils/cleanup.py +++ b/spearmint/utils/cleanup.py @@ -187,7 +187,7 @@ import sys import pymongo import json -from parsing import parse_db_address +from .parsing import parse_db_address def cleanup(path): @@ -199,7 +199,7 @@ def cleanup(path): cfg = json.load(f) db_address = parse_db_address(cfg) - print 'Cleaning up experiment %s in database at %s' % (cfg["experiment-name"], db_address) + print('Cleaning up experiment %s in database at %s' % (cfg["experiment-name"], db_address)) client = pymongo.MongoClient(db_address) diff --git a/spearmint/utils/compression.py b/spearmint/utils/compression.py index b0101e9..87d2ac4 100755 --- a/spearmint/utils/compression.py +++ b/spearmint/utils/compression.py @@ -184,24 +184,54 @@ import zlib import numpy as np +import codecs +from spearmint.utils.fixes import items +import sys COMPRESS_TYPE = 'compressed array' # TODO: see if there is a better way to encode this than base64 # It takes about 0.65 seconds to compress a 1000x1000 array on a 2011 Macbook air def compress_array(a): - return {'ctype' : COMPRESS_TYPE, - 'shape' : list(a.shape), - 'value' : (zlib.compress(a).encode('base64'))} + + if sys.version < '3': + return {'ctype': COMPRESS_TYPE, + 'shape': list(a.shape), + 'value': (zlib.compress(a).encode('base64'))} # compress returns bytes, encode turns it into a string + else: + # TODO not so sure if this string encoding/decoding is of any use; in py3 it stays a bytes object + return {'ctype' : COMPRESS_TYPE, + 'shape' : list(a.shape), + 'value': (codecs.encode(zlib.compress(a), encoding='base64')) # compress returns bytes, encode turns it into a string (actually from b'\x00' to b'eJwBIA') + } # It takes about 0.15 seconds to decompress a 1000x1000 array on a 2011 Macbook air def decompress_array(a): - return np.fromstring(zlib.decompress(a['value'].decode('base64'))).reshape(a['shape']) + """ + what this does in py27: + a['value'] is a string + .decode() -> turns it into bytes (but still a STRING object, just this weird \x03..)
+ zlib.decompress() -> decompressed bytes (still a STRING) + fromstring() -> turns the string back into an array + Parameters + ---------- + a + + Returns + ------- + + """ + if sys.version < '3': + return np.fromstring(zlib.decompress(a['value'].decode('base64'))).reshape(a['shape']) + else: + decoded = codecs.decode(a['value'], encoding='base64') # str -> bytes + decomp = zlib.decompress(decoded) # bytes -> bytes + return np.fromstring(decomp).reshape(a['shape']) # bytes -> np.array def compress_nested_container(u_container): if isinstance(u_container, dict): cdict = {} - for key, value in u_container.iteritems(): + for key, value in items(u_container): if isinstance(value, dict) or isinstance(value, list): cdict[key] = compress_nested_container(value) else: @@ -226,14 +256,14 @@ def compress_nested_container(u_container): def decompress_nested_container(c_container): if isinstance(c_container, dict): - if c_container.has_key('ctype') and c_container['ctype'] == COMPRESS_TYPE: + if 'ctype' in c_container and c_container['ctype'] == COMPRESS_TYPE: try: return decompress_array(c_container) except: - raise Exception('Container does not contain a valid array.') + raise Exception('Container does not contain a valid array.') # TODO, dangerous, very generic exception catch here else: udict = {} - for key, value in c_container.iteritems(): + for key, value in items(c_container): if isinstance(value, dict) or isinstance(value, list): udict[key] = decompress_nested_container(value) else: diff --git a/spearmint/utils/database/mongodb.py b/spearmint/utils/database/mongodb.py index 28ab540..f664d1f 100755 --- a/spearmint/utils/database/mongodb.py +++ b/spearmint/utils/database/mongodb.py @@ -187,7 +187,7 @@ import pymongo import numpy.random as npr -from abstractdb import AbstractDB +from spearmint.utils.database.abstractdb import AbstractDB from spearmint.utils.compression import compress_nested_container, decompress_nested_container class MongoDB(AbstractDB): diff --git a/spearmint/utils/fixes.py b/spearmint/utils/fixes.py new file mode 100644 index 0000000..527bc64 --- /dev/null +++ b/spearmint/utils/fixes.py @@ -0,0 +1,11 @@ +import sys + +def items(x): + if sys.version < '3': + return x.iteritems() + return x.items() + +def xrange(*args): + if sys.version < '3': + return __builtins__['xrange'](*args) # seems a bit fishy, in my console __builtins__.xrange() works + return range(*args) diff --git a/spearmint/utils/linalg.py b/spearmint/utils/linalg.py index 659f02a..4bac189 100755 --- a/spearmint/utils/linalg.py +++ b/spearmint/utils/linalg.py @@ -184,9 +184,14 @@ import numpy as np -import scipy.weave +try: + import scipy.weave +except ImportError: + pass import scipy.linalg as spla +from spearmint.utils.fixes import xrange + # Update Cholesky decomposition to include a single extra # row/column in the input matrix which is significantly faster than # recomputing the entire cholesky decomposition. 
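Note on the new spearmint/utils/fixes.py above: resolving xrange through __builtins__ works when fixes.py is imported (where __builtins__ is a dict) but not when the module is run directly (where it is a module), which is what the "seems a bit fishy" comment is getting at. A minimal alternative sketch, assuming the same items()/xrange() names, that binds the compatibility names once at import time instead of checking sys.version on every call:

import sys

if sys.version_info[0] < 3:
    from __builtin__ import xrange      # the Python 2 builtin, imported explicitly

    def items(d):
        return d.iteritems()            # lazy iterator on Python 2
else:
    xrange = range                      # range is already lazy on Python 3

    def items(d):
        return d.items()                # dict view on Python 3

Callers keep importing items and xrange from spearmint.utils.fixes exactly as in the hunks above; only the shim internals differ.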
diff --git a/spearmint/utils/locker.py b/spearmint/utils/locker.py index 12f2f97..4afdf46 100755 --- a/spearmint/utils/locker.py +++ b/spearmint/utils/locker.py @@ -218,7 +218,7 @@ def clear_locks(self): self.unlock(filename) def lock(self, filename): - if self.locks.has_key(filename): + if filename in self.locks: self.locks[filename] += 1 return True else: @@ -246,7 +246,7 @@ def lock(self, filename): # return not fail def unlock(self, filename): - if not self.locks.has_key(filename): + if filename not in self.locks: sys.stderr.write("Trying to unlock not-locked file %s.\n" % (filename)) return True diff --git a/spearmint/utils/param.py b/spearmint/utils/param.py index 459bd89..dc6d7a5 100755 --- a/spearmint/utils/param.py +++ b/spearmint/utils/param.py @@ -186,8 +186,8 @@ import numpy as np -import priors -from compression import compress_array +from spearmint.utils import priors +from spearmint.utils.compression import compress_array def set_params_from_array(params_iterable, params_array): """Update the params in params_iterable with the new values stored in params_array""" @@ -272,6 +272,6 @@ def sample_from_prior(self): def print_diagnostics(self): if self.size() == 1: - print ' %s: %s' % (self.name, self.value) + print(' %s: %s' % (self.name, self.value)) else: - print ' %s: min=%s, max=%s (size=%d)' % (self.name, self.value.min(), self.value.max(), self.size()) + print(' %s: min=%s, max=%s (size=%d)' % (self.name, self.value.min(), self.value.max(), self.size()))
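A note on the compression.py hunks: the Python 3 branch still rebuilds arrays with np.fromstring, which newer NumPy releases deprecate for binary input (np.frombuffer accepts the same bytes and is the usual replacement). A small in-memory round-trip check for the ported helpers — a sketch only, assuming C-contiguous float64 arrays, which is what np.fromstring's default dtype implies:

import numpy as np

from spearmint.utils.compression import compress_array, decompress_array

a = np.random.randn(50, 3)          # float64, C-contiguous
packed = compress_array(a)          # dict with 'ctype', 'shape', and the base64-encoded zlib payload in 'value'
restored = decompress_array(packed)

assert restored.shape == a.shape
assert np.allclose(a, restored), 'zlib + base64 round trip should be lossless'

Whichever interpreter runs this exercises the matching version branch of compress_array/decompress_array, so it is a cheap guard against the encoding asymmetries the TODO comments in the diff worry about.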