diff --git a/.gitignore b/.gitignore
index 253c9a8f4..aa5948dad 100644
--- a/.gitignore
+++ b/.gitignore
@@ -65,3 +65,4 @@ tests/cache/*
 
 .DS_Store
 
+.coverage
diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst
index af9552982..167ad75a9 100644
--- a/doc/source/changelog.rst
+++ b/doc/source/changelog.rst
@@ -12,9 +12,29 @@ Development was migrated into the
 `OpenBioSim <https://github.com/openbiosim>`__
 organisation on `GitHub <https://github.com/openbiosim/sire>`__.
 
-`2023.4.0 <https://github.com/openbiosim/sire/compare/2023.4.0...2023.5.0>`__ - December 2023
+`2023.5.0 <https://github.com/openbiosim/sire/compare/2023.4.0...2023.5.0>`__ - December 2023
 ---------------------------------------------------------------------------------------------
 
+* Fixed regression introduced in 2023.4.0 that meant that removed the constraints
+  from water molecules that had no internal bonds. These waters would blow up
+  as there was nothing holding them together. The need for these constraints is
+  now better detected and explicitly added.
+
+* Significantly sped up the OpenMM layer by checking for similar constraint lengths
+  and matching them all to be the same (within 0.05 A for calculated constraints,
+  e.g. unbonded atoms or angle constraints) or to R0 for bonds where the bond
+  length is within 0.1 A of R0 and the molecule isn't perturbable.
+
+* Added a custom minimiser that is based on OpenMM's LocalEnergyMinimizer,
+  but that copes better with exclusion errors, and that has deep integration
+  with the progress bar / interuption system.
+
+* Fixed a bug where the exclusions and exceptions were mismatched for the
+  OpenMM CPU platform, leading to exclusion errors.
+
+* Fixed an issue where the vacuum dynamics and minimisation simulations still
+  had a spurious periodic box added when ``.commit()`` was called.
+
 * Please add an item to this changelog when you create your PR
 
 `2023.4.0 <https://github.com/openbiosim/sire/compare/2023.3.0...2023.4.0>`__ - October 2023
diff --git a/doc/source/tutorial/part06/06_faster_fep.rst b/doc/source/tutorial/part06/06_faster_fep.rst
index fa4b5495a..5e600ba6e 100644
--- a/doc/source/tutorial/part06/06_faster_fep.rst
+++ b/doc/source/tutorial/part06/06_faster_fep.rst
@@ -323,7 +323,9 @@ ethane and methanol.
 ...     lambda_value = l / 100.0
 ...     print(f"Simulating lambda={lambda_value:.2f}")
 ...     # minimise the system at this lambda value
-...     min_mols = mols.minimisation(lambda_value=lambda_value).run().commit()
+...     min_mols = mols.minimisation(lambda_value=lambda_value,
+...                                  constraint="h-bonds",
+...                                  perturbable_constraint="none").run().commit()
 ...     # create a dynamics object for the system
 ...     d = min_mols.dynamics(timestep="4fs", temperature="25oC",
 ...                           lambda_value=lambda_value,
diff --git a/doc/source/tutorial/part06/scripts/run_md.py b/doc/source/tutorial/part06/scripts/run_md.py
index f30b8ad0a..63ed1891b 100644
--- a/doc/source/tutorial/part06/scripts/run_md.py
+++ b/doc/source/tutorial/part06/scripts/run_md.py
@@ -25,7 +25,9 @@
 for i, lambda_value in enumerate(lambda_values):
     print(f"Simulating lambda={lambda_value:.2f}")
     # minimise the system at this lambda value
-    min_mols = mols.minimisation(lambda_value=lambda_value).run().commit()
+    min_mols = mols.minimisation(lambda_value=lambda_value,
+                                 constraint=constraint,
+                                 perturbable_constraint="none").run().commit()
 
     # create a dynamics object for the system
     d = min_mols.dynamics(
@@ -68,10 +70,14 @@
 
 for i, lambda_value in enumerate(lambda_values):
     print(f"Simulating lambda={lambda_value:.2f}")
-    # minimise the system at this lambda value
+    # minimise the system at this lambda value using the
+    # same constraints as the dynamics (but switching
+    # off the perturbable constraint)
     min_mols = (
         mols[0]
-        .minimisation(lambda_value=lambda_value, vacuum=True)
+        .minimisation(lambda_value=lambda_value, vacuum=True,
+                      constraint=constraint,
+                      perturbable_constraint="none")
         .run()
         .commit()
     )
diff --git a/src/sire/__init__.py b/src/sire/__init__.py
index c598c3c05..391434ce8 100644
--- a/src/sire/__init__.py
+++ b/src/sire/__init__.py
@@ -90,7 +90,7 @@ def _fix_openmm_path():
     if need_path:
         # importing openmm should add this path
         try:
-            import openmm
+            import openmm  # noqa: F401 (imported but unused)
         except Exception:
             print("OpenMM import failed!")
             # Copy the files
diff --git a/src/sire/_load.py b/src/sire/_load.py
index 40c678b10..e94f33ad7 100644
--- a/src/sire/_load.py
+++ b/src/sire/_load.py
@@ -293,7 +293,8 @@ def expand(base: str, path: _Union[str, _List[str]], *args, **kwargs):
 
      Examples:
          >>> expand("https://sire.openbiosim.org/m", "urea.gro", "urea.top")
-         ["https://sire.openbiosim.org/m/urea.gro", "https://sire.openbiosim.org/n/urea.top"]
+         ["https://sire.openbiosim.org/m/urea.gro",
+          "https://sire.openbiosim.org/n/urea.top"]
 
          >>> expand("input", ["ala.top", "ala.crd"])
          ["input/ala.top", "input/ala.crd"]
@@ -438,8 +439,6 @@ def load(
     for key in kwargs.keys():
         m[key] = kwargs[key]
 
-    from .base import create_map
-
     map = create_map(map, m)
 
     return load_molecules(paths, map=create_map(map))
diff --git a/src/sire/_pythonize.py b/src/sire/_pythonize.py
index 5fe58c5dc..231ea05fb 100644
--- a/src/sire/_pythonize.py
+++ b/src/sire/_pythonize.py
@@ -177,7 +177,7 @@ def _load_new_api_modules(delete_old: bool = True, is_base: bool = False):
     _is_in_loading_process = True
 
     # call Pythonize on all of the new modules
-    from .legacy import (
+    from .legacy import (  # noqa: F401
         Base,
         Move,
         IO,
@@ -189,7 +189,8 @@ def _load_new_api_modules(delete_old: bool = True, is_base: bool = False):
         Analysis,
         CAS,
         Cluster,
-        Convert,  # does not need pythonizing, but importing will make it visible
+        # doesn't need pythonizing but importing will make it visible
+        Convert,
         Error,
         ID,
         Maths,
diff --git a/src/sire/analysis/__init__.py b/src/sire/analysis/__init__.py
index 94098a31f..a3675a299 100644
--- a/src/sire/analysis/__init__.py
+++ b/src/sire/analysis/__init__.py
@@ -1,6 +1,7 @@
 __all__ = []
 
-from ..legacy import Analysis as _Analysis
+# Need to import the module to pythonize it
+from ..legacy import Analysis as _Analysis  # noqa: F401
 
 from .. import use_new_api as _use_new_api
 
diff --git a/src/sire/base/__init__.py b/src/sire/base/__init__.py
index 8d67cddac..d60d6f2c7 100644
--- a/src/sire/base/__init__.py
+++ b/src/sire/base/__init__.py
@@ -1,4 +1,4 @@
-__all__ = ["create_map", "wrap", "PropertyMap"]
+__all__ = ["create_map", "wrap", "PropertyMap", "ProgressBar"]
 
 from ..legacy import Base as _Base
 
@@ -6,7 +6,7 @@
 
 from ..legacy.Base import PropertyMap
 
-from ._progressbar import *
+from ._progressbar import ProgressBar
 
 _use_new_api(is_base=True)
 
diff --git a/src/sire/cluster/__init__.py b/src/sire/cluster/__init__.py
index 6038c8d92..7d93d0678 100644
--- a/src/sire/cluster/__init__.py
+++ b/src/sire/cluster/__init__.py
@@ -1,6 +1,7 @@
 __all__ = []
 
-from ..legacy import Cluster as _Cluster
+# Need to import so it is pythonized
+from ..legacy import Cluster as _Cluster  # noqa: F401
 
 from .. import use_new_api as _use_new_api
 
diff --git a/src/sire/convert/__init__.py b/src/sire/convert/__init__.py
index 533d909ea..1a5fbbb93 100644
--- a/src/sire/convert/__init__.py
+++ b/src/sire/convert/__init__.py
@@ -143,7 +143,7 @@ def _to_type(o):
         else:
             raise TypeError(f"Unrecognised type {typ[0]}")
 
-        if type(c) != System:
+        if not isinstance(c, System):
             c = c.molecules()
 
         converted.append(c)
@@ -157,7 +157,7 @@ def _to_type(o):
         for i in range(1, len(converted)):
             mols += converted[i]
 
-    if type(mols) == System:
+    if isinstance(mols, System):
         return mols
     elif len(mols) == 1:
         return mols[0]
@@ -286,7 +286,7 @@ def sire_to_biosimspace(obj, map=None):
 
     from ..system import System
 
-    if type(obj) == System:
+    if isinstance(obj, System):
         return _BSS._SireWrappers.System(obj._system)
 
     obj = _to_selectormol(obj)
diff --git a/src/sire/ff/__init__.py b/src/sire/ff/__init__.py
index d9bb4500a..26147a09e 100644
--- a/src/sire/ff/__init__.py
+++ b/src/sire/ff/__init__.py
@@ -1,6 +1,7 @@
 __all__ = []
 
-from ..legacy import FF as _FF
+# Need to import so the module is pythonized
+from ..legacy import FF as _FF  # noqa: F401
 
 from .. import use_new_api as _use_new_api
 
diff --git a/src/sire/io/__init__.py b/src/sire/io/__init__.py
index 866bd867e..c2098ebc7 100644
--- a/src/sire/io/__init__.py
+++ b/src/sire/io/__init__.py
@@ -4,9 +4,10 @@
 from .. import use_new_api as _use_new_api
 
 # Imported to ensure that sire.maths.Vector is properly wrapped
-from ..maths import Vector as _Vector
+from ..maths import Vector as _Vector  # noqa: F401
 
-from . import parser
+# Importing the parser sub-module so that it autocompletes when used
+from . import parser  # noqa: F401
 
 _use_new_api()
 
diff --git a/src/sire/maths/_vector.py b/src/sire/maths/_vector.py
index ba19367c1..f3a9f2d14 100644
--- a/src/sire/maths/_vector.py
+++ b/src/sire/maths/_vector.py
@@ -166,7 +166,10 @@ def __str__(obj):
     _Vector.__repr__ = __str__
 
     def __format__(obj, format):
-        return f"({obj.x().value():{format}}, {obj.y().value():{format}}, {obj.z().value():{format}})"
+        return (
+            f"({obj.x().value():{format}}, {obj.y().value():{format}}, "
+            f"{obj.z().value():{format}})"
+        )
 
     _Vector.__format__ = __format__
 
diff --git a/src/sire/mol/__init__.py b/src/sire/mol/__init__.py
index 5c6129cea..b1c781edb 100644
--- a/src/sire/mol/__init__.py
+++ b/src/sire/mol/__init__.py
@@ -47,7 +47,9 @@
 from ..legacy import Mol as _Mol
 from .. import use_new_api as _use_new_api
 
-from ..legacy import Base as _Base
+# Imported as we need to ensure that Base is pythonized before
+# loading the rest of this module
+from ..legacy import Base as _Base  # noqa: F401
 
 
 from ..legacy.Mol import (
@@ -114,8 +116,8 @@
     _selector_view2d,
 )
 
-# make sure that Vector has units attached
-from ..maths import Vector as _Vector
+# Imported to make sure that Vector has units attached
+from ..maths import Vector as _Vector  # noqa: F401
 
 _use_new_api()
 
@@ -1578,6 +1580,13 @@ def _dynamics(
 
         map.set("space", Cartesian())
 
+        view = view.clone()
+
+        try:
+            view.set_property("space", Cartesian())
+        except Exception:
+            pass
+
     if not map.specified("space"):
         map = create_map(map, {"space": "space"})
 
@@ -1588,7 +1597,7 @@ def _dynamics(
         and not view.shared_properties().has_property(map["space"])
     ):
         # space is not a shared property, so may be lost when we
-        # convert to molecules. Make sure this doens't happen by
+        # convert to molecules. Make sure this doesn't happen by
         # adding the space directly to the property map
         try:
             map.set("space", view.property(map["space"]))
@@ -1828,7 +1837,6 @@ def _minimisation(
     """
     from ..base import create_map
     from ..system import System
-    from .. import u
 
     map = create_map(map)
 
@@ -1837,6 +1845,13 @@ def _minimisation(
 
         map.set("space", Cartesian())
 
+        view = view.clone()
+
+        try:
+            view.set_property("space", Cartesian())
+        except Exception:
+            pass
+
     if not map.specified("space"):
         map = create_map(map, {"space": "space"})
 
diff --git a/src/sire/mol/_cursor.py b/src/sire/mol/_cursor.py
index 48c2ce8b9..e7d8c06f1 100644
--- a/src/sire/mol/_cursor.py
+++ b/src/sire/mol/_cursor.py
@@ -54,8 +54,8 @@ def __init__(self, molecule=None, map=None):
                     from ..utils import Console
 
                     Console.warning(
-                        f"Cannot auto-generate a connectivity for {self.molecule}."
-                        f" The error is:\n\n{e}"
+                        "Cannot auto-generate a connectivity for "
+                        f"{self.molecule}. The error is:\n\n{e}"
                     )
 
     def number(self):
diff --git a/src/sire/mol/_dynamics.py b/src/sire/mol/_dynamics.py
index 17cad4472..d792734e6 100644
--- a/src/sire/mol/_dynamics.py
+++ b/src/sire/mol/_dynamics.py
@@ -1,9 +1,6 @@
 __all__ = ["Dynamics"]
 
 
-from typing import Any
-
-
 class DynamicsData:
     """
     Internal class that is designed to only be used by the Dynamics
@@ -128,25 +125,32 @@ def _update_from(self, state, state_has_cv, nsteps_completed):
             mols = openmm_extract_coordinates_and_velocities(
                 state,
                 self._sire_mols.molecules(),
-                perturbable_maps=self._omm_mols.get_lambda_lever().get_perturbable_molecule_maps(),
+                # black auto-formats this to a long line
+                perturbable_maps=self._omm_mols.get_lambda_lever().get_perturbable_molecule_maps(),  # noqa: E501
                 map=self._map,
             )
         else:
             mols = openmm_extract_coordinates(
                 state,
                 self._sire_mols.molecules(),
-                perturbable_maps=self._omm_mols.get_lambda_lever().get_perturbable_molecule_maps(),
+                # black auto-formats this to a long line
+                perturbable_maps=self._omm_mols.get_lambda_lever().get_perturbable_molecule_maps(),  # noqa: E501
                 map=self._map,
             )
 
-        space = openmm_extract_space(state)
-
         self._current_step = nsteps_completed
 
         self._sire_mols.update(mols.to_molecules())
-        self._sire_mols.set_property("space", space)
+
+        if self._ffinfo.space().is_periodic():
+            # don't change the space if it is infinite - this
+            # cannot change during the simulation and OpenMM
+            # likes giving back fake spaces
+            space = openmm_extract_space(state)
+            self._sire_mols.set_property("space", space)
+            self._ffinfo.set_space(space)
+
         self._sire_mols.set_property("time", self._current_time)
-        self._ffinfo.set_space(space)
 
     def _enter_dynamics_block(self):
         if self._is_running:
@@ -521,39 +525,12 @@ def run_minimisation(self, max_iterations: int):
 
         - max_iterations (int): The maximum number of iterations to run
         """
-        from openmm import LocalEnergyMinimizer
-        from concurrent.futures import ThreadPoolExecutor
+        from ..legacy.Convert import minimise_openmm_context
 
         if max_iterations <= 0:
             max_iterations = 0
 
-        from ..base import ProgressBar
-
-        def runfunc(max_its):
-            try:
-                LocalEnergyMinimizer.minimize(
-                    self._omm_mols, maxIterations=max_its
-                )
-
-                return 0
-            except Exception as e:
-                return e
-
-        with ProgressBar(text="minimisation") as spinner:
-            spinner.set_speed_unit("checks / s")
-
-            with ThreadPoolExecutor() as pool:
-                run_promise = pool.submit(runfunc, max_iterations)
-
-                while not run_promise.done():
-                    try:
-                        result = run_promise.result(timeout=0.2)
-                    except Exception:
-                        spinner.tick()
-                        pass
-
-                if result != 0:
-                    raise result
+        minimise_openmm_context(self._omm_mols, max_iterations=max_iterations)
 
     def _rebuild_and_minimise(self):
         if self.is_null():
@@ -803,12 +780,20 @@ class NeedsMinimiseError(Exception):
                             # run the current block in the background
                             run_promise = pool.submit(runfunc, nrun)
 
+                            result = None
+
                             while not run_promise.done():
                                 try:
                                     result = run_promise.result(timeout=1.0)
                                 except Exception:
                                     pass
 
+                            # catch rare edge case where the promise timed
+                            # out, but then completed before the .done()
+                            # test in the next loop iteration
+                            if result is None:
+                                result = run_promise.result()
+
                             if result == 0:
                                 completed += nrun
                                 nrun_till_save -= nrun
@@ -861,12 +846,13 @@ class NeedsMinimiseError(Exception):
                                 raise NeedsMinimiseError()
 
                             raise RuntimeError(
-                                "The kinetic energy has exceeded 1000 kcal mol-1 "
-                                f"per atom (it is {ke_per_atom} kcal mol-1 atom-1,"
-                                f" and {kinetic_energy} kcal mol-1 total). This "
-                                "suggests that the simulation has become "
-                                "unstable. Try reducing the timestep and/or "
-                                "minimising the system and run again."
+                                "The kinetic energy has exceeded 1000 kcal "
+                                f"mol-1 per atom (it is {ke_per_atom} kcal "
+                                f"mol-1 atom-1, and {kinetic_energy} kcal "
+                                "mol-1 total). This suggests that the "
+                                "simulation has become unstable. Try reducing "
+                                "the timestep and/or minimising the system "
+                                "and run again."
                             )
 
                 self._walltime += (
diff --git a/src/sire/mol/_smiles.py b/src/sire/mol/_smiles.py
index 4d74cdc5e..f7a8af5c8 100644
--- a/src/sire/mol/_smiles.py
+++ b/src/sire/mol/_smiles.py
@@ -10,7 +10,8 @@
 _rdkit_import_error = None
 
 try:
-    from rdkit import Chem as _Chem
+    # imported to check if it is available
+    from rdkit import Chem as _Chem  # noqa: F401
 
     _has_rdkit = True
 except ImportError:
@@ -497,7 +498,6 @@ def _view2d(
 
         from rdkit.Chem import rdDepictor
         from rdkit.Chem.Draw import rdMolDraw2D
-        from rdkit.Chem.Draw import IPythonConsole
 
         rdDepictor.SetPreferCoordGen(True)
 
diff --git a/src/sire/mol/_trajectory.py b/src/sire/mol/_trajectory.py
index 9c43f7c24..b2dcdebc1 100644
--- a/src/sire/mol/_trajectory.py
+++ b/src/sire/mol/_trajectory.py
@@ -1,7 +1,7 @@
 __all__ = ["TrajectoryIterator"]
 
 # make sure that GeneralUnit has been modernised
-from ..units import GeneralUnit as _GeneralUnit
+from ..units import GeneralUnit as _GeneralUnit  # noqa: F401
 
 
 def _get_align_atoms_and_reference(
diff --git a/src/sire/morph/__init__.py b/src/sire/morph/__init__.py
index 13596f702..1a30c86d5 100644
--- a/src/sire/morph/__init__.py
+++ b/src/sire/morph/__init__.py
@@ -6,12 +6,12 @@
     "Perturbation",
 ]
 
-from ._perturbation import *
+from ._perturbation import Perturbation
 
-from ._ghost_atoms import *
+from ._ghost_atoms import shrink_ghost_atoms
 
-from ._repex import *
+from ._repex import replica_exchange
 
-from ._hmr import *
+from ._hmr import repartition_hydrogen_masses
 
-from ._alchemy import *
+from ._alchemy import to_alchemlyb
diff --git a/src/sire/qt/__init__.py b/src/sire/qt/__init__.py
index 8982f28c6..63c37177f 100644
--- a/src/sire/qt/__init__.py
+++ b/src/sire/qt/__init__.py
@@ -1,6 +1,7 @@
 __all__ = []
 
-from ..legacy import Qt as _Qt
+# Imported so that it is pythonized
+from ..legacy import Qt as _Qt  # noqa: F401
 
 from .. import use_new_api as _use_new_api
 
diff --git a/src/sire/restraints/__init__.py b/src/sire/restraints/__init__.py
index dfc8a3efb..4b59de758 100644
--- a/src/sire/restraints/__init__.py
+++ b/src/sire/restraints/__init__.py
@@ -1,3 +1,3 @@
 __all__ = ["positional", "bond", "distance", "boresch"]
 
-from ._restraints import *
+from ._restraints import positional, bond, distance, boresch
diff --git a/src/sire/search/__init__.py b/src/sire/search/__init__.py
index fa45724b7..224f44fbf 100644
--- a/src/sire/search/__init__.py
+++ b/src/sire/search/__init__.py
@@ -18,7 +18,8 @@
     "set_token",
 ]
 
-from ..legacy import Search as _Search
+# Imported so that it is pythonized
+from ..legacy import Search as _Search  # noqa: F401
 
 from .. import use_new_api as _use_new_api
 
diff --git a/src/sire/squire/__init__.py b/src/sire/squire/__init__.py
index 2a0e51d60..361cd5985 100644
--- a/src/sire/squire/__init__.py
+++ b/src/sire/squire/__init__.py
@@ -1,6 +1,7 @@
 __all__ = []
 
-from ..legacy import Squire as _Squire
+# Imported so that it is pythonized
+from ..legacy import Squire as _Squire  # noqa: F401
 
 from .. import use_new_api as _use_new_api
 
diff --git a/src/sire/system/_system.py b/src/sire/system/_system.py
index c5b8ace8f..5e4c8dfd8 100644
--- a/src/sire/system/_system.py
+++ b/src/sire/system/_system.py
@@ -33,7 +33,7 @@ def __init__(self, system=None):
                     f"not a {type(system)}"
                 )
 
-            if type(system) == System:
+            if isinstance(system, System):
                 self._system == system._system
             else:
                 self._system = system
@@ -48,7 +48,7 @@ def is_system(obj):
         """
         from ..legacy.System import System as _System
 
-        return type(obj) == System or type(obj) == _System
+        return isinstance(obj, System) or isinstance(obj, _System)
 
     def _to_legacy_system(self):
         """
@@ -458,7 +458,7 @@ def minimisation(self, *args, **kwargs):
             set in this dictionary that are also specified via one of
             the arguments above will be overridden by the argument
             value
-        """
+        """  # noqa: E501
         from ..mol import _minimisation
 
         return _minimisation(self, *args, **kwargs)
@@ -605,7 +605,7 @@ def dynamics(self, *args, **kwargs):
             set in this dictionary that are also specified via one of
             the arguments above will be overridden by the argument
             value
-        """
+        """  # noqa: E501
         from ..mol import _dynamics
 
         return _dynamics(self, *args, **kwargs)
@@ -711,9 +711,10 @@ def set_space(self, space, map=None):
 
         if not issubclass(type(space), Space):
             raise TypeError(
-                "You can only set the space to a type derived from sire.vol.Space, "
-                "e.g. sire.vol.PeriodicBox, sire.vol.TriclinicBox or "
-                f"sire.vol.Cartesian. You cannot use a {type(space)}."
+                "You can only set the space to a type derived from "
+                "sire.vol.Space, e.g. sire.vol.PeriodicBox, "
+                "sire.vol.TriclinicBox or sire.vol.Cartesian. "
+                f"You cannot use a {type(space)}."
             )
 
         if map is None:
@@ -742,8 +743,9 @@ def set_time(self, time, map=None):
         else:
             if not hasattr(time, "has_same_units"):
                 raise TypeError(
-                    "You can only set the time to a value with units 'time', e.g. "
-                    f"5 * sire.units.picosecond. YOu cannot use a {type(time)}."
+                    "You can only set the time to a value with units 'time', "
+                    "e.g. 5 * sire.units.picosecond. "
+                    f"You cannot use a {type(time)}."
                 )
 
             if not time.has_same_units(picosecond):
@@ -780,11 +782,12 @@ def energy_trajectory(
         ----------
 
         to_pandas: bool
-            Whether or not to return the energy trajectory as a pandas DataFrame.
+            Whether or not to return the energy trajectory as a
+            pandas DataFrame.
 
         to_alchemlyb: bool
-            Whether or not to return the energy trajectory as a pandas DataFrame
-            that is formatted to usable in alchemlyb
+            Whether or not to return the energy trajectory as a
+            pandas DataFrame that is formatted to usable in alchemlyb
         """
         from ..base import create_map
 
diff --git a/src/sire/units/__init__.py b/src/sire/units/__init__.py
index d1d54eee6..eeb7fc66c 100644
--- a/src/sire/units/__init__.py
+++ b/src/sire/units/__init__.py
@@ -155,7 +155,8 @@
     "yards",
 ]
 
-from ..legacy import Units as _Units
+# Imported so that it is pythonized
+from ..legacy import Units as _Units  # noqa: F401
 
 from .. import use_new_api as _use_new_api
 
@@ -720,7 +721,7 @@ def length(length):
     except Exception:
         pass
 
-    if type(length) != type(angstrom):
+    if not isinstance(length, type(angstrom)):
         raise TypeError(
             f"The value '{length}' of type {type(length)} is "
             "not a type that is compatible with a GeneralUnit Length"
@@ -741,7 +742,7 @@ def angle(angle):
     except Exception:
         pass
 
-    if type(angle) != type(degrees):
+    if not isinstance(angle, type(degrees)):
         raise TypeError(
             f"The value '{angle}' of type {type(angle)} is "
             "not a type that is compatible with a GeneralUnit angle"
diff --git a/src/sire/utils/_console.py b/src/sire/utils/_console.py
index 5748428a7..aecbc7390 100644
--- a/src/sire/utils/_console.py
+++ b/src/sire/utils/_console.py
@@ -2,8 +2,6 @@
 from typing import List as _List
 from typing import IO as _IO
 
-from datetime import datetime as _datetime
-
 from contextlib import contextmanager as _contextmanager
 
 
diff --git a/src/sire/utils/_profiler.py b/src/sire/utils/_profiler.py
index 602949cc3..18a380532 100644
--- a/src/sire/utils/_profiler.py
+++ b/src/sire/utils/_profiler.py
@@ -70,8 +70,8 @@ def _to_string(self):
             lines.append(f"  \\-{clines[0]}")
 
             if len(clines) > 1:
-                for l in clines[1:]:
-                    lines.append(f"    {l}")
+                for line in clines[1:]:
+                    lines.append(f"    {line}")
 
         return lines
 
diff --git a/src/sire/utils/_try_import.py b/src/sire/utils/_try_import.py
index f09f9773f..2b7060718 100644
--- a/src/sire/utils/_try_import.py
+++ b/src/sire/utils/_try_import.py
@@ -20,11 +20,6 @@ def _find_conda():
     else:
         conda = None
 
-    if "CONDA_DEFAULT_ENV" in os.environ:
-        conda_env = os.environ["CONDA_DEFAULT_ENV"]
-    else:
-        conda_env = None
-
     if os.path.exists(os.path.join(conda_base, "python.exe")):
         # Windows
         conda_bin = os.path.join(conda_base, "Library", "bin")
@@ -80,7 +75,8 @@ def _install_package(name, package_registry, version=None):
     try:
         if version is not None:
             try:
-                _v = float(version)
+                # Check first that 'version' is a number
+                _v = float(version)  # noqa: F841
                 version = "==%s" % version
             except Exception:
                 pass
@@ -263,7 +259,7 @@ def assert_imported(module):
     This will raise a ModuleNotFoundError if the module
     has not been imported, and has instead been stubbed.
     """
-    if type(module) == _ModuleStub:
+    if isinstance(module, _ModuleStub):
         module.this_will_break()
 
 
@@ -272,4 +268,4 @@ def have_imported(module) -> bool:
     Return whether or not the passed module has indeed
     been imported (and thus is not stubbed).
     """
-    return type(module) != _ModuleStub
+    return not isinstance(module, _ModuleStub)
diff --git a/tests/convert/test_openmm.py b/tests/convert/test_openmm.py
index 2ea8bf90b..de104ecce 100644
--- a/tests/convert/test_openmm.py
+++ b/tests/convert/test_openmm.py
@@ -87,7 +87,7 @@ def test_openmm_multi_energy_all_cart(kigaki_mols):
         "cutoff": 10000 * sr.units.angstrom,
         "cutoff_type": "REACTION_FIELD",
         "dielectric": 1.0,
-        "platform": "Reference",
+        "platform": "cpu",
         "constraint": "bonds-h-angles",
     }
 
@@ -119,7 +119,7 @@ def test_openmm_multi_energy_all_cart_cutoff(kigaki_mols):
         "cutoff": 10 * sr.units.angstrom,
         "cutoff_type": "REACTION_FIELD",
         "dielectric": 78.0,
-        "platform": "Reference",
+        "platform": "cpu",
         "constraint": "bonds-h-angles",
     }
 
diff --git a/tests/convert/test_openmm_lambda.py b/tests/convert/test_openmm_lambda.py
index 1131f9807..5363420dd 100644
--- a/tests/convert/test_openmm_lambda.py
+++ b/tests/convert/test_openmm_lambda.py
@@ -56,7 +56,7 @@ def get_end_state(mol, state, remove_state):
 
     # need to use the reference platform on GH Actions
     map = {
-        "platform": "Reference",
+        "platform": "cpu",
         "schedule": l,
         "constraint": "bonds-h-angles",
     }
diff --git a/tests/convert/test_openmm_minimise.py b/tests/convert/test_openmm_minimise.py
new file mode 100644
index 000000000..cab4a4247
--- /dev/null
+++ b/tests/convert/test_openmm_minimise.py
@@ -0,0 +1,103 @@
+import pytest
+import sire as sr
+
+
+@pytest.mark.skipif(
+    "openmm" not in sr.convert.supported_formats(),
+    reason="openmm support is not available",
+)
+def test_openmm_simple_minimise(ala_mols):
+    mols = ala_mols
+
+    nrg = mols.energy()
+
+    mols = mols.minimisation(platform="cpu").run(5).commit()
+
+    nrg2 = mols.energy()
+
+    assert nrg2.value() < nrg.value()
+
+
+@pytest.mark.skipif(
+    "openmm" not in sr.convert.supported_formats(),
+    reason="openmm support is not available",
+)
+def test_openmm_minimise_lambda(merged_ethane_methanol):
+    mols = merged_ethane_methanol.clone()
+
+    for mol in mols.molecules("molecule property is_perturbable"):
+        mols.update(mol.perturbation().link_to_reference().commit())
+
+    # this blows up because of incompatible exceptions/exclusions
+    mol = (
+        mols[0].minimisation(platform="cpu", lambda_value=1.0).run(5).commit()
+    )
+
+    mols = (
+        mols[0].minimisation(platform="cpu", lambda_value=0.0).run(5).commit()
+    )
+
+    mols = (
+        mols[0].minimisation(platform="cpu", lambda_value=0.5).run(5).commit()
+    )
+
+
+@pytest.mark.skipif(
+    "openmm" not in sr.convert.supported_formats(),
+    reason="openmm support is not available",
+)
+def test_openmm_minimise_unbonded_water(kigaki_mols):
+    mols = kigaki_mols
+
+    atoms = mols[100].atoms()
+
+    # the water molecules have no internal bonds, so this tests
+    # whether or not constraints have been added correctly
+    mols = mols.minimisation(platform="cpu").run(10).commit()
+
+    new_atoms = mols[100].atoms()
+
+    # make sure the water has not blown up... (low tolerance as
+    # the water bond lengths will be constrained to a common shared
+    # value)
+    assert (
+        sr.measure(atoms[0], atoms[1]).value()
+        == pytest.approx(
+            sr.measure(new_atoms[0], new_atoms[1]).value(), abs=1e-2
+        )
+    ) or (
+        sr.measure(new_atoms[0], new_atoms[2]).value() == pytest.approx(0.9572)
+    )
+
+    assert (
+        sr.measure(atoms[0], atoms[2]).value()
+        == pytest.approx(
+            sr.measure(new_atoms[0], new_atoms[2]).value(), abs=1e-2
+        )
+    ) or (
+        sr.measure(new_atoms[0], new_atoms[2]).value() == pytest.approx(0.9572)
+    )
+
+    assert sr.measure(atoms[1], atoms[2]).value() == pytest.approx(
+        sr.measure(new_atoms[1], new_atoms[2]).value(), abs=1e-2
+    )
+
+
+@pytest.mark.skipif(
+    "openmm" not in sr.convert.supported_formats(),
+    reason="openmm support is not available",
+)
+def test_openmm_minimise_vacuum(kigaki_mols):
+    mols = kigaki_mols
+
+    mols = mols.minimisation(platform="cpu", vacuum=True).run(10).commit()
+
+    assert not mols.property("space").is_periodic()
+
+    mols = kigaki_mols.clone()
+
+    mols.add_shared_property("space", sr.vol.Cartesian())
+
+    mols = mols.minimisation(platform="cpu", vacuum=True).run(10).commit()
+
+    assert not mols.property("space").is_periodic()
diff --git a/wrapper/Convert/SireOpenMM/CMakeLists.txt b/wrapper/Convert/SireOpenMM/CMakeLists.txt
index 91003c267..bee36a1f6 100644
--- a/wrapper/Convert/SireOpenMM/CMakeLists.txt
+++ b/wrapper/Convert/SireOpenMM/CMakeLists.txt
@@ -30,10 +30,13 @@ if (${SIRE_USE_OPENMM})
           _SireOpenMM.main.cpp
 
           lambdalever.cpp
+          openmmminimise.cpp
           openmmmolecule.cpp
           sire_openmm.cpp
           sire_to_openmm_system.cpp
 
+          lbgfs/lbfgs.cpp
+
       )
 
     # Create the library that holds all of the class wrappers
diff --git a/wrapper/Convert/SireOpenMM/_SireOpenMM.main.cpp b/wrapper/Convert/SireOpenMM/_SireOpenMM.main.cpp
index c8c317313..0757bc9bc 100644
--- a/wrapper/Convert/SireOpenMM/_SireOpenMM.main.cpp
+++ b/wrapper/Convert/SireOpenMM/_SireOpenMM.main.cpp
@@ -7,6 +7,8 @@
 
 #include "lambdalever.h"
 
+#include "openmmminimise.h"
+
 #include "Helpers/convertlist.hpp"
 
 #include <QDebug>
@@ -115,6 +117,11 @@ BOOST_PYTHON_MODULE(_SireOpenMM)
             (bp::arg("context"), bp::arg("key"), bp::arg("value")),
             "Set the Platform property for the passed context.");
 
+    bp::def("_minimise_openmm_context",
+            &minimise_openmm_context,
+            (bp::arg("context"), bp::arg("tolerance") = 10, bp::arg("max_iterations") = -1),
+            "Minimise the passed context");
+
     bp::converter::registry::insert(&extract_swig_wrapped_pointer, bp::type_id<OpenMM::System>());
     bp::converter::registry::insert(&extract_swig_wrapped_pointer, bp::type_id<OpenMM::Context>());
     bp::converter::registry::insert(&extract_swig_wrapped_pointer, bp::type_id<OpenMM::State>());
diff --git a/wrapper/Convert/SireOpenMM/lambdalever.cpp b/wrapper/Convert/SireOpenMM/lambdalever.cpp
index 358889699..0f014f58f 100644
--- a/wrapper/Convert/SireOpenMM/lambdalever.cpp
+++ b/wrapper/Convert/SireOpenMM/lambdalever.cpp
@@ -540,6 +540,9 @@ double LambdaLever::setLambda(OpenMM::Context &context,
     if (ghost_14ff)
         ghost_14ff->updateParametersInContext(context);
 
+    // in OpenMM 8.1beta updating the bond parameters past lambda=0.25
+    // causes a "All Forces must have identical exclusions" error,
+    // when running minimisation without h-bond constraints...
     if (bondff)
         bondff->updateParametersInContext(context);
 
diff --git a/wrapper/Convert/SireOpenMM/lbgfs/arithmetic_ansi.h b/wrapper/Convert/SireOpenMM/lbgfs/arithmetic_ansi.h
new file mode 100644
index 000000000..d6cff7c8f
--- /dev/null
+++ b/wrapper/Convert/SireOpenMM/lbgfs/arithmetic_ansi.h
@@ -0,0 +1,133 @@
+/*
+ *      ANSI C implementation of vector operations.
+ *
+ * Copyright (c) 2007-2010 Naoaki Okazaki
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* $Id: arithmetic_ansi.h 65 2010-01-29 12:19:16Z naoaki $ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#if     LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
+#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
+#else
+#define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.)
+#endif/*LBFGS_IEEE_FLOAT*/
+
+inline static void* vecalloc(size_t size)
+{
+    void *memblock = malloc(size);
+    if (memblock) {
+        memset(memblock, 0, size);
+    }
+    return memblock;
+}
+
+inline static void vecfree(void *memblock)
+{
+    free(memblock);
+}
+
+inline static void vecset(lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n)
+{
+    int i;
+    
+    for (i = 0;i < n;++i) {
+        x[i] = c;
+    }
+}
+
+inline static void veccpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
+{
+    int i;
+
+    for (i = 0;i < n;++i) {
+        y[i] = x[i];
+    }
+}
+
+inline static void vecncpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
+{
+    int i;
+
+    for (i = 0;i < n;++i) {
+        y[i] = -x[i];
+    }
+}
+
+inline static void vecadd(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n)
+{
+    int i;
+
+    for (i = 0;i < n;++i) {
+        y[i] += c * x[i];
+    }
+}
+
+inline static void vecdiff(lbfgsfloatval_t *z, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n)
+{
+    int i;
+
+    for (i = 0;i < n;++i) {
+        z[i] = x[i] - y[i];
+    }
+}
+
+inline static void vecscale(lbfgsfloatval_t *y, const lbfgsfloatval_t c, const int n)
+{
+    int i;
+
+    for (i = 0;i < n;++i) {
+        y[i] *= c;
+    }
+}
+
+inline static void vecmul(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
+{
+    int i;
+
+    for (i = 0;i < n;++i) {
+        y[i] *= x[i];
+    }
+}
+
+inline static void vecdot(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n)
+{
+    int i;
+    *s = 0.;
+    for (i = 0;i < n;++i) {
+        *s += x[i] * y[i];
+    }
+}
+
+inline static void vec2norm(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n)
+{
+    vecdot(s, x, x, n);
+    *s = (lbfgsfloatval_t)sqrt(*s);
+}
+
+inline static void vec2norminv(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n)
+{
+    vec2norm(s, x, n);
+    *s = (lbfgsfloatval_t)(1.0 / *s);
+}
diff --git a/wrapper/Convert/SireOpenMM/lbgfs/arithmetic_sse_double.h b/wrapper/Convert/SireOpenMM/lbgfs/arithmetic_sse_double.h
new file mode 100644
index 000000000..38334476e
--- /dev/null
+++ b/wrapper/Convert/SireOpenMM/lbgfs/arithmetic_sse_double.h
@@ -0,0 +1,287 @@
+/*
+ *      SSE2 implementation of vector oprations (64bit double).
+ *
+ * Copyright (c) 2007-2010 Naoaki Okazaki
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* $Id: arithmetic_sse_double.h 65 2010-01-29 12:19:16Z naoaki $ */
+
+#include <stdlib.h>
+#include <malloc.h>
+#include <memory.h>
+
+#if     1400 <= _MSC_VER
+#include <intrin.h>
+#endif/*1400 <= _MSC_VER*/
+
+#if     HAVE_EMMINTRIN_H
+#include <emmintrin.h>
+#endif/*HAVE_EMMINTRIN_H*/
+
+inline static void* vecalloc(size_t size)
+{
+#ifdef	_MSC_VER
+    void *memblock = _aligned_malloc(size, 16);
+#else
+    void *memblock = memalign(16, size);
+#endif
+    if (memblock != NULL) {
+        memset(memblock, 0, size);
+    }
+    return memblock;
+}
+
+inline static void vecfree(void *memblock)
+{
+#ifdef	_MSC_VER
+    _aligned_free(memblock);
+#else
+    free(memblock);
+#endif
+}
+
+#define fsigndiff(x, y) \
+    ((_mm_movemask_pd(_mm_set_pd(*(x), *(y))) + 1) & 0x002)
+
+#define vecset(x, c, n) \
+{ \
+    int i; \
+    __m128d XMM0 = _mm_set1_pd(c); \
+    for (i = 0;i < (n);i += 8) { \
+        _mm_store_pd((x)+i  , XMM0); \
+        _mm_store_pd((x)+i+2, XMM0); \
+        _mm_store_pd((x)+i+4, XMM0); \
+        _mm_store_pd((x)+i+6, XMM0); \
+    } \
+}
+
+#define veccpy(y, x, n) \
+{ \
+    int i; \
+    for (i = 0;i < (n);i += 8) { \
+        __m128d XMM0 = _mm_load_pd((x)+i  ); \
+        __m128d XMM1 = _mm_load_pd((x)+i+2); \
+        __m128d XMM2 = _mm_load_pd((x)+i+4); \
+        __m128d XMM3 = _mm_load_pd((x)+i+6); \
+        _mm_store_pd((y)+i  , XMM0); \
+        _mm_store_pd((y)+i+2, XMM1); \
+        _mm_store_pd((y)+i+4, XMM2); \
+        _mm_store_pd((y)+i+6, XMM3); \
+    } \
+}
+
+#define vecncpy(y, x, n) \
+{ \
+    int i; \
+    for (i = 0;i < (n);i += 8) { \
+        __m128d XMM0 = _mm_setzero_pd(); \
+        __m128d XMM1 = _mm_setzero_pd(); \
+        __m128d XMM2 = _mm_setzero_pd(); \
+        __m128d XMM3 = _mm_setzero_pd(); \
+        __m128d XMM4 = _mm_load_pd((x)+i  ); \
+        __m128d XMM5 = _mm_load_pd((x)+i+2); \
+        __m128d XMM6 = _mm_load_pd((x)+i+4); \
+        __m128d XMM7 = _mm_load_pd((x)+i+6); \
+        XMM0 = _mm_sub_pd(XMM0, XMM4); \
+        XMM1 = _mm_sub_pd(XMM1, XMM5); \
+        XMM2 = _mm_sub_pd(XMM2, XMM6); \
+        XMM3 = _mm_sub_pd(XMM3, XMM7); \
+        _mm_store_pd((y)+i  , XMM0); \
+        _mm_store_pd((y)+i+2, XMM1); \
+        _mm_store_pd((y)+i+4, XMM2); \
+        _mm_store_pd((y)+i+6, XMM3); \
+    } \
+}
+
+#define vecadd(y, x, c, n) \
+{ \
+    int i; \
+    __m128d XMM7 = _mm_set1_pd(c); \
+    for (i = 0;i < (n);i += 4) { \
+        __m128d XMM0 = _mm_load_pd((x)+i  ); \
+        __m128d XMM1 = _mm_load_pd((x)+i+2); \
+        __m128d XMM2 = _mm_load_pd((y)+i  ); \
+        __m128d XMM3 = _mm_load_pd((y)+i+2); \
+        XMM0 = _mm_mul_pd(XMM0, XMM7); \
+        XMM1 = _mm_mul_pd(XMM1, XMM7); \
+        XMM2 = _mm_add_pd(XMM2, XMM0); \
+        XMM3 = _mm_add_pd(XMM3, XMM1); \
+        _mm_store_pd((y)+i  , XMM2); \
+        _mm_store_pd((y)+i+2, XMM3); \
+    } \
+}
+
+#define vecdiff(z, x, y, n) \
+{ \
+    int i; \
+    for (i = 0;i < (n);i += 8) { \
+        __m128d XMM0 = _mm_load_pd((x)+i  ); \
+        __m128d XMM1 = _mm_load_pd((x)+i+2); \
+        __m128d XMM2 = _mm_load_pd((x)+i+4); \
+        __m128d XMM3 = _mm_load_pd((x)+i+6); \
+        __m128d XMM4 = _mm_load_pd((y)+i  ); \
+        __m128d XMM5 = _mm_load_pd((y)+i+2); \
+        __m128d XMM6 = _mm_load_pd((y)+i+4); \
+        __m128d XMM7 = _mm_load_pd((y)+i+6); \
+        XMM0 = _mm_sub_pd(XMM0, XMM4); \
+        XMM1 = _mm_sub_pd(XMM1, XMM5); \
+        XMM2 = _mm_sub_pd(XMM2, XMM6); \
+        XMM3 = _mm_sub_pd(XMM3, XMM7); \
+        _mm_store_pd((z)+i  , XMM0); \
+        _mm_store_pd((z)+i+2, XMM1); \
+        _mm_store_pd((z)+i+4, XMM2); \
+        _mm_store_pd((z)+i+6, XMM3); \
+    } \
+}
+
+#define vecscale(y, c, n) \
+{ \
+    int i; \
+    __m128d XMM7 = _mm_set1_pd(c); \
+    for (i = 0;i < (n);i += 4) { \
+        __m128d XMM0 = _mm_load_pd((y)+i  ); \
+        __m128d XMM1 = _mm_load_pd((y)+i+2); \
+        XMM0 = _mm_mul_pd(XMM0, XMM7); \
+        XMM1 = _mm_mul_pd(XMM1, XMM7); \
+        _mm_store_pd((y)+i  , XMM0); \
+        _mm_store_pd((y)+i+2, XMM1); \
+    } \
+}
+
+#define vecmul(y, x, n) \
+{ \
+    int i; \
+    for (i = 0;i < (n);i += 8) { \
+        __m128d XMM0 = _mm_load_pd((x)+i  ); \
+        __m128d XMM1 = _mm_load_pd((x)+i+2); \
+        __m128d XMM2 = _mm_load_pd((x)+i+4); \
+        __m128d XMM3 = _mm_load_pd((x)+i+6); \
+        __m128d XMM4 = _mm_load_pd((y)+i  ); \
+        __m128d XMM5 = _mm_load_pd((y)+i+2); \
+        __m128d XMM6 = _mm_load_pd((y)+i+4); \
+        __m128d XMM7 = _mm_load_pd((y)+i+6); \
+        XMM4 = _mm_mul_pd(XMM4, XMM0); \
+        XMM5 = _mm_mul_pd(XMM5, XMM1); \
+        XMM6 = _mm_mul_pd(XMM6, XMM2); \
+        XMM7 = _mm_mul_pd(XMM7, XMM3); \
+        _mm_store_pd((y)+i  , XMM4); \
+        _mm_store_pd((y)+i+2, XMM5); \
+        _mm_store_pd((y)+i+4, XMM6); \
+        _mm_store_pd((y)+i+6, XMM7); \
+    } \
+}
+
+
+
+#if     3 <= __SSE__
+/*
+    Horizontal add with haddps SSE3 instruction. The work register (rw)
+    is unused.
+ */
+#define __horizontal_sum(r, rw) \
+    r = _mm_hadd_ps(r, r); \
+    r = _mm_hadd_ps(r, r);
+
+#else
+/*
+    Horizontal add with SSE instruction. The work register (rw) is used.
+ */
+#define __horizontal_sum(r, rw) \
+    rw = r; \
+    r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \
+    r = _mm_add_ps(r, rw); \
+    rw = r; \
+    r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \
+    r = _mm_add_ps(r, rw);
+
+#endif
+
+#define vecdot(s, x, y, n) \
+{ \
+    int i; \
+    __m128d XMM0 = _mm_setzero_pd(); \
+    __m128d XMM1 = _mm_setzero_pd(); \
+    __m128d XMM2, XMM3, XMM4, XMM5; \
+    for (i = 0;i < (n);i += 4) { \
+        XMM2 = _mm_load_pd((x)+i  ); \
+        XMM3 = _mm_load_pd((x)+i+2); \
+        XMM4 = _mm_load_pd((y)+i  ); \
+        XMM5 = _mm_load_pd((y)+i+2); \
+        XMM2 = _mm_mul_pd(XMM2, XMM4); \
+        XMM3 = _mm_mul_pd(XMM3, XMM5); \
+        XMM0 = _mm_add_pd(XMM0, XMM2); \
+        XMM1 = _mm_add_pd(XMM1, XMM3); \
+    } \
+    XMM0 = _mm_add_pd(XMM0, XMM1); \
+    XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
+    XMM0 = _mm_add_pd(XMM0, XMM1); \
+    _mm_store_sd((s), XMM0); \
+}
+
+#define vec2norm(s, x, n) \
+{ \
+    int i; \
+    __m128d XMM0 = _mm_setzero_pd(); \
+    __m128d XMM1 = _mm_setzero_pd(); \
+    __m128d XMM2, XMM3, XMM4, XMM5; \
+    for (i = 0;i < (n);i += 4) { \
+        XMM2 = _mm_load_pd((x)+i  ); \
+        XMM3 = _mm_load_pd((x)+i+2); \
+        XMM4 = XMM2; \
+        XMM5 = XMM3; \
+        XMM2 = _mm_mul_pd(XMM2, XMM4); \
+        XMM3 = _mm_mul_pd(XMM3, XMM5); \
+        XMM0 = _mm_add_pd(XMM0, XMM2); \
+        XMM1 = _mm_add_pd(XMM1, XMM3); \
+    } \
+    XMM0 = _mm_add_pd(XMM0, XMM1); \
+    XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
+    XMM0 = _mm_add_pd(XMM0, XMM1); \
+    XMM0 = _mm_sqrt_pd(XMM0); \
+    _mm_store_sd((s), XMM0); \
+}
+
+
+#define vec2norminv(s, x, n) \
+{ \
+    int i; \
+    __m128d XMM0 = _mm_setzero_pd(); \
+    __m128d XMM1 = _mm_setzero_pd(); \
+    __m128d XMM2, XMM3, XMM4, XMM5; \
+    for (i = 0;i < (n);i += 4) { \
+        XMM2 = _mm_load_pd((x)+i  ); \
+        XMM3 = _mm_load_pd((x)+i+2); \
+        XMM4 = XMM2; \
+        XMM5 = XMM3; \
+        XMM2 = _mm_mul_pd(XMM2, XMM4); \
+        XMM3 = _mm_mul_pd(XMM3, XMM5); \
+        XMM0 = _mm_add_pd(XMM0, XMM2); \
+        XMM1 = _mm_add_pd(XMM1, XMM3); \
+    } \
+    XMM2 = _mm_set1_pd(1.0); \
+    XMM0 = _mm_add_pd(XMM0, XMM1); \
+    XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
+    XMM0 = _mm_add_pd(XMM0, XMM1); \
+    XMM0 = _mm_sqrt_pd(XMM0); \
+    XMM2 = _mm_div_pd(XMM2, XMM0); \
+    _mm_store_sd((s), XMM2); \
+}
diff --git a/wrapper/Convert/SireOpenMM/lbgfs/arithmetic_sse_float.h b/wrapper/Convert/SireOpenMM/lbgfs/arithmetic_sse_float.h
new file mode 100644
index 000000000..03050f741
--- /dev/null
+++ b/wrapper/Convert/SireOpenMM/lbgfs/arithmetic_sse_float.h
@@ -0,0 +1,287 @@
+/*
+ *      SSE/SSE3 implementation of vector oprations (32bit float).
+ *
+ * Copyright (c) 2007-2010 Naoaki Okazaki
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* $Id: arithmetic_sse_float.h 65 2010-01-29 12:19:16Z naoaki $ */
+
+#include <stdlib.h>
+#include <malloc.h>
+#include <memory.h>
+
+#if     1400 <= _MSC_VER
+#include <intrin.h>
+#endif/*_MSC_VER*/
+
+#if     HAVE_XMMINTRIN_H
+#include <xmmintrin.h>
+#endif/*HAVE_XMMINTRIN_H*/
+
+#if     LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
+#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
+#else
+#define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.)
+#endif/*LBFGS_IEEE_FLOAT*/
+
+inline static void* vecalloc(size_t size)
+{
+    void *memblock = _aligned_malloc(size, 16);
+    if (memblock != NULL) {
+        memset(memblock, 0, size);
+    }
+    return memblock;
+}
+
+inline static void vecfree(void *memblock)
+{
+    _aligned_free(memblock);
+}
+
+#define vecset(x, c, n) \
+{ \
+    int i; \
+    __m128 XMM0 = _mm_set_ps1(c); \
+    for (i = 0;i < (n);i += 16) { \
+        _mm_store_ps((x)+i   , XMM0); \
+        _mm_store_ps((x)+i+ 4, XMM0); \
+        _mm_store_ps((x)+i+ 8, XMM0); \
+        _mm_store_ps((x)+i+12, XMM0); \
+    } \
+}
+
+#define veccpy(y, x, n) \
+{ \
+    int i; \
+    for (i = 0;i < (n);i += 16) { \
+        __m128 XMM0 = _mm_load_ps((x)+i   ); \
+        __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
+        __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
+        __m128 XMM3 = _mm_load_ps((x)+i+12); \
+        _mm_store_ps((y)+i   , XMM0); \
+        _mm_store_ps((y)+i+ 4, XMM1); \
+        _mm_store_ps((y)+i+ 8, XMM2); \
+        _mm_store_ps((y)+i+12, XMM3); \
+    } \
+}
+
+#define vecncpy(y, x, n) \
+{ \
+    int i; \
+    const uint32_t mask = 0x80000000; \
+    __m128 XMM4 = _mm_load_ps1((float*)&mask); \
+    for (i = 0;i < (n);i += 16) { \
+        __m128 XMM0 = _mm_load_ps((x)+i   ); \
+        __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
+        __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
+        __m128 XMM3 = _mm_load_ps((x)+i+12); \
+        XMM0 = _mm_xor_ps(XMM0, XMM4); \
+        XMM1 = _mm_xor_ps(XMM1, XMM4); \
+        XMM2 = _mm_xor_ps(XMM2, XMM4); \
+        XMM3 = _mm_xor_ps(XMM3, XMM4); \
+        _mm_store_ps((y)+i   , XMM0); \
+        _mm_store_ps((y)+i+ 4, XMM1); \
+        _mm_store_ps((y)+i+ 8, XMM2); \
+        _mm_store_ps((y)+i+12, XMM3); \
+    } \
+}
+
+#define vecadd(y, x, c, n) \
+{ \
+    int i; \
+    __m128 XMM7 = _mm_set_ps1(c); \
+    for (i = 0;i < (n);i += 8) { \
+        __m128 XMM0 = _mm_load_ps((x)+i  ); \
+        __m128 XMM1 = _mm_load_ps((x)+i+4); \
+        __m128 XMM2 = _mm_load_ps((y)+i  ); \
+        __m128 XMM3 = _mm_load_ps((y)+i+4); \
+        XMM0 = _mm_mul_ps(XMM0, XMM7); \
+        XMM1 = _mm_mul_ps(XMM1, XMM7); \
+        XMM2 = _mm_add_ps(XMM2, XMM0); \
+        XMM3 = _mm_add_ps(XMM3, XMM1); \
+        _mm_store_ps((y)+i  , XMM2); \
+        _mm_store_ps((y)+i+4, XMM3); \
+    } \
+}
+
+#define vecdiff(z, x, y, n) \
+{ \
+    int i; \
+    for (i = 0;i < (n);i += 16) { \
+        __m128 XMM0 = _mm_load_ps((x)+i   ); \
+        __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
+        __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
+        __m128 XMM3 = _mm_load_ps((x)+i+12); \
+        __m128 XMM4 = _mm_load_ps((y)+i   ); \
+        __m128 XMM5 = _mm_load_ps((y)+i+ 4); \
+        __m128 XMM6 = _mm_load_ps((y)+i+ 8); \
+        __m128 XMM7 = _mm_load_ps((y)+i+12); \
+        XMM0 = _mm_sub_ps(XMM0, XMM4); \
+        XMM1 = _mm_sub_ps(XMM1, XMM5); \
+        XMM2 = _mm_sub_ps(XMM2, XMM6); \
+        XMM3 = _mm_sub_ps(XMM3, XMM7); \
+        _mm_store_ps((z)+i   , XMM0); \
+        _mm_store_ps((z)+i+ 4, XMM1); \
+        _mm_store_ps((z)+i+ 8, XMM2); \
+        _mm_store_ps((z)+i+12, XMM3); \
+    } \
+}
+
+#define vecscale(y, c, n) \
+{ \
+    int i; \
+    __m128 XMM7 = _mm_set_ps1(c); \
+    for (i = 0;i < (n);i += 8) { \
+        __m128 XMM0 = _mm_load_ps((y)+i  ); \
+        __m128 XMM1 = _mm_load_ps((y)+i+4); \
+        XMM0 = _mm_mul_ps(XMM0, XMM7); \
+        XMM1 = _mm_mul_ps(XMM1, XMM7); \
+        _mm_store_ps((y)+i  , XMM0); \
+        _mm_store_ps((y)+i+4, XMM1); \
+    } \
+}
+
+#define vecmul(y, x, n) \
+{ \
+    int i; \
+    for (i = 0;i < (n);i += 16) { \
+        __m128 XMM0 = _mm_load_ps((x)+i   ); \
+        __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
+        __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
+        __m128 XMM3 = _mm_load_ps((x)+i+12); \
+        __m128 XMM4 = _mm_load_ps((y)+i   ); \
+        __m128 XMM5 = _mm_load_ps((y)+i+ 4); \
+        __m128 XMM6 = _mm_load_ps((y)+i+ 8); \
+        __m128 XMM7 = _mm_load_ps((y)+i+12); \
+        XMM4 = _mm_mul_ps(XMM4, XMM0); \
+        XMM5 = _mm_mul_ps(XMM5, XMM1); \
+        XMM6 = _mm_mul_ps(XMM6, XMM2); \
+        XMM7 = _mm_mul_ps(XMM7, XMM3); \
+        _mm_store_ps((y)+i   , XMM4); \
+        _mm_store_ps((y)+i+ 4, XMM5); \
+        _mm_store_ps((y)+i+ 8, XMM6); \
+        _mm_store_ps((y)+i+12, XMM7); \
+    } \
+}
+
+
+
+#if     3 <= __SSE__
+/*
+    Horizontal add with haddps SSE3 instruction. The work register (rw)
+    is unused.
+ */
+#define __horizontal_sum(r, rw) \
+    r = _mm_hadd_ps(r, r); \
+    r = _mm_hadd_ps(r, r);
+
+#else
+/*
+    Horizontal add with SSE instruction. The work register (rw) is used.
+ */
+#define __horizontal_sum(r, rw) \
+    rw = r; \
+    r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \
+    r = _mm_add_ps(r, rw); \
+    rw = r; \
+    r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \
+    r = _mm_add_ps(r, rw);
+
+#endif
+
+#define vecdot(s, x, y, n) \
+{ \
+    int i; \
+    __m128 XMM0 = _mm_setzero_ps(); \
+    __m128 XMM1 = _mm_setzero_ps(); \
+    __m128 XMM2, XMM3, XMM4, XMM5; \
+    for (i = 0;i < (n);i += 8) { \
+        XMM2 = _mm_load_ps((x)+i  ); \
+        XMM3 = _mm_load_ps((x)+i+4); \
+        XMM4 = _mm_load_ps((y)+i  ); \
+        XMM5 = _mm_load_ps((y)+i+4); \
+        XMM2 = _mm_mul_ps(XMM2, XMM4); \
+        XMM3 = _mm_mul_ps(XMM3, XMM5); \
+        XMM0 = _mm_add_ps(XMM0, XMM2); \
+        XMM1 = _mm_add_ps(XMM1, XMM3); \
+    } \
+    XMM0 = _mm_add_ps(XMM0, XMM1); \
+    __horizontal_sum(XMM0, XMM1); \
+    _mm_store_ss((s), XMM0); \
+}
+
+#define vec2norm(s, x, n) \
+{ \
+    int i; \
+    __m128 XMM0 = _mm_setzero_ps(); \
+    __m128 XMM1 = _mm_setzero_ps(); \
+    __m128 XMM2, XMM3; \
+    for (i = 0;i < (n);i += 8) { \
+        XMM2 = _mm_load_ps((x)+i  ); \
+        XMM3 = _mm_load_ps((x)+i+4); \
+        XMM2 = _mm_mul_ps(XMM2, XMM2); \
+        XMM3 = _mm_mul_ps(XMM3, XMM3); \
+        XMM0 = _mm_add_ps(XMM0, XMM2); \
+        XMM1 = _mm_add_ps(XMM1, XMM3); \
+    } \
+    XMM0 = _mm_add_ps(XMM0, XMM1); \
+    __horizontal_sum(XMM0, XMM1); \
+    XMM2 = XMM0; \
+    XMM1 = _mm_rsqrt_ss(XMM0); \
+    XMM3 = XMM1; \
+    XMM1 = _mm_mul_ss(XMM1, XMM1); \
+    XMM1 = _mm_mul_ss(XMM1, XMM3); \
+    XMM1 = _mm_mul_ss(XMM1, XMM0); \
+    XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \
+    XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \
+    XMM3 = _mm_add_ss(XMM3, XMM1); \
+    XMM3 = _mm_mul_ss(XMM3, XMM2); \
+    _mm_store_ss((s), XMM3); \
+}
+
+#define vec2norminv(s, x, n) \
+{ \
+    int i; \
+    __m128 XMM0 = _mm_setzero_ps(); \
+    __m128 XMM1 = _mm_setzero_ps(); \
+    __m128 XMM2, XMM3; \
+    for (i = 0;i < (n);i += 16) { \
+        XMM2 = _mm_load_ps((x)+i  ); \
+        XMM3 = _mm_load_ps((x)+i+4); \
+        XMM2 = _mm_mul_ps(XMM2, XMM2); \
+        XMM3 = _mm_mul_ps(XMM3, XMM3); \
+        XMM0 = _mm_add_ps(XMM0, XMM2); \
+        XMM1 = _mm_add_ps(XMM1, XMM3); \
+    } \
+    XMM0 = _mm_add_ps(XMM0, XMM1); \
+    __horizontal_sum(XMM0, XMM1); \
+    XMM2 = XMM0; \
+    XMM1 = _mm_rsqrt_ss(XMM0); \
+    XMM3 = XMM1; \
+    XMM1 = _mm_mul_ss(XMM1, XMM1); \
+    XMM1 = _mm_mul_ss(XMM1, XMM3); \
+    XMM1 = _mm_mul_ss(XMM1, XMM0); \
+    XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \
+    XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \
+    XMM3 = _mm_add_ss(XMM3, XMM1); \
+    _mm_store_ss((s), XMM3); \
+}
diff --git a/wrapper/Convert/SireOpenMM/lbgfs/lbfgs.cpp b/wrapper/Convert/SireOpenMM/lbgfs/lbfgs.cpp
new file mode 100644
index 000000000..5e88fb3db
--- /dev/null
+++ b/wrapper/Convert/SireOpenMM/lbgfs/lbfgs.cpp
@@ -0,0 +1,1556 @@
+/*
+ *      Limited memory BFGS (L-BFGS).
+ *
+ * Copyright (c) 1990, Jorge Nocedal
+ * Copyright (c) 2007-2010 Naoaki Okazaki
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* $Id: lbfgs.c 65 2010-01-29 12:19:16Z naoaki $ */
+
+/*
+This library is a C port of the FORTRAN implementation of Limited-memory
+Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal.
+The original FORTRAN source code is available at:
+http://www.ece.northwestern.edu/~nocedal/lbfgs.html
+
+The L-BFGS algorithm is described in:
+    - Jorge Nocedal.
+      Updating Quasi-Newton Matrices with Limited Storage.
+      <i>Mathematics of Computation</i>, Vol. 35, No. 151, pp. 773--782, 1980.
+    - Dong C. Liu and Jorge Nocedal.
+      On the limited memory BFGS method for large scale optimization.
+      <i>Mathematical Programming</i> B, Vol. 45, No. 3, pp. 503-528, 1989.
+
+The line search algorithms used in this implementation are described in:
+    - John E. Dennis and Robert B. Schnabel.
+      <i>Numerical Methods for Unconstrained Optimization and Nonlinear
+      Equations</i>, Englewood Cliffs, 1983.
+    - Jorge J. More and David J. Thuente.
+      Line search algorithm with guaranteed sufficient decrease.
+      <i>ACM Transactions on Mathematical Software (TOMS)</i>, Vol. 20, No. 3,
+      pp. 286-307, 1994.
+
+This library also implements Orthant-Wise Limited-memory Quasi-Newton (OWL-QN)
+method presented in:
+    - Galen Andrew and Jianfeng Gao.
+      Scalable training of L1-regularized log-linear models.
+      In <i>Proceedings of the 24th International Conference on Machine
+      Learning (ICML 2007)</i>, pp. 33-40, 2007.
+
+I would like to thank the original author, Jorge Nocedal, who has been
+distributing the effieicnt and explanatory implementation in an open source
+licence.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /*HAVE_CONFIG_H*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "lbfgs.h"
+
+#ifdef _MSC_VER
+#define inline __inline
+typedef unsigned int uint32_t;
+#endif /*_MSC_VER*/
+
+#if defined(USE_SSE) && defined(__SSE2__) && LBFGS_FLOAT == 64
+/* Use SSE2 optimization for 64bit double precision. */
+#include "arithmetic_sse_double.h"
+
+#elif defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 32
+/* Use SSE optimization for 32bit float precision. */
+#include "arithmetic_sse_float.h"
+
+#else
+/* No CPU specific optimization. */
+#include "arithmetic_ansi.h"
+
+#endif
+
+#define min2(a, b) ((a) <= (b) ? (a) : (b))
+#define max2(a, b) ((a) >= (b) ? (a) : (b))
+#define max3(a, b, c) max2(max2((a), (b)), (c));
+
+struct tag_callback_data
+{
+    int n;
+    void *instance;
+    lbfgs_evaluate_t proc_evaluate;
+    lbfgs_progress_t proc_progress;
+};
+typedef struct tag_callback_data callback_data_t;
+
+struct tag_iteration_data
+{
+    lbfgsfloatval_t alpha;
+    lbfgsfloatval_t *s; /* [n] */
+    lbfgsfloatval_t *y; /* [n] */
+    lbfgsfloatval_t ys; /* vecdot(y, s) */
+};
+typedef struct tag_iteration_data iteration_data_t;
+
+static const lbfgs_parameter_t _defparam = {
+    6,
+    1e-5,
+    0,
+    1e-5,
+    0,
+    LBFGS_LINESEARCH_DEFAULT,
+    40,
+    1e-20,
+    1e20,
+    1e-4,
+    0.9,
+    0.9,
+    1.0e-16,
+    0.0,
+    0,
+    -1,
+};
+
+/* Forward function declarations. */
+
+typedef int (*line_search_proc)(
+    int n,
+    lbfgsfloatval_t *x,
+    lbfgsfloatval_t *f,
+    lbfgsfloatval_t *g,
+    lbfgsfloatval_t *s,
+    lbfgsfloatval_t *stp,
+    const lbfgsfloatval_t *xp,
+    const lbfgsfloatval_t *gp,
+    lbfgsfloatval_t *wa,
+    callback_data_t *cd,
+    const lbfgs_parameter_t *param);
+
+static int line_search_backtracking(
+    int n,
+    lbfgsfloatval_t *x,
+    lbfgsfloatval_t *f,
+    lbfgsfloatval_t *g,
+    lbfgsfloatval_t *s,
+    lbfgsfloatval_t *stp,
+    const lbfgsfloatval_t *xp,
+    const lbfgsfloatval_t *gp,
+    lbfgsfloatval_t *wa,
+    callback_data_t *cd,
+    const lbfgs_parameter_t *param);
+
+static int line_search_backtracking_owlqn(
+    int n,
+    lbfgsfloatval_t *x,
+    lbfgsfloatval_t *f,
+    lbfgsfloatval_t *g,
+    lbfgsfloatval_t *s,
+    lbfgsfloatval_t *stp,
+    const lbfgsfloatval_t *xp,
+    const lbfgsfloatval_t *gp,
+    lbfgsfloatval_t *wp,
+    callback_data_t *cd,
+    const lbfgs_parameter_t *param);
+
+static int line_search_morethuente(
+    int n,
+    lbfgsfloatval_t *x,
+    lbfgsfloatval_t *f,
+    lbfgsfloatval_t *g,
+    lbfgsfloatval_t *s,
+    lbfgsfloatval_t *stp,
+    const lbfgsfloatval_t *xp,
+    const lbfgsfloatval_t *gp,
+    lbfgsfloatval_t *wa,
+    callback_data_t *cd,
+    const lbfgs_parameter_t *param);
+
+static int update_trial_interval(
+    lbfgsfloatval_t *x,
+    lbfgsfloatval_t *fx,
+    lbfgsfloatval_t *dx,
+    lbfgsfloatval_t *y,
+    lbfgsfloatval_t *fy,
+    lbfgsfloatval_t *dy,
+    lbfgsfloatval_t *t,
+    lbfgsfloatval_t *ft,
+    lbfgsfloatval_t *dt,
+    const lbfgsfloatval_t tmin,
+    const lbfgsfloatval_t tmax,
+    int *brackt);
+
+static lbfgsfloatval_t owlqn_x1norm(
+    const lbfgsfloatval_t *x,
+    const int start,
+    const int n);
+
+static void owlqn_pseudo_gradient(
+    lbfgsfloatval_t *pg,
+    const lbfgsfloatval_t *x,
+    const lbfgsfloatval_t *g,
+    const int n,
+    const lbfgsfloatval_t c,
+    const int start,
+    const int end);
+
+static void owlqn_project(
+    lbfgsfloatval_t *d,
+    const lbfgsfloatval_t *sign,
+    const int start,
+    const int end);
+
+#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
+static int round_out_variables(int n)
+{
+    n += 7;
+    n /= 8;
+    n *= 8;
+    return n;
+}
+#endif /*defined(USE_SSE)*/
+
+lbfgsfloatval_t *lbfgs_malloc(int n)
+{
+#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
+    n = round_out_variables(n);
+#endif /*defined(USE_SSE)*/
+    return (lbfgsfloatval_t *)vecalloc(sizeof(lbfgsfloatval_t) * n);
+}
+
+void lbfgs_free(lbfgsfloatval_t *x)
+{
+    vecfree(x);
+}
+
+void lbfgs_parameter_init(lbfgs_parameter_t *param)
+{
+    memcpy(param, &_defparam, sizeof(*param));
+}
+
+int lbfgs(
+    int n,
+    lbfgsfloatval_t *x,
+    lbfgsfloatval_t *ptr_fx,
+    lbfgs_evaluate_t proc_evaluate,
+    lbfgs_progress_t proc_progress,
+    void *instance,
+    lbfgs_parameter_t *_param)
+{
+    int ret;
+    int i, j, k, ls, end, bound;
+    lbfgsfloatval_t step;
+
+    /* Constant parameters and their default values. */
+    lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam;
+    const int m = param.m;
+
+    lbfgsfloatval_t *xp = NULL;
+    lbfgsfloatval_t *g = NULL, *gp = NULL, *pg = NULL;
+    lbfgsfloatval_t *d = NULL, *w = NULL, *pf = NULL;
+    iteration_data_t *lm = NULL, *it = NULL;
+    lbfgsfloatval_t ys, yy;
+    lbfgsfloatval_t xnorm, gnorm, beta;
+    lbfgsfloatval_t fx = 0.;
+    lbfgsfloatval_t rate = 0.;
+    line_search_proc linesearch = line_search_morethuente;
+
+    /* Construct a callback data. */
+    callback_data_t cd;
+    cd.n = n;
+    cd.instance = instance;
+    cd.proc_evaluate = proc_evaluate;
+    cd.proc_progress = proc_progress;
+
+#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
+    /* Round out the number of variables. */
+    n = round_out_variables(n);
+#endif /*defined(USE_SSE)*/
+
+    /* Check the input parameters for errors. */
+    if (n <= 0)
+    {
+        return LBFGSERR_INVALID_N;
+    }
+#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
+    if (n % 8 != 0)
+    {
+        return LBFGSERR_INVALID_N_SSE;
+    }
+    if (((unsigned short)x & 0x000F) != 0)
+    {
+        return LBFGSERR_INVALID_X_SSE;
+    }
+#endif /*defined(USE_SSE)*/
+    if (param.epsilon < 0.)
+    {
+        return LBFGSERR_INVALID_EPSILON;
+    }
+    if (param.past < 0)
+    {
+        return LBFGSERR_INVALID_TESTPERIOD;
+    }
+    if (param.delta < 0.)
+    {
+        return LBFGSERR_INVALID_DELTA;
+    }
+    if (param.min_step < 0.)
+    {
+        return LBFGSERR_INVALID_MINSTEP;
+    }
+    if (param.max_step < param.min_step)
+    {
+        return LBFGSERR_INVALID_MAXSTEP;
+    }
+    if (param.ftol < 0.)
+    {
+        return LBFGSERR_INVALID_FTOL;
+    }
+    if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE ||
+        param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE)
+    {
+        if (param.wolfe <= param.ftol || 1. <= param.wolfe)
+        {
+            return LBFGSERR_INVALID_WOLFE;
+        }
+    }
+    if (param.gtol < 0.)
+    {
+        return LBFGSERR_INVALID_GTOL;
+    }
+    if (param.xtol < 0.)
+    {
+        return LBFGSERR_INVALID_XTOL;
+    }
+    if (param.max_linesearch <= 0)
+    {
+        return LBFGSERR_INVALID_MAXLINESEARCH;
+    }
+    if (param.orthantwise_c < 0.)
+    {
+        return LBFGSERR_INVALID_ORTHANTWISE;
+    }
+    if (param.orthantwise_start < 0 || n < param.orthantwise_start)
+    {
+        return LBFGSERR_INVALID_ORTHANTWISE_START;
+    }
+    if (param.orthantwise_end < 0)
+    {
+        param.orthantwise_end = n;
+    }
+    if (n < param.orthantwise_end)
+    {
+        return LBFGSERR_INVALID_ORTHANTWISE_END;
+    }
+    if (param.orthantwise_c != 0.)
+    {
+        switch (param.linesearch)
+        {
+        case LBFGS_LINESEARCH_BACKTRACKING:
+            linesearch = line_search_backtracking_owlqn;
+            break;
+        default:
+            /* Only the backtracking method is available. */
+            return LBFGSERR_INVALID_LINESEARCH;
+        }
+    }
+    else
+    {
+        switch (param.linesearch)
+        {
+        case LBFGS_LINESEARCH_MORETHUENTE:
+            linesearch = line_search_morethuente;
+            break;
+        case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO:
+        case LBFGS_LINESEARCH_BACKTRACKING_WOLFE:
+        case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE:
+            linesearch = line_search_backtracking;
+            break;
+        default:
+            return LBFGSERR_INVALID_LINESEARCH;
+        }
+    }
+
+    /* Allocate working space. */
+    xp = (lbfgsfloatval_t *)vecalloc(n * sizeof(lbfgsfloatval_t));
+    g = (lbfgsfloatval_t *)vecalloc(n * sizeof(lbfgsfloatval_t));
+    gp = (lbfgsfloatval_t *)vecalloc(n * sizeof(lbfgsfloatval_t));
+    d = (lbfgsfloatval_t *)vecalloc(n * sizeof(lbfgsfloatval_t));
+    w = (lbfgsfloatval_t *)vecalloc(n * sizeof(lbfgsfloatval_t));
+    if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL)
+    {
+        ret = LBFGSERR_OUTOFMEMORY;
+        goto lbfgs_exit;
+    }
+
+    if (param.orthantwise_c != 0.)
+    {
+        /* Allocate working space for OW-LQN. */
+        pg = (lbfgsfloatval_t *)vecalloc(n * sizeof(lbfgsfloatval_t));
+        if (pg == NULL)
+        {
+            ret = LBFGSERR_OUTOFMEMORY;
+            goto lbfgs_exit;
+        }
+    }
+
+    /* Allocate limited memory storage. */
+    lm = (iteration_data_t *)vecalloc(m * sizeof(iteration_data_t));
+    if (lm == NULL)
+    {
+        ret = LBFGSERR_OUTOFMEMORY;
+        goto lbfgs_exit;
+    }
+
+    /* Initialize the limited memory. */
+    for (i = 0; i < m; ++i)
+    {
+        it = &lm[i];
+        it->alpha = 0;
+        it->ys = 0;
+        it->s = (lbfgsfloatval_t *)vecalloc(n * sizeof(lbfgsfloatval_t));
+        it->y = (lbfgsfloatval_t *)vecalloc(n * sizeof(lbfgsfloatval_t));
+        if (it->s == NULL || it->y == NULL)
+        {
+            ret = LBFGSERR_OUTOFMEMORY;
+            goto lbfgs_exit;
+        }
+    }
+
+    /* Allocate an array for storing previous values of the objective function. */
+    if (0 < param.past)
+    {
+        pf = (lbfgsfloatval_t *)vecalloc(param.past * sizeof(lbfgsfloatval_t));
+    }
+
+    try
+    {
+        /* Evaluate the function value and its gradient. */
+        fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0);
+        if (0. != param.orthantwise_c)
+        {
+            /* Compute the L1 norm of the variable and add it to the object value. */
+            xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end);
+            fx += xnorm * param.orthantwise_c;
+            owlqn_pseudo_gradient(
+                pg, x, g, n,
+                param.orthantwise_c, param.orthantwise_start, param.orthantwise_end);
+        }
+
+        /* Store the initial value of the objective function. */
+        if (pf != NULL)
+        {
+            pf[0] = fx;
+        }
+
+        /*
+            Compute the direction;
+            we assume the initial hessian matrix H_0 as the identity matrix.
+         */
+        if (param.orthantwise_c == 0.)
+        {
+            vecncpy(d, g, n);
+        }
+        else
+        {
+            vecncpy(d, pg, n);
+        }
+
+        /*
+           Make sure that the initial variables are not a minimizer.
+         */
+        vec2norm(&xnorm, x, n);
+        if (param.orthantwise_c == 0.)
+        {
+            vec2norm(&gnorm, g, n);
+        }
+        else
+        {
+            vec2norm(&gnorm, pg, n);
+        }
+        if (xnorm < 1.0)
+            xnorm = 1.0;
+        if (gnorm / xnorm <= param.epsilon)
+        {
+            ret = LBFGS_ALREADY_MINIMIZED;
+            goto lbfgs_exit;
+        }
+
+        /* Compute the initial step:
+            step = 1.0 / sqrt(vecdot(d, d, n))
+         */
+        vec2norminv(&step, d, n);
+
+        k = 1;
+        end = 0;
+        for (;;)
+        {
+            /* Store the current position and gradient vectors. */
+            veccpy(xp, x, n);
+            veccpy(gp, g, n);
+
+            /* Search for an optimal step. */
+            if (param.orthantwise_c == 0.)
+            {
+                ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, &param);
+            }
+            else
+            {
+                ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, &param);
+                owlqn_pseudo_gradient(
+                    pg, x, g, n,
+                    param.orthantwise_c, param.orthantwise_start, param.orthantwise_end);
+            }
+            if (ls < 0)
+            {
+                /* Revert to the previous point. */
+                veccpy(x, xp, n);
+                veccpy(g, gp, n);
+                ret = ls;
+                goto lbfgs_exit;
+            }
+
+            /* Compute x and g norms. */
+            vec2norm(&xnorm, x, n);
+            if (param.orthantwise_c == 0.)
+            {
+                vec2norm(&gnorm, g, n);
+            }
+            else
+            {
+                vec2norm(&gnorm, pg, n);
+            }
+
+            /* Report the progress. */
+            if (cd.proc_progress)
+            {
+                if (ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))
+                {
+                    goto lbfgs_exit;
+                }
+            }
+
+            /*
+                Convergence test.
+                The criterion is given by the following formula:
+                    |g(x)| / \max(1, |x|) < \epsilon
+             */
+            if (xnorm < 1.0)
+                xnorm = 1.0;
+            if (gnorm / xnorm <= param.epsilon)
+            {
+                /* Convergence. */
+                ret = LBFGS_SUCCESS;
+                break;
+            }
+
+            /*
+                Test for stopping criterion.
+                The criterion is given by the following formula:
+                    (f(past_x) - f(x)) / f(x) < \delta
+             */
+            if (pf != NULL)
+            {
+                /* We don't test the stopping criterion while k < past. */
+                if (param.past <= k)
+                {
+                    /* Compute the relative improvement from the past. */
+                    rate = (pf[k % param.past] - fx) / fx;
+
+                    /* The stopping criterion. */
+                    if (rate < param.delta)
+                    {
+                        ret = LBFGS_STOP;
+                        break;
+                    }
+                }
+
+                /* Store the current value of the objective function. */
+                pf[k % param.past] = fx;
+            }
+
+            if (param.max_iterations != 0 && param.max_iterations < k + 1)
+            {
+                /* Maximum number of iterations. */
+                ret = LBFGSERR_MAXIMUMITERATION;
+                break;
+            }
+
+            /*
+                Update vectors s and y:
+                    s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
+                    y_{k+1} = g_{k+1} - g_{k}.
+             */
+            it = &lm[end];
+            vecdiff(it->s, x, xp, n);
+            vecdiff(it->y, g, gp, n);
+
+            /*
+                Compute scalars ys and yy:
+                    ys = y^t \cdot s = 1 / \rho.
+                    yy = y^t \cdot y.
+                Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
+             */
+            vecdot(&ys, it->y, it->s, n);
+            vecdot(&yy, it->y, it->y, n);
+            it->ys = ys;
+
+            /*
+                Recursive formula to compute dir = -(H \cdot g).
+                    This is described in page 779 of:
+                    Jorge Nocedal.
+                    Updating Quasi-Newton Matrices with Limited Storage.
+                    Mathematics of Computation, Vol. 35, No. 151,
+                    pp. 773--782, 1980.
+             */
+            bound = (m <= k) ? m : k;
+            ++k;
+            end = (end + 1) % m;
+
+            /* Compute the steepest direction. */
+            if (param.orthantwise_c == 0.)
+            {
+                /* Compute the negative of gradients. */
+                vecncpy(d, g, n);
+            }
+            else
+            {
+                vecncpy(d, pg, n);
+            }
+
+            j = end;
+            for (i = 0; i < bound; ++i)
+            {
+                j = (j + m - 1) % m; /* if (--j == -1) j = m-1; */
+                it = &lm[j];
+                /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */
+                vecdot(&it->alpha, it->s, d, n);
+                it->alpha /= it->ys;
+                /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */
+                vecadd(d, it->y, -it->alpha, n);
+            }
+
+            vecscale(d, ys / yy, n);
+
+            for (i = 0; i < bound; ++i)
+            {
+                it = &lm[j];
+                /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */
+                vecdot(&beta, it->y, d, n);
+                beta /= it->ys;
+                /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */
+                vecadd(d, it->s, it->alpha - beta, n);
+                j = (j + 1) % m; /* if (++j == m) j = 0; */
+            }
+
+            /*
+                Constrain the search direction for orthant-wise updates.
+             */
+            if (param.orthantwise_c != 0.)
+            {
+                for (i = param.orthantwise_start; i < param.orthantwise_end; ++i)
+                {
+                    if (d[i] * pg[i] >= 0)
+                    {
+                        d[i] = 0;
+                    }
+                }
+            }
+
+            /*
+                Now the search direction d is ready. We try step = 1 first.
+             */
+            step = 1.0;
+        }
+    }
+    catch (...)
+    {
+        vecfree(pf);
+
+        /* Free memory blocks used by this function. */
+        if (lm != NULL)
+        {
+            for (i = 0; i < m; ++i)
+            {
+                vecfree(lm[i].s);
+                vecfree(lm[i].y);
+            }
+            vecfree(lm);
+        }
+        vecfree(pg);
+        vecfree(w);
+        vecfree(d);
+        vecfree(gp);
+        vecfree(g);
+        vecfree(xp);
+        throw;
+    }
+
+lbfgs_exit:
+    /* Return the final value of the objective function. */
+    if (ptr_fx != NULL)
+    {
+        *ptr_fx = fx;
+    }
+
+    vecfree(pf);
+
+    /* Free memory blocks used by this function. */
+    if (lm != NULL)
+    {
+        for (i = 0; i < m; ++i)
+        {
+            vecfree(lm[i].s);
+            vecfree(lm[i].y);
+        }
+        vecfree(lm);
+    }
+    vecfree(pg);
+    vecfree(w);
+    vecfree(d);
+    vecfree(gp);
+    vecfree(g);
+    vecfree(xp);
+
+    return ret;
+}
+
+static int line_search_backtracking(
+    int n,
+    lbfgsfloatval_t *x,
+    lbfgsfloatval_t *f,
+    lbfgsfloatval_t *g,
+    lbfgsfloatval_t *s,
+    lbfgsfloatval_t *stp,
+    const lbfgsfloatval_t *xp,
+    const lbfgsfloatval_t *gp,
+    lbfgsfloatval_t *wp,
+    callback_data_t *cd,
+    const lbfgs_parameter_t *param)
+{
+    int ret = 0, count = 0;
+    lbfgsfloatval_t width, dg, norm = 0.;
+    lbfgsfloatval_t finit, dginit = 0., dgtest;
+    const lbfgsfloatval_t dec = 0.5, inc = 2.1;
+
+    /* Check the input parameters for errors. */
+    if (*stp <= 0.)
+    {
+        return LBFGSERR_INVALIDPARAMETERS;
+    }
+
+    /* Compute the initial gradient in the search direction. */
+    vecdot(&dginit, g, s, n);
+
+    /* Make sure that s points to a descent direction. */
+    if (0 < dginit)
+    {
+        return LBFGSERR_INCREASEGRADIENT;
+    }
+
+    /* The initial value of the objective function. */
+    finit = *f;
+    dgtest = param->ftol * dginit;
+
+    for (;;)
+    {
+        veccpy(x, xp, n);
+        vecadd(x, s, *stp, n);
+
+        /* Evaluate the function and gradient values. */
+        *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
+
+        ++count;
+
+        if (*f > finit + *stp * dgtest)
+        {
+            width = dec;
+        }
+        else
+        {
+            /* The sufficient decrease condition (Armijo condition). */
+            if (param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_ARMIJO)
+            {
+                /* Exit with the Armijo condition. */
+                return count;
+            }
+
+            /* Check the Wolfe condition. */
+            vecdot(&dg, g, s, n);
+            if (dg < param->wolfe * dginit)
+            {
+                width = inc;
+            }
+            else
+            {
+                if (param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE)
+                {
+                    /* Exit with the regular Wolfe condition. */
+                    return count;
+                }
+
+                /* Check the strong Wolfe condition. */
+                if (dg > -param->wolfe * dginit)
+                {
+                    width = dec;
+                }
+                else
+                {
+                    /* Exit with the strong Wolfe condition. */
+                    return count;
+                }
+            }
+        }
+
+        if (*stp < param->min_step)
+        {
+            /* The step is the minimum value. */
+            return LBFGSERR_MINIMUMSTEP;
+        }
+        if (*stp > param->max_step)
+        {
+            /* The step is the maximum value. */
+            return LBFGSERR_MAXIMUMSTEP;
+        }
+        if (param->max_linesearch <= count)
+        {
+            /* Maximum number of iteration. */
+            return LBFGSERR_MAXIMUMLINESEARCH;
+        }
+
+        (*stp) *= width;
+    }
+}
+
+static int line_search_backtracking_owlqn(
+    int n,
+    lbfgsfloatval_t *x,
+    lbfgsfloatval_t *f,
+    lbfgsfloatval_t *g,
+    lbfgsfloatval_t *s,
+    lbfgsfloatval_t *stp,
+    const lbfgsfloatval_t *xp,
+    const lbfgsfloatval_t *gp,
+    lbfgsfloatval_t *wp,
+    callback_data_t *cd,
+    const lbfgs_parameter_t *param)
+{
+    int i, ret = 0, count = 0;
+    lbfgsfloatval_t width = 0.5, norm = 0.;
+    lbfgsfloatval_t finit = *f, dgtest;
+
+    /* Check the input parameters for errors. */
+    if (*stp <= 0.)
+    {
+        return LBFGSERR_INVALIDPARAMETERS;
+    }
+
+    /* Choose the orthant for the new point. */
+    for (i = 0; i < n; ++i)
+    {
+        wp[i] = (xp[i] == 0.) ? -gp[i] : xp[i];
+    }
+
+    for (;;)
+    {
+        /* Update the current point. */
+        veccpy(x, xp, n);
+        vecadd(x, s, *stp, n);
+
+        /* The current point is projected onto the orthant. */
+        owlqn_project(x, wp, param->orthantwise_start, param->orthantwise_end);
+
+        /* Evaluate the function and gradient values. */
+        *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
+
+        /* Compute the L1 norm of the variables and add it to the object value. */
+        norm = owlqn_x1norm(x, param->orthantwise_start, param->orthantwise_end);
+        *f += norm * param->orthantwise_c;
+
+        ++count;
+
+        dgtest = 0.;
+        for (i = 0; i < n; ++i)
+        {
+            dgtest += (x[i] - xp[i]) * gp[i];
+        }
+
+        if (*f <= finit + param->ftol * dgtest)
+        {
+            /* The sufficient decrease condition. */
+            return count;
+        }
+
+        if (*stp < param->min_step)
+        {
+            /* The step is the minimum value. */
+            return LBFGSERR_MINIMUMSTEP;
+        }
+        if (*stp > param->max_step)
+        {
+            /* The step is the maximum value. */
+            return LBFGSERR_MAXIMUMSTEP;
+        }
+        if (param->max_linesearch <= count)
+        {
+            /* Maximum number of iteration. */
+            return LBFGSERR_MAXIMUMLINESEARCH;
+        }
+
+        (*stp) *= width;
+    }
+}
+
+static int line_search_morethuente(
+    int n,
+    lbfgsfloatval_t *x,
+    lbfgsfloatval_t *f,
+    lbfgsfloatval_t *g,
+    lbfgsfloatval_t *s,
+    lbfgsfloatval_t *stp,
+    const lbfgsfloatval_t *xp,
+    const lbfgsfloatval_t *gp,
+    lbfgsfloatval_t *wa,
+    callback_data_t *cd,
+    const lbfgs_parameter_t *param)
+{
+    int count = 0;
+    int brackt, stage1, uinfo = 0;
+    lbfgsfloatval_t dg;
+    lbfgsfloatval_t stx, fx, dgx;
+    lbfgsfloatval_t sty, fy, dgy;
+    lbfgsfloatval_t fxm, dgxm, fym, dgym, fm, dgm;
+    lbfgsfloatval_t finit, ftest1, dginit, dgtest;
+    lbfgsfloatval_t width, prev_width;
+    lbfgsfloatval_t stmin, stmax;
+
+    /* Check the input parameters for errors. */
+    if (*stp <= 0.)
+    {
+        return LBFGSERR_INVALIDPARAMETERS;
+    }
+
+    /* Compute the initial gradient in the search direction. */
+    vecdot(&dginit, g, s, n);
+
+    /* Make sure that s points to a descent direction. */
+    if (0 < dginit)
+    {
+        return LBFGSERR_INCREASEGRADIENT;
+    }
+
+    /* Initialize local variables. */
+    brackt = 0;
+    stage1 = 1;
+    finit = *f;
+    dgtest = param->ftol * dginit;
+    width = param->max_step - param->min_step;
+    prev_width = 2.0 * width;
+
+    /*
+        The variables stx, fx, dgx contain the values of the step,
+        function, and directional derivative at the best step.
+        The variables sty, fy, dgy contain the value of the step,
+        function, and derivative at the other endpoint of
+        the interval of uncertainty.
+        The variables stp, f, dg contain the values of the step,
+        function, and derivative at the current step.
+    */
+    stx = sty = 0.;
+    fx = fy = finit;
+    dgx = dgy = dginit;
+
+    for (;;)
+    {
+        /*
+            Set the minimum and maximum steps to correspond to the
+            present interval of uncertainty.
+         */
+        if (brackt)
+        {
+            stmin = min2(stx, sty);
+            stmax = max2(stx, sty);
+        }
+        else
+        {
+            stmin = stx;
+            stmax = *stp + 4.0 * (*stp - stx);
+        }
+
+        /* Clip the step in the range of [stpmin, stpmax]. */
+        if (*stp < param->min_step)
+            *stp = param->min_step;
+        if (param->max_step < *stp)
+            *stp = param->max_step;
+
+        /*
+            If an unusual termination is to occur then let
+            stp be the lowest point obtained so far.
+         */
+        if ((brackt && ((*stp <= stmin || stmax <= *stp) || param->max_linesearch <= count + 1 || uinfo != 0)) || (brackt && (stmax - stmin <= param->xtol * stmax)))
+        {
+            *stp = stx;
+        }
+
+        /*
+            Compute the current value of x:
+                x <- x + (*stp) * s.
+         */
+        veccpy(x, xp, n);
+        vecadd(x, s, *stp, n);
+
+        /* Evaluate the function and gradient values. */
+        *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
+        vecdot(&dg, g, s, n);
+
+        ftest1 = finit + *stp * dgtest;
+        ++count;
+
+        /* Test for errors and convergence. */
+        if (brackt && ((*stp <= stmin || stmax <= *stp) || uinfo != 0))
+        {
+            /* Rounding errors prevent further progress. */
+            return LBFGSERR_ROUNDING_ERROR;
+        }
+        if (*stp == param->max_step && *f <= ftest1 && dg <= dgtest)
+        {
+            /* The step is the maximum value. */
+            return LBFGSERR_MAXIMUMSTEP;
+        }
+        if (*stp == param->min_step && (ftest1 < *f || dgtest <= dg))
+        {
+            /* The step is the minimum value. */
+            return LBFGSERR_MINIMUMSTEP;
+        }
+        if (brackt && (stmax - stmin) <= param->xtol * stmax)
+        {
+            /* Relative width of the interval of uncertainty is at most xtol. */
+            return LBFGSERR_WIDTHTOOSMALL;
+        }
+        if (param->max_linesearch <= count)
+        {
+            /* Maximum number of iteration. */
+            return LBFGSERR_MAXIMUMLINESEARCH;
+        }
+        if (*f <= ftest1 && fabs(dg) <= param->gtol * (-dginit))
+        {
+            /* The sufficient decrease condition and the directional derivative condition hold. */
+            return count;
+        }
+
+        /*
+            In the first stage we seek a step for which the modified
+            function has a nonpositive value and nonnegative derivative.
+         */
+        if (stage1 && *f <= ftest1 && min2(param->ftol, param->gtol) * dginit <= dg)
+        {
+            stage1 = 0;
+        }
+
+        /*
+            A modified function is used to predict the step only if
+            we have not obtained a step for which the modified
+            function has a nonpositive function value and nonnegative
+            derivative, and if a lower function value has been
+            obtained but the decrease is not sufficient.
+         */
+        if (stage1 && ftest1 < *f && *f <= fx)
+        {
+            /* Define the modified function and derivative values. */
+            fm = *f - *stp * dgtest;
+            fxm = fx - stx * dgtest;
+            fym = fy - sty * dgtest;
+            dgm = dg - dgtest;
+            dgxm = dgx - dgtest;
+            dgym = dgy - dgtest;
+
+            /*
+                Call update_trial_interval() to update the interval of
+                uncertainty and to compute the new step.
+             */
+            uinfo = update_trial_interval(
+                &stx, &fxm, &dgxm,
+                &sty, &fym, &dgym,
+                stp, &fm, &dgm,
+                stmin, stmax, &brackt);
+
+            /* Reset the function and gradient values for f. */
+            fx = fxm + stx * dgtest;
+            fy = fym + sty * dgtest;
+            dgx = dgxm + dgtest;
+            dgy = dgym + dgtest;
+        }
+        else
+        {
+            /*
+                Call update_trial_interval() to update the interval of
+                uncertainty and to compute the new step.
+             */
+            uinfo = update_trial_interval(
+                &stx, &fx, &dgx,
+                &sty, &fy, &dgy,
+                stp, f, &dg,
+                stmin, stmax, &brackt);
+        }
+
+        /*
+            Force a sufficient decrease in the interval of uncertainty.
+         */
+        if (brackt)
+        {
+            if (0.66 * prev_width <= fabs(sty - stx))
+            {
+                *stp = stx + 0.5 * (sty - stx);
+            }
+            prev_width = width;
+            width = fabs(sty - stx);
+        }
+    }
+
+    return LBFGSERR_LOGICERROR;
+}
+
+/**
+ * Define the local variables for computing minimizers.
+ */
+#define USES_MINIMIZER \
+    lbfgsfloatval_t a, d, gamma, theta, p, q, r, s;
+
+/**
+ * Find a minimizer of an interpolated cubic function.
+ *  @param  cm      The minimizer of the interpolated cubic.
+ *  @param  u       The value of one point, u.
+ *  @param  fu      The value of f(u).
+ *  @param  du      The value of f'(u).
+ *  @param  v       The value of another point, v.
+ *  @param  fv      The value of f(v).
+ *  @param  du      The value of f'(v).
+ */
+#define CUBIC_MINIMIZER(cm, u, fu, du, v, fv, dv)        \
+    d = (v) - (u);                                       \
+    theta = ((fu) - (fv)) * 3 / d + (du) + (dv);         \
+    p = fabs(theta);                                     \
+    q = fabs(du);                                        \
+    r = fabs(dv);                                        \
+    s = max3(p, q, r);                                   \
+    /* gamma = s*sqrt((theta/s)**2 - (du/s) * (dv/s)) */ \
+    a = theta / s;                                       \
+    gamma = s * sqrt(a * a - ((du) / s) * ((dv) / s));   \
+    if ((v) < (u))                                       \
+        gamma = -gamma;                                  \
+    p = gamma - (du) + theta;                            \
+    q = gamma - (du) + gamma + (dv);                     \
+    r = p / q;                                           \
+    (cm) = (u) + r * d;
+
+/**
+ * Find a minimizer of an interpolated cubic function.
+ *  @param  cm      The minimizer of the interpolated cubic.
+ *  @param  u       The value of one point, u.
+ *  @param  fu      The value of f(u).
+ *  @param  du      The value of f'(u).
+ *  @param  v       The value of another point, v.
+ *  @param  fv      The value of f(v).
+ *  @param  du      The value of f'(v).
+ *  @param  xmin    The maximum value.
+ *  @param  xmin    The minimum value.
+ */
+#define CUBIC_MINIMIZER2(cm, u, fu, du, v, fv, dv, xmin, xmax)  \
+    d = (v) - (u);                                              \
+    theta = ((fu) - (fv)) * 3 / d + (du) + (dv);                \
+    p = fabs(theta);                                            \
+    q = fabs(du);                                               \
+    r = fabs(dv);                                               \
+    s = max3(p, q, r);                                          \
+    /* gamma = s*sqrt((theta/s)**2 - (du/s) * (dv/s)) */        \
+    a = theta / s;                                              \
+    gamma = s * sqrt(max2(0, a * a - ((du) / s) * ((dv) / s))); \
+    if ((u) < (v))                                              \
+        gamma = -gamma;                                         \
+    p = gamma - (dv) + theta;                                   \
+    q = gamma - (dv) + gamma + (du);                            \
+    r = p / q;                                                  \
+    if (r < 0. && gamma != 0.)                                  \
+    {                                                           \
+        (cm) = (v)-r * d;                                       \
+    }                                                           \
+    else if (a < 0)                                             \
+    {                                                           \
+        (cm) = (xmax);                                          \
+    }                                                           \
+    else                                                        \
+    {                                                           \
+        (cm) = (xmin);                                          \
+    }
+
+/**
+ * Find a minimizer of an interpolated quadratic function.
+ *  @param  qm      The minimizer of the interpolated quadratic.
+ *  @param  u       The value of one point, u.
+ *  @param  fu      The value of f(u).
+ *  @param  du      The value of f'(u).
+ *  @param  v       The value of another point, v.
+ *  @param  fv      The value of f(v).
+ */
+#define QUARD_MINIMIZER(qm, u, fu, du, v, fv) \
+    a = (v) - (u);                            \
+    (qm) = (u) + (du) / (((fu) - (fv)) / a + (du)) / 2 * a;
+
+/**
+ * Find a minimizer of an interpolated quadratic function.
+ *  @param  qm      The minimizer of the interpolated quadratic.
+ *  @param  u       The value of one point, u.
+ *  @param  du      The value of f'(u).
+ *  @param  v       The value of another point, v.
+ *  @param  dv      The value of f'(v).
+ */
+#define QUARD_MINIMIZER2(qm, u, du, v, dv) \
+    a = (u) - (v);                         \
+    (qm) = (v) + (dv) / ((dv) - (du)) * a;
+
+/**
+ * Update a safeguarded trial value and interval for line search.
+ *
+ *  The parameter x represents the step with the least function value.
+ *  The parameter t represents the current step. This function assumes
+ *  that the derivative at the point of x in the direction of the step.
+ *  If the bracket is set to true, the minimizer has been bracketed in
+ *  an interval of uncertainty with endpoints between x and y.
+ *
+ *  @param  x       The pointer to the value of one endpoint.
+ *  @param  fx      The pointer to the value of f(x).
+ *  @param  dx      The pointer to the value of f'(x).
+ *  @param  y       The pointer to the value of another endpoint.
+ *  @param  fy      The pointer to the value of f(y).
+ *  @param  dy      The pointer to the value of f'(y).
+ *  @param  t       The pointer to the value of the trial value, t.
+ *  @param  ft      The pointer to the value of f(t).
+ *  @param  dt      The pointer to the value of f'(t).
+ *  @param  tmin    The minimum value for the trial value, t.
+ *  @param  tmax    The maximum value for the trial value, t.
+ *  @param  brackt  The pointer to the predicate if the trial value is
+ *                  bracketed.
+ *  @retval int     Status value. Zero indicates a normal termination.
+ *
+ *  @see
+ *      Jorge J. More and David J. Thuente. Line search algorithm with
+ *      guaranteed sufficient decrease. ACM Transactions on Mathematical
+ *      Software (TOMS), Vol 20, No 3, pp. 286-307, 1994.
+ */
+static int update_trial_interval(
+    lbfgsfloatval_t *x,
+    lbfgsfloatval_t *fx,
+    lbfgsfloatval_t *dx,
+    lbfgsfloatval_t *y,
+    lbfgsfloatval_t *fy,
+    lbfgsfloatval_t *dy,
+    lbfgsfloatval_t *t,
+    lbfgsfloatval_t *ft,
+    lbfgsfloatval_t *dt,
+    const lbfgsfloatval_t tmin,
+    const lbfgsfloatval_t tmax,
+    int *brackt)
+{
+    int bound;
+    int dsign = fsigndiff(dt, dx);
+    lbfgsfloatval_t mc;   /* minimizer of an interpolated cubic. */
+    lbfgsfloatval_t mq;   /* minimizer of an interpolated quadratic. */
+    lbfgsfloatval_t newt; /* new trial value. */
+    USES_MINIMIZER;       /* for CUBIC_MINIMIZER and QUARD_MINIMIZER. */
+
+    /* Check the input parameters for errors. */
+    if (*brackt)
+    {
+        if (*t <= min2(*x, *y) || max2(*x, *y) <= *t)
+        {
+            /* The trival value t is out of the interval. */
+            return LBFGSERR_OUTOFINTERVAL;
+        }
+        if (0. <= *dx * (*t - *x))
+        {
+            /* The function must decrease from x. */
+            return LBFGSERR_INCREASEGRADIENT;
+        }
+        if (tmax < tmin)
+        {
+            /* Incorrect tmin and tmax specified. */
+            return LBFGSERR_INCORRECT_TMINMAX;
+        }
+    }
+
+    /*
+        Trial value selection.
+     */
+    if (*fx < *ft)
+    {
+        /*
+            Case 1: a higher function value.
+            The minimum is brackt. If the cubic minimizer is closer
+            to x than the quadratic one, the cubic one is taken, else
+            the average of the minimizers is taken.
+         */
+        *brackt = 1;
+        bound = 1;
+        CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt);
+        QUARD_MINIMIZER(mq, *x, *fx, *dx, *t, *ft);
+        if (fabs(mc - *x) < fabs(mq - *x))
+        {
+            newt = mc;
+        }
+        else
+        {
+            newt = mc + 0.5 * (mq - mc);
+        }
+    }
+    else if (dsign)
+    {
+        /*
+            Case 2: a lower function value and derivatives of
+            opposite sign. The minimum is brackt. If the cubic
+            minimizer is closer to x than the quadratic (secant) one,
+            the cubic one is taken, else the quadratic one is taken.
+         */
+        *brackt = 1;
+        bound = 0;
+        CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt);
+        QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt);
+        if (fabs(mc - *t) > fabs(mq - *t))
+        {
+            newt = mc;
+        }
+        else
+        {
+            newt = mq;
+        }
+    }
+    else if (fabs(*dt) < fabs(*dx))
+    {
+        /*
+            Case 3: a lower function value, derivatives of the
+            same sign, and the magnitude of the derivative decreases.
+            The cubic minimizer is only used if the cubic tends to
+            infinity in the direction of the minimizer or if the minimum
+            of the cubic is beyond t. Otherwise the cubic minimizer is
+            defined to be either tmin or tmax. The quadratic (secant)
+            minimizer is also computed and if the minimum is brackt
+            then the the minimizer closest to x is taken, else the one
+            farthest away is taken.
+         */
+        bound = 1;
+        CUBIC_MINIMIZER2(mc, *x, *fx, *dx, *t, *ft, *dt, tmin, tmax);
+        QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt);
+        if (*brackt)
+        {
+            if (fabs(*t - mc) < fabs(*t - mq))
+            {
+                newt = mc;
+            }
+            else
+            {
+                newt = mq;
+            }
+        }
+        else
+        {
+            if (fabs(*t - mc) > fabs(*t - mq))
+            {
+                newt = mc;
+            }
+            else
+            {
+                newt = mq;
+            }
+        }
+    }
+    else
+    {
+        /*
+            Case 4: a lower function value, derivatives of the
+            same sign, and the magnitude of the derivative does
+            not decrease. If the minimum is not brackt, the step
+            is either tmin or tmax, else the cubic minimizer is taken.
+         */
+        bound = 0;
+        if (*brackt)
+        {
+            CUBIC_MINIMIZER(newt, *t, *ft, *dt, *y, *fy, *dy);
+        }
+        else if (*x < *t)
+        {
+            newt = tmax;
+        }
+        else
+        {
+            newt = tmin;
+        }
+    }
+
+    /*
+        Update the interval of uncertainty. This update does not
+        depend on the new step or the case analysis above.
+
+        - Case a: if f(x) < f(t),
+            x <- x, y <- t.
+        - Case b: if f(t) <= f(x) && f'(t)*f'(x) > 0,
+            x <- t, y <- y.
+        - Case c: if f(t) <= f(x) && f'(t)*f'(x) < 0,
+            x <- t, y <- x.
+     */
+    if (*fx < *ft)
+    {
+        /* Case a */
+        *y = *t;
+        *fy = *ft;
+        *dy = *dt;
+    }
+    else
+    {
+        /* Case c */
+        if (dsign)
+        {
+            *y = *x;
+            *fy = *fx;
+            *dy = *dx;
+        }
+        /* Cases b and c */
+        *x = *t;
+        *fx = *ft;
+        *dx = *dt;
+    }
+
+    /* Clip the new trial value in [tmin, tmax]. */
+    if (tmax < newt)
+        newt = tmax;
+    if (newt < tmin)
+        newt = tmin;
+
+    /*
+        Redefine the new trial value if it is close to the upper bound
+        of the interval.
+     */
+    if (*brackt && bound)
+    {
+        mq = *x + 0.66 * (*y - *x);
+        if (*x < *y)
+        {
+            if (mq < newt)
+                newt = mq;
+        }
+        else
+        {
+            if (newt < mq)
+                newt = mq;
+        }
+    }
+
+    /* Return the new trial value. */
+    *t = newt;
+    return 0;
+}
+
+static lbfgsfloatval_t owlqn_x1norm(
+    const lbfgsfloatval_t *x,
+    const int start,
+    const int n)
+{
+    int i;
+    lbfgsfloatval_t norm = 0.;
+
+    for (i = start; i < n; ++i)
+    {
+        norm += fabs(x[i]);
+    }
+
+    return norm;
+}
+
+static void owlqn_pseudo_gradient(
+    lbfgsfloatval_t *pg,
+    const lbfgsfloatval_t *x,
+    const lbfgsfloatval_t *g,
+    const int n,
+    const lbfgsfloatval_t c,
+    const int start,
+    const int end)
+{
+    int i;
+
+    /* Compute the negative of gradients. */
+    for (i = 0; i < start; ++i)
+    {
+        pg[i] = g[i];
+    }
+
+    /* Compute the psuedo-gradients. */
+    for (i = start; i < end; ++i)
+    {
+        if (x[i] < 0.)
+        {
+            /* Differentiable. */
+            pg[i] = g[i] - c;
+        }
+        else if (0. < x[i])
+        {
+            /* Differentiable. */
+            pg[i] = g[i] + c;
+        }
+        else
+        {
+            if (g[i] < -c)
+            {
+                /* Take the right partial derivative. */
+                pg[i] = g[i] + c;
+            }
+            else if (c < g[i])
+            {
+                /* Take the left partial derivative. */
+                pg[i] = g[i] - c;
+            }
+            else
+            {
+                pg[i] = 0.;
+            }
+        }
+    }
+
+    for (i = end; i < n; ++i)
+    {
+        pg[i] = g[i];
+    }
+}
+
+static void owlqn_project(
+    lbfgsfloatval_t *d,
+    const lbfgsfloatval_t *sign,
+    const int start,
+    const int end)
+{
+    int i;
+
+    for (i = start; i < end; ++i)
+    {
+        if (d[i] * sign[i] <= 0)
+        {
+            d[i] = 0;
+        }
+    }
+}
diff --git a/wrapper/Convert/SireOpenMM/lbgfs/lbfgs.h b/wrapper/Convert/SireOpenMM/lbgfs/lbfgs.h
new file mode 100644
index 000000000..35b835bd8
--- /dev/null
+++ b/wrapper/Convert/SireOpenMM/lbgfs/lbfgs.h
@@ -0,0 +1,746 @@
+/*
+ *      C library of Limited memory BFGS (L-BFGS).
+ *
+ * Copyright (c) 1990, Jorge Nocedal
+ * Copyright (c) 2007-2010 Naoaki Okazaki
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* $Id: lbfgs.h 65 2010-01-29 12:19:16Z naoaki $ */
+
+#ifndef __LBFGS_H__
+#define __LBFGS_H__
+
+#ifdef  __cplusplus
+extern "C" {
+#endif/*__cplusplus*/
+
+#ifdef _MSC_VER
+#if defined(OPENMM_BUILDING_SHARED_LIBRARY)
+    #define WINDOWS_EXPORT __declspec(dllexport)
+#else
+#define WINDOWS_EXPORT
+#endif
+#else
+#define WINDOWS_EXPORT
+#endif
+
+/*
+ * The default precision of floating point values is 64bit (double).
+ */
+#ifndef LBFGS_FLOAT
+#define LBFGS_FLOAT     64
+#endif/*LBFGS_FLOAT*/
+
+/*
+ * Activate optimization routines for IEEE754 floating point values.
+ */
+#ifndef LBFGS_IEEE_FLOAT
+#define LBFGS_IEEE_FLOAT    1
+#endif/*LBFGS_IEEE_FLOAT*/
+
+#if     LBFGS_FLOAT == 32
+typedef float lbfgsfloatval_t;
+
+#elif   LBFGS_FLOAT == 64
+typedef double lbfgsfloatval_t;
+
+#else
+#error "libLBFGS supports single (float; LBFGS_FLOAT = 32) or double (double; LBFGS_FLOAT=64) precision only."
+
+#endif
+
+
+/** 
+ * \addtogroup liblbfgs_api libLBFGS API
+ * @{
+ *
+ *  The libLBFGS API.
+ */
+
+/**
+ * Return values of lbfgs().
+ * 
+ *  Roughly speaking, a negative value indicates an error.
+ */
+enum {
+    /** L-BFGS reaches convergence. */
+    LBFGS_SUCCESS = 0,
+    LBFGS_CONVERGENCE = 0,
+    LBFGS_STOP,
+    /** The initial variables already minimize the objective function. */
+    LBFGS_ALREADY_MINIMIZED,
+
+    /** Unknown error. */
+    LBFGSERR_UNKNOWNERROR = -1024,
+    /** Logic error. */
+    LBFGSERR_LOGICERROR,
+    /** Insufficient memory. */
+    LBFGSERR_OUTOFMEMORY,
+    /** The minimization process has been canceled. */
+    LBFGSERR_CANCELED,
+    /** Invalid number of variables specified. */
+    LBFGSERR_INVALID_N,
+    /** Invalid number of variables (for SSE) specified. */
+    LBFGSERR_INVALID_N_SSE,
+    /** The array x must be aligned to 16 (for SSE). */
+    LBFGSERR_INVALID_X_SSE,
+    /** Invalid parameter lbfgs_parameter_t::epsilon specified. */
+    LBFGSERR_INVALID_EPSILON,
+    /** Invalid parameter lbfgs_parameter_t::past specified. */
+    LBFGSERR_INVALID_TESTPERIOD,
+    /** Invalid parameter lbfgs_parameter_t::delta specified. */
+    LBFGSERR_INVALID_DELTA,
+    /** Invalid parameter lbfgs_parameter_t::linesearch specified. */
+    LBFGSERR_INVALID_LINESEARCH,
+    /** Invalid parameter lbfgs_parameter_t::max_step specified. */
+    LBFGSERR_INVALID_MINSTEP,
+    /** Invalid parameter lbfgs_parameter_t::max_step specified. */
+    LBFGSERR_INVALID_MAXSTEP,
+    /** Invalid parameter lbfgs_parameter_t::ftol specified. */
+    LBFGSERR_INVALID_FTOL,
+    /** Invalid parameter lbfgs_parameter_t::wolfe specified. */
+    LBFGSERR_INVALID_WOLFE,
+    /** Invalid parameter lbfgs_parameter_t::gtol specified. */
+    LBFGSERR_INVALID_GTOL,
+    /** Invalid parameter lbfgs_parameter_t::xtol specified. */
+    LBFGSERR_INVALID_XTOL,
+    /** Invalid parameter lbfgs_parameter_t::max_linesearch specified. */
+    LBFGSERR_INVALID_MAXLINESEARCH,
+    /** Invalid parameter lbfgs_parameter_t::orthantwise_c specified. */
+    LBFGSERR_INVALID_ORTHANTWISE,
+    /** Invalid parameter lbfgs_parameter_t::orthantwise_start specified. */
+    LBFGSERR_INVALID_ORTHANTWISE_START,
+    /** Invalid parameter lbfgs_parameter_t::orthantwise_end specified. */
+    LBFGSERR_INVALID_ORTHANTWISE_END,
+    /** The line-search step went out of the interval of uncertainty. */
+    LBFGSERR_OUTOFINTERVAL,
+    /** A logic error occurred; alternatively, the interval of uncertainty
+        became too small. */
+    LBFGSERR_INCORRECT_TMINMAX,
+    /** A rounding error occurred; alternatively, no line-search step
+        satisfies the sufficient decrease and curvature conditions. */
+    LBFGSERR_ROUNDING_ERROR,
+    /** The line-search step became smaller than lbfgs_parameter_t::min_step. */
+    LBFGSERR_MINIMUMSTEP,
+    /** The line-search step became larger than lbfgs_parameter_t::max_step. */
+    LBFGSERR_MAXIMUMSTEP,
+    /** The line-search routine reaches the maximum number of evaluations. */
+    LBFGSERR_MAXIMUMLINESEARCH,
+    /** The algorithm routine reaches the maximum number of iterations. */
+    LBFGSERR_MAXIMUMITERATION,
+    /** Relative width of the interval of uncertainty is at most
+        lbfgs_parameter_t::xtol. */
+    LBFGSERR_WIDTHTOOSMALL,
+    /** A logic error (negative line-search step) occurred. */
+    LBFGSERR_INVALIDPARAMETERS,
+    /** The current search direction increases the objective function value. */
+    LBFGSERR_INCREASEGRADIENT,
+};
+
+/**
+ * Line search algorithms.
+ */
+enum {
+    /** The default algorithm (MoreThuente method). */
+    LBFGS_LINESEARCH_DEFAULT = 0,
+    /** MoreThuente method proposd by More and Thuente. */
+    LBFGS_LINESEARCH_MORETHUENTE = 0,
+    /**
+     * Backtracking method with the Armijo condition.
+     *  The backtracking method finds the step length such that it satisfies
+     *  the sufficient decrease (Armijo) condition,
+     *    - f(x + a * d) <= f(x) + lbfgs_parameter_t::ftol * a * g(x)^T d,
+     *
+     *  where x is the current point, d is the current search direction, and
+     *  a is the step length.
+     */
+    LBFGS_LINESEARCH_BACKTRACKING_ARMIJO = 1,
+    /** The backtracking method with the defualt (regular Wolfe) condition. */
+    LBFGS_LINESEARCH_BACKTRACKING = 2,
+    /**
+     * Backtracking method with regular Wolfe condition.
+     *  The backtracking method finds the step length such that it satisfies
+     *  both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO)
+     *  and the curvature condition,
+     *    - g(x + a * d)^T d >= lbfgs_parameter_t::wolfe * g(x)^T d,
+     *
+     *  where x is the current point, d is the current search direction, and
+     *  a is the step length.
+     */
+    LBFGS_LINESEARCH_BACKTRACKING_WOLFE = 2,
+    /**
+     * Backtracking method with strong Wolfe condition.
+     *  The backtracking method finds the step length such that it satisfies
+     *  both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO)
+     *  and the following condition,
+     *    - |g(x + a * d)^T d| <= lbfgs_parameter_t::wolfe * |g(x)^T d|,
+     *
+     *  where x is the current point, d is the current search direction, and
+     *  a is the step length.
+     */
+    LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 3,
+};
+
+/**
+ * L-BFGS optimization parameters.
+ *  Call lbfgs_parameter_init() function to initialize parameters to the
+ *  default values.
+ */
+typedef struct {
+    /**
+     * The number of corrections to approximate the inverse hessian matrix.
+     *  The L-BFGS routine stores the computation results of previous \ref m
+     *  iterations to approximate the inverse hessian matrix of the current
+     *  iteration. This parameter controls the size of the limited memories
+     *  (corrections). The default value is \c 6. Values less than \c 3 are
+     *  not recommended. Large values will result in excessive computing time.
+     */
+    int             m;
+
+    /**
+     * Epsilon for convergence test.
+     *  This parameter determines the accuracy with which the solution is to
+     *  be found. A minimization terminates when
+     *      ||g|| < \ref epsilon * max(1, ||x||),
+     *  where ||.|| denotes the Euclidean (L2) norm. The default value is
+     *  \c 1e-5.
+     */
+    lbfgsfloatval_t epsilon;
+
+    /**
+     * Distance for delta-based convergence test.
+     *  This parameter determines the distance, in iterations, to compute
+     *  the rate of decrease of the objective function. If the value of this
+     *  parameter is zero, the library does not perform the delta-based
+     *  convergence test. The default value is \c 0.
+     */
+    int             past;
+
+    /**
+     * Delta for convergence test.
+     *  This parameter determines the minimum rate of decrease of the
+     *  objective function. The library stops iterations when the
+     *  following condition is met:
+     *      (f' - f) / f < \ref delta,
+     *  where f' is the objective value of \ref past iterations ago, and f is
+     *  the objective value of the current iteration.
+     *  The default value is \c 0.
+     */
+    lbfgsfloatval_t delta;
+
+    /**
+     * The maximum number of iterations.
+     *  The lbfgs() function terminates an optimization process with
+     *  ::LBFGSERR_MAXIMUMITERATION status code when the iteration count
+     *  exceedes this parameter. Setting this parameter to zero continues an
+     *  optimization process until a convergence or error. The default value
+     *  is \c 0.
+     */
+    int             max_iterations;
+
+    /**
+     * The line search algorithm.
+     *  This parameter specifies a line search algorithm to be used by the
+     *  L-BFGS routine.
+     */
+    int             linesearch;
+
+    /**
+     * The maximum number of trials for the line search.
+     *  This parameter controls the number of function and gradients evaluations
+     *  per iteration for the line search routine. The default value is \c 20.
+     */
+    int             max_linesearch;
+
+    /**
+     * The minimum step of the line search routine.
+     *  The default value is \c 1e-20. This value need not be modified unless
+     *  the exponents are too large for the machine being used, or unless the
+     *  problem is extremely badly scaled (in which case the exponents should
+     *  be increased).
+     */
+    lbfgsfloatval_t min_step;
+
+    /**
+     * The maximum step of the line search.
+     *  The default value is \c 1e+20. This value need not be modified unless
+     *  the exponents are too large for the machine being used, or unless the
+     *  problem is extremely badly scaled (in which case the exponents should
+     *  be increased).
+     */
+    lbfgsfloatval_t max_step;
+
+    /**
+     * A parameter to control the accuracy of the line search routine.
+     *  The default value is \c 1e-4. This parameter should be greater
+     *  than zero and smaller than \c 0.5.
+     */
+    lbfgsfloatval_t ftol;
+
+    /**
+     * A coefficient for the Wolfe condition.
+     *  This parameter is valid only when the backtracking line-search
+     *  algorithm is used with the Wolfe condition,
+     *  ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE or
+     *  ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE .
+     *  The default value is \c 0.9. This parameter should be greater
+     *  the \ref ftol parameter and smaller than \c 1.0.
+     */
+    lbfgsfloatval_t wolfe;
+
+    /**
+     * A parameter to control the accuracy of the line search routine.
+     *  The default value is \c 0.9. If the function and gradient
+     *  evaluations are inexpensive with respect to the cost of the
+     *  iteration (which is sometimes the case when solving very large
+     *  problems) it may be advantageous to set this parameter to a small
+     *  value. A typical small value is \c 0.1. This parameter shuold be
+     *  greater than the \ref ftol parameter (\c 1e-4) and smaller than
+     *  \c 1.0.
+     */
+    lbfgsfloatval_t gtol;
+
+    /**
+     * The machine precision for floating-point values.
+     *  This parameter must be a positive value set by a client program to
+     *  estimate the machine precision. The line search routine will terminate
+     *  with the status code (::LBFGSERR_ROUNDING_ERROR) if the relative width
+     *  of the interval of uncertainty is less than this parameter.
+     */
+    lbfgsfloatval_t xtol;
+
+    /**
+     * Coeefficient for the L1 norm of variables.
+     *  This parameter should be set to zero for standard minimization
+     *  problems. Setting this parameter to a positive value activates
+     *  Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method, which
+     *  minimizes the objective function F(x) combined with the L1 norm |x|
+     *  of the variables, {F(x) + C |x|}. This parameter is the coeefficient
+     *  for the |x|, i.e., C. As the L1 norm |x| is not differentiable at
+     *  zero, the library modifies function and gradient evaluations from
+     *  a client program suitably; a client program thus have only to return
+     *  the function value F(x) and gradients G(x) as usual. The default value
+     *  is zero.
+     */
+    lbfgsfloatval_t orthantwise_c;
+
+    /**
+     * Start index for computing L1 norm of the variables.
+     *  This parameter is valid only for OWL-QN method
+     *  (i.e., \ref orthantwise_c != 0). This parameter b (0 <= b < N)
+     *  specifies the index number from which the library computes the
+     *  L1 norm of the variables x,
+     *      |x| := |x_{b}| + |x_{b+1}| + ... + |x_{N}| .
+     *  In other words, variables x_1, ..., x_{b-1} are not used for
+     *  computing the L1 norm. Setting b (0 < b < N), one can protect
+     *  variables, x_1, ..., x_{b-1} (e.g., a bias term of logistic
+     *  regression) from being regularized. The default value is zero.
+     */
+    int             orthantwise_start;
+
+    /**
+     * End index for computing L1 norm of the variables.
+     *  This parameter is valid only for OWL-QN method
+     *  (i.e., \ref orthantwise_c != 0). This parameter e (0 < e <= N)
+     *  specifies the index number at which the library stops computing the
+     *  L1 norm of the variables x,
+     */
+    int             orthantwise_end;
+} lbfgs_parameter_t;
+
+
+/**
+ * Callback interface to provide objective function and gradient evaluations.
+ *
+ *  The lbfgs() function call this function to obtain the values of objective
+ *  function and its gradients when needed. A client program must implement
+ *  this function to evaluate the values of the objective function and its
+ *  gradients, given current values of variables.
+ *  
+ *  @param  instance    The user data sent for lbfgs() function by the client.
+ *  @param  x           The current values of variables.
+ *  @param  g           The gradient vector. The callback function must compute
+ *                      the gradient values for the current variables.
+ *  @param  n           The number of variables.
+ *  @param  step        The current step of the line search routine.
+ *  @retval lbfgsfloatval_t The value of the objective function for the current
+ *                          variables.
+ */
+typedef lbfgsfloatval_t (*lbfgs_evaluate_t)(
+    void *instance,
+    const lbfgsfloatval_t *x,
+    lbfgsfloatval_t *g,
+    const int n,
+    const lbfgsfloatval_t step
+    );
+
+/**
+ * Callback interface to receive the progress of the optimization process.
+ *
+ *  The lbfgs() function call this function for each iteration. Implementing
+ *  this function, a client program can store or display the current progress
+ *  of the optimization process.
+ *
+ *  @param  instance    The user data sent for lbfgs() function by the client.
+ *  @param  x           The current values of variables.
+ *  @param  g           The current gradient values of variables.
+ *  @param  fx          The current value of the objective function.
+ *  @param  xnorm       The Euclidean norm of the variables.
+ *  @param  gnorm       The Euclidean norm of the gradients.
+ *  @param  step        The line-search step used for this iteration.
+ *  @param  n           The number of variables.
+ *  @param  k           The iteration count.
+ *  @param  ls          The number of evaluations called for this iteration.
+ *  @retval int         Zero to continue the optimization process. Returning a
+ *                      non-zero value will cancel the optimization process.
+ */
+typedef int (*lbfgs_progress_t)(
+    void *instance,
+    const lbfgsfloatval_t *x,
+    const lbfgsfloatval_t *g,
+    const lbfgsfloatval_t fx,
+    const lbfgsfloatval_t xnorm,
+    const lbfgsfloatval_t gnorm,
+    const lbfgsfloatval_t step,
+    int n,
+    int k,
+    int ls
+    );
+
+/*
+A user must implement a function compatible with ::lbfgs_evaluate_t (evaluation
+callback) and pass the pointer to the callback function to lbfgs() arguments.
+Similarly, a user can implement a function compatible with ::lbfgs_progress_t
+(progress callback) to obtain the current progress (e.g., variables, function
+value, ||G||, etc) and to cancel the iteration process if necessary.
+Implementation of a progress callback is optional: a user can pass \c NULL if
+progress notification is not necessary.
+
+In addition, a user must preserve two requirements:
+    - The number of variables must be multiples of 16 (this is not 4).
+    - The memory block of variable array ::x must be aligned to 16.
+
+This algorithm terminates an optimization
+when:
+
+    ||G|| < \epsilon \cdot \max(1, ||x||) .
+
+In this formula, ||.|| denotes the Euclidean norm.
+*/
+
+/**
+ * Start a L-BFGS optimization.
+ *
+ *  @param  n           The number of variables.
+ *  @param  x           The array of variables. A client program can set
+ *                      default values for the optimization and receive the
+ *                      optimization result through this array. This array
+ *                      must be allocated by ::lbfgs_malloc function
+ *                      for libLBFGS built with SSE/SSE2 optimization routine
+ *                      enabled. The library built without SSE/SSE2
+ *                      optimization does not have such a requirement.
+ *  @param  ptr_fx      The pointer to the variable that receives the final
+ *                      value of the objective function for the variables.
+ *                      This argument can be set to \c NULL if the final
+ *                      value of the objective function is unnecessary.
+ *  @param  proc_evaluate   The callback function to provide function and
+ *                          gradient evaluations given a current values of
+ *                          variables. A client program must implement a
+ *                          callback function compatible with \ref
+ *                          lbfgs_evaluate_t and pass the pointer to the
+ *                          callback function.
+ *  @param  proc_progress   The callback function to receive the progress
+ *                          (the number of iterations, the current value of
+ *                          the objective function) of the minimization
+ *                          process. This argument can be set to \c NULL if
+ *                          a progress report is unnecessary.
+ *  @param  instance    A user data for the client program. The callback
+ *                      functions will receive the value of this argument.
+ *  @param  param       The pointer to a structure representing parameters for
+ *                      L-BFGS optimization. A client program can set this
+ *                      parameter to \c NULL to use the default parameters.
+ *                      Call lbfgs_parameter_init() function to fill a
+ *                      structure with the default values.
+ *  @retval int         The status code. This function returns zero if the
+ *                      minimization process terminates without an error. A
+ *                      non-zero value indicates an error.
+ */
+int WINDOWS_EXPORT lbfgs(
+    int n,
+    lbfgsfloatval_t *x,
+    lbfgsfloatval_t *ptr_fx,
+    lbfgs_evaluate_t proc_evaluate,
+    lbfgs_progress_t proc_progress,
+    void *instance,
+    lbfgs_parameter_t *param
+    );
+
+/**
+ * Initialize L-BFGS parameters to the default values.
+ *
+ *  Call this function to fill a parameter structure with the default values
+ *  and overwrite parameter values if necessary.
+ *
+ *  @param  param       The pointer to the parameter structure.
+ */
+void WINDOWS_EXPORT lbfgs_parameter_init(lbfgs_parameter_t *param);
+
+/**
+ * Allocate an array for variables.
+ *
+ *  This function allocates an array of variables for the convenience of
+ *  ::lbfgs function; the function has a requreiemt for a variable array
+ *  when libLBFGS is built with SSE/SSE2 optimization routines. A user does
+ *  not have to use this function for libLBFGS built without SSE/SSE2
+ *  optimization.
+ *  
+ *  @param  n           The number of variables.
+ */
+lbfgsfloatval_t WINDOWS_EXPORT * lbfgs_malloc(int n);
+
+/**
+ * Free an array of variables.
+ *  
+ *  @param  x           The array of variables allocated by ::lbfgs_malloc
+ *                      function.
+ */
+void WINDOWS_EXPORT lbfgs_free(lbfgsfloatval_t *x);
+
+/** @} */
+
+#ifdef  __cplusplus
+}
+#endif/*__cplusplus*/
+
+
+
+/**
+@mainpage libLBFGS: a library of Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS)
+
+@section intro Introduction
+
+This library is a C port of the implementation of Limited-memory
+Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal.
+The original FORTRAN source code is available at:
+http://www.ece.northwestern.edu/~nocedal/lbfgs.html
+
+The L-BFGS method solves the unconstrainted minimization problem,
+
+<pre>
+    minimize F(x), x = (x1, x2, ..., xN),
+</pre>
+
+only if the objective function F(x) and its gradient G(x) are computable. The
+well-known Newton's method requires computation of the inverse of the hessian
+matrix of the objective function. However, the computational cost for the
+inverse hessian matrix is expensive especially when the objective function
+takes a large number of variables. The L-BFGS method iteratively finds a
+minimizer by approximating the inverse hessian matrix by information from last
+m iterations. This innovation saves the memory storage and computational time
+drastically for large-scaled problems.
+
+Among the various ports of L-BFGS, this library provides several features:
+- <b>Optimization with L1-norm (Orthant-Wise Limited-memory Quasi-Newton
+  (OWL-QN) method)</b>:
+  In addition to standard minimization problems, the library can minimize
+  a function F(x) combined with L1-norm |x| of the variables,
+  {F(x) + C |x|}, where C is a constant scalar parameter. This feature is
+  useful for estimating parameters of sparse log-linear models (e.g.,
+  logistic regression and maximum entropy) with L1-regularization (or
+  Laplacian prior).
+- <b>Clean C code</b>:
+  Unlike C codes generated automatically by f2c (Fortran 77 into C converter),
+  this port includes changes based on my interpretations, improvements,
+  optimizations, and clean-ups so that the ported code would be well-suited
+  for a C code. In addition to comments inherited from the original code,
+  a number of comments were added through my interpretations.
+- <b>Callback interface</b>:
+  The library receives function and gradient values via a callback interface.
+  The library also notifies the progress of the optimization by invoking a
+  callback function. In the original implementation, a user had to set
+  function and gradient values every time the function returns for obtaining
+  updated values.
+- <b>Thread safe</b>:
+  The library is thread-safe, which is the secondary gain from the callback
+  interface.
+- <b>Cross platform.</b> The source code can be compiled on Microsoft Visual
+  Studio 2005, GNU C Compiler (gcc), etc.
+- <b>Configurable precision</b>: A user can choose single-precision (float)
+  or double-precision (double) accuracy by changing ::LBFGS_FLOAT macro.
+- <b>SSE/SSE2 optimization</b>:
+  This library includes SSE/SSE2 optimization (written in compiler intrinsics)
+  for vector arithmetic operations on Intel/AMD processors. The library uses
+  SSE for float values and SSE2 for double values. The SSE/SSE2 optimization
+  routine is disabled by default.
+
+This library is used by:
+- <a href="http://www.chokkan.org/software/crfsuite/">CRFsuite: A fast implementation of Conditional Random Fields (CRFs)</a>
+- <a href="http://www.chokkan.org/software/classias/">Classias: A collection of machine-learning algorithms for classification</a>
+- <a href="http://www.public.iastate.edu/~gdancik/mlegp/">mlegp: an R package for maximum likelihood estimates for Gaussian processes</a>
+- <a href="http://infmath.uibk.ac.at/~matthiasf/imaging2/">imaging2: the imaging2 class library</a>
+- <a href="http://search.cpan.org/~laye/Algorithm-LBFGS-0.16/">Algorithm::LBFGS - Perl extension for L-BFGS</a>
+- <a href="http://www.cs.kuleuven.be/~bernd/yap-lbfgs/">YAP-LBFGS (an interface to call libLBFGS from YAP Prolog)</a>
+
+@section download Download
+
+- <a href="http://www.chokkan.org/software/dist/liblbfgs-1.9.tar.gz">Source code</a>
+
+libLBFGS is distributed under the term of the
+<a href="http://opensource.org/licenses/mit-license.php">MIT license</a>.
+
+@section changelog History
+- Version 1.9 (2010-01-29):
+    - Fixed a mistake in checking the validity of the parameters "ftol" and
+      "wolfe"; this was discovered by Kevin S. Van Horn.
+- Version 1.8 (2009-07-13):
+    - Accepted the patch submitted by Takashi Imamichi;
+      the backtracking method now has three criteria for choosing the step
+      length:
+        - ::LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: sufficient decrease (Armijo)
+          condition only
+        - ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE: regular Wolfe condition
+          (sufficient decrease condition + curvature condition)
+        - ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: strong Wolfe condition
+    - Updated the documentation to explain the above three criteria.
+- Version 1.7 (2009-02-28):
+    - Improved OWL-QN routines for stability.
+    - Removed the support of OWL-QN method in MoreThuente algorithm because
+      it accidentally fails in early stages of iterations for some objectives.
+      Because of this change, <b>the OW-LQN method must be used with the
+      backtracking algorithm (::LBFGS_LINESEARCH_BACKTRACKING)</b>, or the
+      library returns ::LBFGSERR_INVALID_LINESEARCH.
+    - Renamed line search algorithms as follows:
+        - ::LBFGS_LINESEARCH_BACKTRACKING: regular Wolfe condition.
+        - ::LBFGS_LINESEARCH_BACKTRACKING_LOOSE: regular Wolfe condition.
+        - ::LBFGS_LINESEARCH_BACKTRACKING_STRONG: strong Wolfe condition.
+    - Source code clean-up.
+- Version 1.6 (2008-11-02):
+    - Improved line-search algorithm with strong Wolfe condition, which was
+      contributed by Takashi Imamichi. This routine is now default for
+      ::LBFGS_LINESEARCH_BACKTRACKING. The previous line search algorithm
+      with regular Wolfe condition is still available as
+      ::LBFGS_LINESEARCH_BACKTRACKING_LOOSE.
+    - Configurable stop index for L1-norm computation. A member variable
+      ::lbfgs_parameter_t::orthantwise_end was added to specify the index
+      number at which the library stops computing the L1 norm of the
+      variables. This is useful to prevent some variables from being
+      regularized by the OW-LQN method.
+    - A sample program written in C++ (sample/sample.cpp).
+- Version 1.5 (2008-07-10):
+    - Configurable starting index for L1-norm computation. A member variable
+      ::lbfgs_parameter_t::orthantwise_start was added to specify the index
+      number from which the library computes the L1 norm of the variables.
+      This is useful to prevent some variables from being regularized by the
+      OWL-QN method.
+    - Fixed a zero-division error when the initial variables have already
+      been a minimizer (reported by Takashi Imamichi). In this case, the
+      library returns ::LBFGS_ALREADY_MINIMIZED status code.
+    - Defined ::LBFGS_SUCCESS status code as zero; removed unused constants,
+      LBFGSFALSE and LBFGSTRUE.
+    - Fixed a compile error in an implicit down-cast.
+- Version 1.4 (2008-04-25):
+    - Configurable line search algorithms. A member variable
+      ::lbfgs_parameter_t::linesearch was added to choose either MoreThuente
+      method (::LBFGS_LINESEARCH_MORETHUENTE) or backtracking algorithm
+      (::LBFGS_LINESEARCH_BACKTRACKING).
+    - Fixed a bug: the previous version did not compute psuedo-gradients
+      properly in the line search routines for OWL-QN. This bug might quit
+      an iteration process too early when the OWL-QN routine was activated
+      (0 < ::lbfgs_parameter_t::orthantwise_c).
+    - Configure script for POSIX environments.
+    - SSE/SSE2 optimizations with GCC.
+    - New functions ::lbfgs_malloc and ::lbfgs_free to use SSE/SSE2 routines
+      transparently. It is uncessary to use these functions for libLBFGS built
+      without SSE/SSE2 routines; you can still use any memory allocators if
+      SSE/SSE2 routines are disabled in libLBFGS.
+- Version 1.3 (2007-12-16):
+    - An API change. An argument was added to lbfgs() function to receive the
+      final value of the objective function. This argument can be set to
+      \c NULL if the final value is unnecessary.
+    - Fixed a null-pointer bug in the sample code (reported by Takashi Imamichi).
+    - Added build scripts for Microsoft Visual Studio 2005 and GCC.
+    - Added README file.
+- Version 1.2 (2007-12-13):
+    - Fixed a serious bug in orthant-wise L-BFGS.
+      An important variable was used without initialization.
+- Version 1.1 (2007-12-01):
+    - Implemented orthant-wise L-BFGS.
+    - Implemented lbfgs_parameter_init() function.
+    - Fixed several bugs.
+    - API documentation.
+- Version 1.0 (2007-09-20):
+    - Initial release.
+
+@section api Documentation
+
+- @ref liblbfgs_api "libLBFGS API"
+
+@section sample Sample code
+
+@include sample.c
+
+@section ack Acknowledgements
+
+The L-BFGS algorithm is described in:
+    - Jorge Nocedal.
+      Updating Quasi-Newton Matrices with Limited Storage.
+      <i>Mathematics of Computation</i>, Vol. 35, No. 151, pp. 773--782, 1980.
+    - Dong C. Liu and Jorge Nocedal.
+      On the limited memory BFGS method for large scale optimization.
+      <i>Mathematical Programming</i> B, Vol. 45, No. 3, pp. 503-528, 1989.
+
+The line search algorithms used in this implementation are described in:
+    - John E. Dennis and Robert B. Schnabel.
+      <i>Numerical Methods for Unconstrained Optimization and Nonlinear
+      Equations</i>, Englewood Cliffs, 1983.
+    - Jorge J. More and David J. Thuente.
+      Line search algorithm with guaranteed sufficient decrease.
+      <i>ACM Transactions on Mathematical Software (TOMS)</i>, Vol. 20, No. 3,
+      pp. 286-307, 1994.
+
+This library also implements Orthant-Wise Limited-memory Quasi-Newton (OWL-QN)
+method presented in:
+    - Galen Andrew and Jianfeng Gao.
+      Scalable training of L1-regularized log-linear models.
+      In <i>Proceedings of the 24th International Conference on Machine
+      Learning (ICML 2007)</i>, pp. 33-40, 2007.
+
+Special thanks go to:
+    - Yoshimasa Tsuruoka and Daisuke Okanohara for technical information about
+      OWL-QN
+    - Takashi Imamichi for the useful enhancements of the backtracking method
+
+Finally I would like to thank the original author, Jorge Nocedal, who has been
+distributing the effieicnt and explanatory implementation in an open source
+licence.
+
+@section reference Reference
+
+- <a href="http://www.ece.northwestern.edu/~nocedal/lbfgs.html">L-BFGS</a> by Jorge Nocedal.
+- <a href="http://research.microsoft.com/en-us/downloads/b1eb1016-1738-4bd5-83a9-370c9d498a03/default.aspx">Orthant-Wise Limited-memory Quasi-Newton Optimizer for L1-regularized Objectives</a> by Galen Andrew.
+- <a href="http://chasen.org/~taku/software/misc/lbfgs/">C port (via f2c)</a> by Taku Kudo.
+- <a href="http://www.alglib.net/optimization/lbfgs.php">C#/C++/Delphi/VisualBasic6 port</a> in ALGLIB.
+- <a href="http://cctbx.sourceforge.net/">Computational Crystallography Toolbox</a> includes
+  <a href="http://cctbx.sourceforge.net/current_cvs/c_plus_plus/namespacescitbx_1_1lbfgs.html">scitbx::lbfgs</a>.
+*/
+
+#endif/*__LBFGS_H__*/
diff --git a/wrapper/Convert/SireOpenMM/openmmminimise.cpp b/wrapper/Convert/SireOpenMM/openmmminimise.cpp
new file mode 100644
index 000000000..91bcbecdb
--- /dev/null
+++ b/wrapper/Convert/SireOpenMM/openmmminimise.cpp
@@ -0,0 +1,430 @@
+
+#include "openmmminimise.h"
+
+/**
+ *  This code is heavily inspired / adapted from the LocalEnergyMinimizer
+ *  in LocalEnergyMinimizer.cpp in the OpenMM source code (version 8.1 beta).
+ *
+ *  The copyright notice for that file is copied below.
+ *
+ */
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2010-2020 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/OpenMMException.h"
+#include "openmm/Platform.h"
+#include "openmm/VerletIntegrator.h"
+#include "lbgfs/lbfgs.h"
+#include <cmath>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <algorithm>
+
+#include "SireError/errors.h"
+#include "SireBase/releasegil.h"
+
+#include "SireBase/progressbar.h"
+#include "SireUnits/units.h"
+
+#include <QDebug>
+
+namespace SireOpenMM
+{
+    class MinimizerData
+    {
+    public:
+        MinimizerData(OpenMM::Context &c, double tolerance, SireBase::ProgressBar &bar)
+            : context(&c), bar(&bar), k(tolerance), cpu_integrator(1.0), it(0)
+        {
+            QString platform_name = QString::fromStdString(context->getPlatform().getName());
+            check_large_forces = (platform_name == "CUDA" || platform_name == "OpenCL" || platform_name == "HIP" || platform_name == "Metal");
+        }
+
+        ~MinimizerData()
+        {
+        }
+
+        OpenMM::Context &getCpuContext()
+        {
+            // Get an alternate context that runs on the CPU and doesn't place any limits
+            // on the magnitude of forces.
+            if (context == 0)
+                throw SireError::program_bug(QObject::tr("MinimizerData: The context has been destroyed"),
+                                             CODELOC);
+
+            if (cpu_context.get() == 0)
+            {
+                OpenMM::Platform *cpu_platform;
+                try
+                {
+                    cpu_platform = &OpenMM::Platform::getPlatformByName("CPU");
+
+                    // The CPU context sometimes fails to initialize because it flags
+                    // a different number of exclusions / exceptions, despite this
+                    // working for all other platforms...
+                    cpu_context.reset(new OpenMM::Context(context->getSystem(), cpu_integrator, *cpu_platform));
+                }
+                catch (...)
+                {
+                    cpu_platform = &OpenMM::Platform::getPlatformByName("Reference");
+
+                    // In those cases, we will fall back to the Reference platform,
+                    // which does work
+                    cpu_context.reset(new OpenMM::Context(context->getSystem(), cpu_integrator, *cpu_platform));
+                }
+                cpu_context->setState(context->getState(OpenMM::State::Positions | OpenMM::State::Velocities | OpenMM::State::Parameters));
+            }
+
+            return *cpu_context;
+        }
+
+        bool checkLargeForces() const
+        {
+            return check_large_forces;
+        }
+
+        OpenMM::Context &getContext()
+        {
+            if (context == 0)
+                throw SireError::program_bug(
+                    QObject::tr("MinimizerData: The context has been destroyed"),
+                    CODELOC);
+
+            return *context;
+        }
+
+        SireBase::ProgressBar &getProgressBar()
+        {
+            if (bar == 0)
+                throw SireError::program_bug(
+                    QObject::tr("MinimizerData: The progress bar has been destroyed"),
+                    CODELOC);
+
+            return *bar;
+        }
+
+        double getK() const
+        {
+            return k;
+        }
+
+        void scaleK(double factor)
+        {
+            k *= factor;
+        }
+
+        qint64 getIteration() const
+        {
+            return it;
+        }
+
+        void incrementIteration()
+        {
+            it++;
+        }
+
+    private:
+        /** This is a pointer to the context being minimised */
+        OpenMM::Context *context;
+
+        /** An integrator and context in case we need to create a
+         *  CPU or Reference context to calculate forces when they
+         *  become too large for the GPU-accelerated contexts
+         */
+        OpenMM::VerletIntegrator cpu_integrator;
+        std::shared_ptr<OpenMM::Context> cpu_context;
+
+        /** The progress bar */
+        SireBase::ProgressBar *bar;
+
+        /** The tolerance */
+        double k;
+
+        /** The current iteration */
+        qint64 it;
+
+        /** Whether or not we need to check for large forces on this platform */
+        bool check_large_forces;
+    };
+
+    /** Calculate the forces and energies for the passed context, given the
+     *  positions in 'positions', returning the energy and storing the
+     *  forces in 'g'
+     */
+    static double computeForcesAndEnergy(OpenMM::Context &context,
+                                         const std::vector<OpenMM::Vec3> &positions,
+                                         lbfgsfloatval_t *g)
+    {
+        context.setPositions(positions);
+        context.computeVirtualSites();
+
+        OpenMM::State state = context.getState(OpenMM::State::Forces | OpenMM::State::Energy,
+                                               false, context.getIntegrator().getIntegrationForceGroups());
+
+        const std::vector<OpenMM::Vec3> &forces = state.getForces();
+        const OpenMM::System &system = context.getSystem();
+
+        for (int i = 0; i < forces.size(); i++)
+        {
+            if (system.getParticleMass(i) == 0)
+            {
+                g[3 * i] = 0.0;
+                g[3 * i + 1] = 0.0;
+                g[3 * i + 2] = 0.0;
+            }
+            else
+            {
+                g[3 * i] = -forces[i][0];
+                g[3 * i + 1] = -forces[i][1];
+                g[3 * i + 2] = -forces[i][2];
+            }
+        }
+
+        return state.getPotentialEnergy();
+    }
+
+    static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x,
+                                    lbfgsfloatval_t *g, const int n,
+                                    const lbfgsfloatval_t step)
+    {
+        MinimizerData *data = reinterpret_cast<MinimizerData *>(instance);
+
+        OpenMM::Context &context = data->getContext();
+
+        const OpenMM::System &system = context.getSystem();
+
+        int num_particles = system.getNumParticles();
+
+        // Compute the force and energy for this configuration.
+        std::vector<OpenMM::Vec3> positions(num_particles);
+
+        for (int i = 0; i < num_particles; i++)
+            positions[i] = OpenMM::Vec3(x[3 * i], x[3 * i + 1], x[3 * i + 2]);
+
+        double energy = computeForcesAndEnergy(context, positions, g);
+
+        if (data->checkLargeForces())
+        {
+            // The CUDA, OpenCL and HIP platforms accumulate forces in fixed point, so they
+            // can't handle very large forces.  Check for problematic forces (very large,
+            // infinite, or NaN) and if necessary recompute them on the CPU.
+
+            for (int i = 0; i < 3 * num_particles; i++)
+            {
+                if (!(std::fabs(g[i]) < 2e9))
+                {
+                    energy = computeForcesAndEnergy(data->getCpuContext(), positions, g);
+                    break;
+                }
+            }
+        }
+
+        // Add harmonic forces for any constraints.
+        int num_constraints = system.getNumConstraints();
+        double k = data->getK();
+
+        for (int i = 0; i < num_constraints; ++i)
+        {
+            int particle1, particle2;
+            double distance;
+            system.getConstraintParameters(i, particle1, particle2, distance);
+
+            OpenMM::Vec3 delta = positions[particle2] - positions[particle1];
+
+            double r2 = delta.dot(delta);
+            double r = std::sqrt(r2);
+            delta *= 1 / r;
+            double dr = r - distance;
+            double kdr = k * dr;
+
+            energy += 0.5 * kdr * dr;
+
+            if (system.getParticleMass(particle1) != 0)
+            {
+                g[3 * particle1] -= kdr * delta[0];
+                g[3 * particle1 + 1] -= kdr * delta[1];
+                g[3 * particle1 + 2] -= kdr * delta[2];
+            }
+            if (system.getParticleMass(particle2) != 0)
+            {
+                g[3 * particle2] += kdr * delta[0];
+                g[3 * particle2 + 1] += kdr * delta[1];
+                g[3 * particle2 + 2] += kdr * delta[2];
+            }
+        }
+
+        data->incrementIteration();
+        auto &bar = data->getProgressBar();
+
+        auto nrg = SireUnits::Dimension::GeneralUnit((energy)*SireUnits::kJ_per_mol);
+
+        bar.tick(QString("Minimising: %1 : %2").arg(data->getIteration()).arg(nrg.toString()));
+
+        return energy;
+    }
+
+    void minimise_openmm_context(OpenMM::Context &context,
+                                 double tolerance, int max_iterations)
+    {
+        auto gil = SireBase::release_gil();
+
+        const OpenMM::System &system = context.getSystem();
+
+        int num_particles = system.getNumParticles();
+
+        double constraint_tol = context.getIntegrator().getConstraintTolerance();
+
+        double working_constraint_tol = std::max(1e-4, constraint_tol);
+
+        double k = 100 / working_constraint_tol;
+
+        lbfgsfloatval_t *x = lbfgs_malloc(num_particles * 3);
+
+        if (x == 0)
+            throw SireError::unavailable_resource(
+                QObject::tr("LocalEnergyMinimizer: Failed to allocate memory"),
+                CODELOC);
+
+        SireBase::ProgressBar bar("Minimising: initialise");
+        bar.setSpeedUnit("steps / s");
+
+        bar = bar.enter();
+
+        try
+        {
+            // Initialize the minimizer.
+            lbfgs_parameter_t param;
+            lbfgs_parameter_init(&param);
+
+            if (!context.getPlatform().supportsDoublePrecision())
+                param.xtol = 1e-7;
+
+            param.max_iterations = max_iterations;
+            param.linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;
+
+            // Make sure the initial configuration satisfies all constraints.
+            context.applyConstraints(working_constraint_tol);
+
+            // Record the initial positions and determine a normalization constant for scaling the tolerance.
+            std::vector<OpenMM::Vec3> initial_pos = context.getState(OpenMM::State::Positions).getPositions();
+
+            double norm = 0.0;
+            for (int i = 0; i < num_particles; i++)
+            {
+                x[3 * i] = initial_pos[i][0];
+                x[3 * i + 1] = initial_pos[i][1];
+                x[3 * i + 2] = initial_pos[i][2];
+                norm += initial_pos[i].dot(initial_pos[i]);
+            }
+
+            norm /= num_particles;
+            norm = (norm < 1 ? 1 : sqrt(norm));
+            param.epsilon = tolerance / norm;
+
+            // Repeatedly minimize, steadily increasing the strength of the springs until all constraints are satisfied.
+            double prev_max_error = 1e10;
+            MinimizerData data(context, k, bar);
+
+            while (true)
+            {
+                // Perform the minimization.
+                lbfgsfloatval_t fx;
+
+                lbfgs(num_particles * 3, x, &fx, evaluate, 0, &data, &param);
+
+                // Check whether all constraints are satisfied.
+                std::vector<OpenMM::Vec3> positions = context.getState(OpenMM::State::Positions).getPositions();
+
+                int num_constraints = system.getNumConstraints();
+
+                double max_error = 0.0;
+
+                for (int i = 0; i < num_constraints; ++i)
+                {
+                    int particle1, particle2;
+                    double distance;
+
+                    system.getConstraintParameters(i, particle1, particle2, distance);
+
+                    OpenMM::Vec3 delta = positions[particle2] - positions[particle1];
+
+                    double r = std::sqrt(delta.dot(delta));
+
+                    double error = std::fabs(r - distance) / distance;
+
+                    if (error > max_error)
+                        max_error = error;
+                }
+
+                if (max_error <= working_constraint_tol)
+                    break; // All constraints are satisfied.
+
+                context.setPositions(initial_pos);
+
+                if (max_error >= prev_max_error)
+                    break; // Further tightening the springs doesn't seem to be helping, so just give up.
+
+                prev_max_error = max_error;
+                data.scaleK(10);
+
+                if (max_error > 100 * working_constraint_tol)
+                {
+                    // We've gotten far enough from a valid state that we might have trouble getting
+                    // back, so reset to the original positions.
+
+                    for (int i = 0; i < num_particles; ++i)
+                    {
+                        x[3 * i] = initial_pos[i][0];
+                        x[3 * i + 1] = initial_pos[i][1];
+                        x[3 * i + 2] = initial_pos[i][2];
+                    }
+                }
+            }
+        }
+        catch (...)
+        {
+            bar.failure();
+            lbfgs_free(x);
+            throw;
+        }
+        lbfgs_free(x);
+
+        // If necessary, do a final constraint projection to make sure they are satisfied
+        // to the full precision requested by the user.
+        if (constraint_tol < working_constraint_tol)
+            context.applyConstraints(working_constraint_tol);
+
+        bar.success();
+    }
+
+}
diff --git a/wrapper/Convert/SireOpenMM/openmmminimise.h b/wrapper/Convert/SireOpenMM/openmmminimise.h
new file mode 100644
index 000000000..03180c182
--- /dev/null
+++ b/wrapper/Convert/SireOpenMM/openmmminimise.h
@@ -0,0 +1,30 @@
+#ifndef SIREOPENMM_OPENMMMINIMISE_H
+#define SIREOPENMM_OPENMMMINIMISE_H
+
+#include <OpenMM.h>
+
+#include "sireglobal.h"
+
+SIRE_BEGIN_HEADER
+
+namespace SireOpenMM
+{
+    /** This is a minimiser heavily inspired by the
+     *  LocalEnergyMinimizer included in OpenMM. This is re-written
+     *  for sire to;
+     *
+     *  1. Better integrate minimisation into the sire progress
+     *     monitoring / interupting framework.
+     *  2. Avoid errors caused by OpenMM switching from the desired
+     *     context to the CPU context, thus triggering spurious exceptions
+     *     related to exclusions / exceptions not matching
+     */
+    void minimise_openmm_context(OpenMM::Context &context,
+                                 double tolerance = 10,
+                                 int max_iterations = -1);
+
+}
+
+SIRE_END_HEADER
+
+#endif
diff --git a/wrapper/Convert/SireOpenMM/openmmmolecule.cpp b/wrapper/Convert/SireOpenMM/openmmmolecule.cpp
index 23cd19bc9..9df2420a9 100644
--- a/wrapper/Convert/SireOpenMM/openmmmolecule.cpp
+++ b/wrapper/Convert/SireOpenMM/openmmmolecule.cpp
@@ -31,6 +31,8 @@
 
 #include <QSet>
 
+#include <QReadWriteLock>
+
 #include <QDebug>
 
 using namespace SireOpenMM;
@@ -310,6 +312,60 @@ std::tuple<int, int, double, double, double> OpenMMMolecule::getException(
                            charge, sigma, epsilon);
 }
 
+/** Return closest constraint length to 'length' based on what
+ *  we have seen before and the constraint_length_tolerance
+ */
+double getSharedConstraintLength(double length)
+{
+    // distance below which constraints are considered to be equal (e.g. to
+    // r0 or to another angle - units in nanometers)
+    const double constraint_length_tolerance = 0.005;
+
+    // is this close to a O-H bond length of water?
+    if (std::abs(length - 0.09572) < constraint_length_tolerance)
+    {
+        return 0.09572;
+    }
+
+    static QVector<double> angle_constraint_lengths;
+    static QReadWriteLock l;
+
+    // most of the time we expect a hit, so a read lock is ok
+    QReadLocker locker(&l);
+
+    // is this close to any of the existing angle constraints?
+    for (const auto &cl : angle_constraint_lengths)
+    {
+        if (std::abs(cl - length) < constraint_length_tolerance)
+        {
+            // this is close enough to an existing constraint
+            // so we will use that instead
+            return cl;
+        }
+    }
+
+    locker.unlock();
+
+    // ok, we didn't find it - we should append it to the list
+    // unless another thread has got here first
+    QWriteLocker locker2(&l);
+
+    for (const auto &cl : angle_constraint_lengths)
+    {
+        if (std::abs(cl - length) < constraint_length_tolerance)
+        {
+            // this is close enough to an existing constraint
+            // so we will use that instead
+            return cl;
+        }
+    }
+
+    // this is a new constraint, so add it to the list
+    angle_constraint_lengths.append(length);
+
+    return length;
+}
+
 void OpenMMMolecule::constructFromAmber(const Molecule &mol,
                                         const AmberParams &params,
                                         const AmberParams &params1,
@@ -444,6 +500,14 @@ void OpenMMMolecule::constructFromAmber(const Molecule &mol,
     this->bond_params.clear();
     this->constraints.clear();
 
+    // initialise all atoms as being unbonded
+    this->unbonded_atoms.reserve(nats);
+
+    for (int i = 0; i < nats; ++i)
+    {
+        this->unbonded_atoms.insert(i);
+    }
+
     // now the bonds
     const double bond_k_to_openmm = 2.0 * (SireUnits::kcal_per_mol / (SireUnits::angstrom * SireUnits::angstrom)).to(SireUnits::kJ_per_mol / (SireUnits::nanometer * SireUnits::nanometer));
     const double bond_r0_to_openmm = SireUnits::angstrom.to(SireUnits::nanometer);
@@ -473,6 +537,12 @@ void OpenMMMolecule::constructFromAmber(const Molecule &mol,
         const double k = bondparam.k() * bond_k_to_openmm;
         const double r0 = bondparam.r0() * bond_r0_to_openmm;
 
+        if (k != 0)
+        {
+            this->unbonded_atoms.remove(atom0);
+            this->unbonded_atoms.remove(atom1);
+        }
+
         const bool has_light_atom = (masses_data[atom0] < 2.5 or masses_data[atom1] < 2.5);
         const bool has_massless_atom = masses_data[atom0] < 0.5 or masses_data[atom1] < 0.5;
 
@@ -489,9 +559,16 @@ void OpenMMMolecule::constructFromAmber(const Molecule &mol,
         {
             // add the constraint - this constrains the bond to whatever length it has now
             const auto delta = coords[atom1] - coords[atom0];
-            const auto constraint_length = std::sqrt((delta[0] * delta[0]) +
-                                                     (delta[1] * delta[1]) +
-                                                     (delta[2] * delta[2]));
+            auto constraint_length = std::sqrt((delta[0] * delta[0]) +
+                                               (delta[1] * delta[1]) +
+                                               (delta[2] * delta[2]));
+
+            // use the r0 for the bond if this is close to the measured length and this
+            // is not a perturbable molecule
+            if (not is_perturbable and std::abs(constraint_length - r0) < 0.01)
+            {
+                constraint_length = r0;
+            }
 
             this->constraints.append(std::make_tuple(atom0, atom1, constraint_length));
             constrained_pairs.insert(to_pair(atom0, atom1));
@@ -543,9 +620,19 @@ void OpenMMMolecule::constructFromAmber(const Molecule &mol,
             if ((this_constraint_type & CONSTRAIN_HANGLES) and is_h_x_h)
             {
                 const auto delta = coords[atom2] - coords[atom0];
-                const auto constraint_length = std::sqrt((delta[0] * delta[0]) +
-                                                         (delta[1] * delta[1]) +
-                                                         (delta[2] * delta[2]));
+                auto constraint_length = std::sqrt((delta[0] * delta[0]) +
+                                                   (delta[1] * delta[1]) +
+                                                   (delta[2] * delta[2]));
+
+                // we can speed up OpenMM by making sure that constraints are
+                // equal if they operate on similar molecules (e.g. all water
+                // constraints are the same. We will check for this if this is
+                // a non-perturbable small molecule
+                if (mol.nAtoms() < 10 and not is_perturbable)
+                {
+                    constraint_length = getSharedConstraintLength(constraint_length);
+                }
+
                 constraints.append(std::make_tuple(atom0, atom2,
                                                    constraint_length));
                 constrained_pairs.insert(key);
@@ -765,6 +852,9 @@ void OpenMMMolecule::alignInternals(const PropertyMap &map)
             bond_params_1.append(bond1);
 
             found_index_1[j] = true;
+
+            unbonded_atoms.remove(atom0);
+            unbonded_atoms.remove(atom1);
         }
     }
 
@@ -1014,10 +1104,13 @@ void OpenMMMolecule::buildExceptions(const Molecule &mol,
     // we need to allow this set to morph
     exception_params.clear();
 
+    // save memory
+    if (unbonded_atoms.isEmpty())
+        unbonded_atoms = QSet<qint32>();
+
     const int nats = this->cljs.count();
 
     const auto &nbpairs = mol.property(map["intrascale"]).asA<CLJNBPairs>();
-    const auto &connectivity = mol.property(map["connectivity"]).asA<Connectivity>();
 
     const int ncgs = mol.nCutGroups();
 
@@ -1036,8 +1129,7 @@ void OpenMMMolecule::buildExceptions(const Molecule &mol,
                 // are any of these atoms unbonded? If so, then
                 // we will need to add a constraint to hold them
                 // in place
-                if (connectivity.nConnections(AtomIdx(i)) == 0 or
-                    connectivity.nConnections(AtomIdx(j)) == 0)
+                if (unbonded_atoms.contains(i) or unbonded_atoms.contains(j))
                 {
                     if (not constrained_pairs.contains(to_pair(i, j)))
                     {
@@ -1045,7 +1137,7 @@ void OpenMMMolecule::buildExceptions(const Molecule &mol,
                         const auto length = std::sqrt((delta[0] * delta[0]) +
                                                       (delta[1] * delta[1]) +
                                                       (delta[2] * delta[2]));
-                        constraints.append(std::make_tuple(i, j, length));
+                        constraints.append(std::make_tuple(i, j, getSharedConstraintLength(length)));
                         constrained_pairs.insert(to_pair(i, j));
                     }
                 }
@@ -1064,13 +1156,51 @@ void OpenMMMolecule::buildExceptions(const Molecule &mol,
         {
             // two atoms must be excluded
             exception_params.append(std::make_tuple(0, 1, 0.0, 0.0));
+
+            if (unbonded_atoms.contains(0) or unbonded_atoms.contains(1))
+            {
+                // these atoms are not bonded - we need to add a constraint
+                // to keep them in place
+                const auto delta = coords[1] - coords[0];
+                const auto length = std::sqrt((delta[0] * delta[0]) +
+                                              (delta[1] * delta[1]) +
+                                              (delta[2] * delta[2]));
+                constraints.append(std::make_tuple(0, 1, getSharedConstraintLength(length)));
+                constrained_pairs.insert(to_pair(0, 1));
+            }
         }
-        else if (nats <= 3)
+        else if (nats == 3)
         {
             // three atoms must be excluded
             exception_params.append(std::make_tuple(0, 1, 0.0, 0.0));
             exception_params.append(std::make_tuple(1, 2, 0.0, 0.0));
             exception_params.append(std::make_tuple(0, 2, 0.0, 0.0));
+
+            if (unbonded_atoms.contains(0) or unbonded_atoms.contains(1) or unbonded_atoms.contains(2))
+            {
+                // these atoms are not bonded - we need to add a constraint
+                // to keep them in place
+                auto delta = coords[1] - coords[0];
+                auto length = std::sqrt((delta[0] * delta[0]) +
+                                        (delta[1] * delta[1]) +
+                                        (delta[2] * delta[2]));
+                constraints.append(std::make_tuple(0, 1, getSharedConstraintLength(length)));
+                constrained_pairs.insert(to_pair(0, 1));
+
+                delta = coords[2] - coords[0];
+                length = std::sqrt((delta[0] * delta[0]) +
+                                   (delta[1] * delta[1]) +
+                                   (delta[2] * delta[2]));
+                constraints.append(std::make_tuple(0, 2, getSharedConstraintLength(length)));
+                constrained_pairs.insert(to_pair(0, 2));
+
+                delta = coords[2] - coords[1];
+                length = std::sqrt((delta[0] * delta[0]) +
+                                   (delta[1] * delta[1]) +
+                                   (delta[2] * delta[2]));
+                constraints.append(std::make_tuple(1, 2, getSharedConstraintLength(length)));
+                constrained_pairs.insert(to_pair(1, 2));
+            }
         }
         else
         {
diff --git a/wrapper/Convert/SireOpenMM/openmmmolecule.h b/wrapper/Convert/SireOpenMM/openmmmolecule.h
index 8158e5c64..5241dc8ff 100644
--- a/wrapper/Convert/SireOpenMM/openmmmolecule.h
+++ b/wrapper/Convert/SireOpenMM/openmmmolecule.h
@@ -144,6 +144,9 @@ namespace SireOpenMM
          */
         SireBase::PropertyMap perturtable_map;
 
+        /** The atoms that are missing any internal parameters */
+        QSet<qint32> unbonded_atoms;
+
         /** Alpha values for all of the atoms. This is equal to zero for
          *  non-ghost atoms, and one for ghost atoms
          */
diff --git a/wrapper/Convert/SireOpenMM/sire_to_openmm_system.cpp b/wrapper/Convert/SireOpenMM/sire_to_openmm_system.cpp
index 724bd861c..a7c7f47e4 100644
--- a/wrapper/Convert/SireOpenMM/sire_to_openmm_system.cpp
+++ b/wrapper/Convert/SireOpenMM/sire_to_openmm_system.cpp
@@ -1401,32 +1401,20 @@ OpenMMMetaData SireOpenMM::sire_to_openmm_system(OpenMM::System &system,
                 // these are the indexes of the exception in the
                 // non-bonded forcefields and also the ghost-14 forcefield
                 exception_idxs[j] = std::make_pair(idx, nbidx);
-
-                // remove this interaction from the ghost forcefields, only
-                // if it involves ghost atoms. If it doens't involve ghost atoms
-                // then we cannot remove it, because OpenMM doesn't support
-                // having a non-zero exception in NonbondedForce while having
-                // exclusions between the same atoms in CustomNonbondedForce
-                if (ghost_ghostff != 0 and (atom0_is_ghost or atom1_is_ghost))
-                {
-                    ghost_ghostff->addExclusion(std::get<0>(p), std::get<1>(p));
-                    ghost_nonghostff->addExclusion(std::get<0>(p), std::get<1>(p));
-                }
             }
             else
             {
                 cljff->addException(std::get<0>(p), std::get<1>(p),
                                     std::get<2>(p), std::get<3>(p),
                                     std::get<4>(p), true);
+            }
 
-                // we need to make sure that the list of exclusions in
-                // the NonbondedForce match those in the CustomNonbondedForces
-                if (ghost_ghostff != 0 and std::get<2>(p) == 0 and
-                    std::get<3>(p) == 0 and std::get<4>(p) == 0)
-                {
-                    ghost_ghostff->addExclusion(std::get<0>(p), std::get<1>(p));
-                    ghost_nonghostff->addExclusion(std::get<0>(p), std::get<1>(p));
-                }
+            // we need to make sure that the list of exclusions in
+            // the NonbondedForce match those in the CustomNonbondedForces
+            if (ghost_ghostff != 0)
+            {
+                ghost_ghostff->addExclusion(std::get<0>(p), std::get<1>(p));
+                ghost_nonghostff->addExclusion(std::get<0>(p), std::get<1>(p));
             }
         }
 
diff --git a/wrapper/Convert/SireRDKit/CMakeLists.txt b/wrapper/Convert/SireRDKit/CMakeLists.txt
index 2d8f1b3a9..0c4237949 100644
--- a/wrapper/Convert/SireRDKit/CMakeLists.txt
+++ b/wrapper/Convert/SireRDKit/CMakeLists.txt
@@ -26,6 +26,14 @@ if (SIRE_HAS_CPP_17)
       add_definitions(-DRDKIT_DYN_LINK)
     endif()
 
+    if (APPLE)
+      # RDKit uses std::bad_any_cast, which isn't available by default
+      # with MacOS 10.9. But it is supplied with conda, so need to mark
+      # that, e.g. see
+      # https://conda-forge.org/docs/maintainer/knowledge_base.html#newer-c-features-with-old-sdk
+      add_definitions(-D_LIBCPP_DISABLE_AVAILABILITY)
+    endif()
+
     # Sire include paths
     include_directories( BEFORE ${SIRE_INCLUDE_DIR} )
 
diff --git a/wrapper/Convert/SireRDKit/_SireRDKit.main.cpp b/wrapper/Convert/SireRDKit/_SireRDKit.main.cpp
index 0ee66fdd0..2959db59b 100644
--- a/wrapper/Convert/SireRDKit/_SireRDKit.main.cpp
+++ b/wrapper/Convert/SireRDKit/_SireRDKit.main.cpp
@@ -2,6 +2,7 @@
 // (C) Christopher Woods, GPL >= 3 License
 
 #include "boost/python.hpp"
+#include "boost/python/converter/registry.hpp"
 
 #include "sire_rdkit.h"
 
@@ -119,4 +120,13 @@ BOOST_PYTHON_MODULE(_SireRDKit)
                 "Internal function called once used to register smarts searching");
 
         register_list<QList<RDKit::ROMOL_SPTR>>();
+
+        // make sure we have at least one registration of the
+        // smart pointer wrapper to ROMOL_SPTR
+        const auto info = boost::python::type_id<RDKit::ROMOL_SPTR>();
+        const auto *reg = bp::converter::registry::query(info);
+        if (reg == NULL)
+        {
+                bp::register_ptr_to_python<RDKit::ROMOL_SPTR>();
+        }
 }
diff --git a/wrapper/Convert/__init__.py b/wrapper/Convert/__init__.py
index f85c0f4e6..21ca59e3b 100644
--- a/wrapper/Convert/__init__.py
+++ b/wrapper/Convert/__init__.py
@@ -72,6 +72,7 @@ def smarts_to_rdkit(*args, **kwargs):
         _openmm_extract_coordinates,
         _openmm_extract_coordinates_and_velocities,
         _openmm_extract_space,
+        _minimise_openmm_context,
     )
 
     _has_openmm = True
@@ -390,6 +391,11 @@ def openmm_extract_coordinates_and_velocities(
     def openmm_extract_space(state):
         return _openmm_extract_space(state)
 
+    def minimise_openmm_context(
+        context, tolerance: float = 10, max_iterations: int = -1
+    ):
+        return _minimise_openmm_context(context, tolerance, max_iterations)
+
 except Exception as e:
     _openmm_import_exception = e
 
@@ -419,6 +425,9 @@ def openmm_extract_coordinates_and_velocities(*args, **kwargs):
     def openmm_extract_space(*args, **kwargs):
         _no_openmm()
 
+    def minimise_openmm_context(*args, **kwargs):
+        _no_openmm()
+
 
 def supported_formats():
     """Return all of the formats supported by this installation"""