Skip to content

Commit

Permalink
Support node-local compilation
Browse files Browse the repository at this point in the history
Do this by splitting the input communicators to the Compiler
constructor.  If MPI-3 is available, we use MPI_Split_type, otherwise
we rely on querying the filesystem.  The resulting communicator is
stashed as an attribute on the input communicator to the compile
command (freed when that communicator disappears).
  • Loading branch information
wence- committed Nov 3, 2017
1 parent 268d1a9 commit ce30df4
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 4 deletions.
52 changes: 49 additions & 3 deletions pyop2/compilation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@


from pyop2.mpi import MPI, collective, COMM_WORLD
from pyop2.mpi import dup_comm, get_compilation_comm, set_compilation_comm
from pyop2.configuration import configuration
from pyop2.logger import debug, progress, INFO
from pyop2.exceptions import CompilationError
Expand Down Expand Up @@ -98,6 +99,49 @@ def sniff_compiler_version(cc):
return CompilerInfo(compiler, ver)


@collective
def compilation_comm(comm):
"""Get a communicator for compilation.
:arg comm: The input communicator.
:returns: A communicator used for compilation (may be smaller)
"""
# Should we try and do node-local compilation?
if not configuration["node_local_compilation"]:
return comm
retcomm = get_compilation_comm(comm)
if retcomm is not None:
debug("Found existing compilation communicator")
return retcomm
if MPI.VERSION >= 3:
debug("Creating compilation communicator using MPI_Split_type")
retcomm = comm.Split_type(MPI.COMM_TYPE_SHARED)
set_compilation_comm(comm, retcomm)
return retcomm
debug("Creating compilation communicator using MPI_Split + filesystem")
import tempfile
if comm.rank == 0:
if not os.path.exists(configuration["cache_dir"]):
os.makedirs(configuration["cache_dir"])
tmpname = tempfile.mkdtemp(prefix="rank-determination-",
dir=configuration["cache_dir"])
else:
tmpname = None
tmpname = comm.bcast(tmpname, root=0)
if tmpname is None:
raise CompilationError("Cannot determine sharedness of filesystem")
# Touch file
with open(os.path.join(tmpname, str(comm.rank)), "wb"):
pass
comm.barrier()
import glob
ranks = sorted(int(os.path.basename(name))
for name in glob.glob("%s/[0-9]*" % tmpname))
retcomm = comm.Split(color=min(ranks), key=comm.rank)
set_compilation_comm(comm, retcomm)
return retcomm


class Compiler(object):

compiler_versions = {}
Expand All @@ -115,8 +159,8 @@ class Compiler(object):
flags specified as the ldflags configuration option).
:arg cpp: Should we try and use the C++ compiler instead of the C
compiler?.
:kwarg comm: Optional communicator to compile the code on (only
rank 0 compiles code) (defaults to COMM_WORLD).
:kwarg comm: Optional communicator to compile the code on
(defaults to COMM_WORLD).
"""
def __init__(self, cc, ld=None, cppargs=[], ldargs=[],
cpp=False, comm=None):
Expand All @@ -125,7 +169,9 @@ def __init__(self, cc, ld=None, cppargs=[], ldargs=[],
self._ld = os.environ.get('LDSHARED', ld)
self._cppargs = cppargs + configuration['cflags'].split() + self.workaround_cflags
self._ldargs = ldargs + configuration['ldflags'].split()
self.comm = comm or COMM_WORLD
# Ensure that this is an internal communicator.
comm = dup_comm(comm or COMM_WORLD)
self.comm = compilation_comm(comm)

@property
def compiler_version(self):
Expand Down
6 changes: 6 additions & 0 deletions pyop2/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ class Configuration(dict):
yes)
:param check_src_hashes: Should PyOP2 check that generated code is
the same on all processes? (Default, yes). Uses an allreduce.
:param cache_dir: Where should generated code be cached?
:param node_local_compilation: Should generated code by compiled
"node-local" (one process for each set of processes that share
a filesystem)? You should probably arrange to set cache_dir
to a node-local filesystem too.
:param log_level: How chatty should PyOP2 be? Valid values
are "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL".
:param lazy_evaluation: Should lazy evaluation be on or off?
Expand Down Expand Up @@ -93,6 +98,7 @@ class Configuration(dict):
"cache_dir": ("PYOP2_CACHE_DIR", str,
os.path.join(gettempdir(),
"pyop2-cache-uid%s" % os.getuid())),
"node_local_compilation": ("PYOP2_NODE_LOCAL_COMPILATION", bool, True),
"no_fork_available": ("PYOP2_NO_FORK_AVAILABLE", bool, False),
"print_cache_size": ("PYOP2_PRINT_CACHE_SIZE", bool, False),
"print_summary": ("PYOP2_PRINT_SUMMARY", bool, False),
Expand Down
18 changes: 17 additions & 1 deletion pyop2/mpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,18 @@ def dup_comm(comm_in=None):
return comm_out


# Comm used for compilation, stashed on the internal communicator
compilationcomm_keyval = MPI.Comm.Create_keyval()


def get_compilation_comm(comm):
return comm.Get_attr(compilationcomm_keyval)


def set_compilation_comm(comm, inner):
comm.Set_attr(compilationcomm_keyval, inner)


def free_comm(comm, remove=True):
"""Free an internal communicator.
Expand Down Expand Up @@ -197,6 +209,9 @@ def free_comm(comm, remove=True):
if remove:
# Only do this if not called from free_comms.
dupped_comms.remove(comm)
compilation_comm = get_compilation_comm(comm)
if compilation_comm is not None:
compilation_comm.Free()
comm.Free()


Expand All @@ -210,7 +225,8 @@ def free_comms():
free_comm(c, remove=False)
map(MPI.Comm.Free_keyval, [refcount_keyval,
innercomm_keyval,
outercomm_keyval])
outercomm_keyval,
compilationcomm_keyval])


def collective(fn):
Expand Down

0 comments on commit ce30df4

Please sign in to comment.