Implement base restart functionality #115

Draft: wants to merge 7 commits into support/aiida-2.X
23 changes: 8 additions & 15 deletions aiida_kkr/calculations/kkr.py
@@ -257,21 +257,14 @@ def define(cls, spec):
spec.default_output_node = 'output_parameters'

# define exit codes, also used in parser
spec.exit_code(
301,
'ERROR_NO_OUTPUT_FILE',
message='KKR output file not found',
)
spec.exit_code(
302,
'ERROR_KKR_PARSING_FAILED',
message='KKR parser retuned an error',
)
spec.exit_code(
303,
'ERROR_NO_SHAPEFUN_FOUND',
message='Could not find shapefun from voronoi parent',
)
spec.exit_code(301, 'ERROR_NO_OUTPUT_FILE', message='KKR output file not found')
spec.exit_code(302, 'ERROR_NOT_ENOUGH_MEMORY', message='KkrCalculation needs more memory')
spec.exit_code(303, 'ERROR_TIME_LIMIT', message='KkrCalculation needs more runtime')
spec.exit_code(304, 'ERROR_KKR_PARSING_FAILED', message='KKR parser returned an error')
spec.exit_code(305, 'ERROR_OPENING_OUTPUTS', message='KKR parser could not open an output file')
spec.exit_code(306, 'ERROR_CALCULATION_FAILED', message='KkrCalculation failed for an unknown reason')
spec.exit_code(307, 'ERROR_NO_SHAPEFUN_FOUND', message='Could not find shapefun from voronoi parent')
spec.exit_code(308, 'ERROR_RLOG_TOO_SMALL', message='RLOG too small for Chebychev solver')

def prepare_for_submission(self, tempfolder):
"""
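Note: these exit codes are intended as hooks for the error handlers of the base restart workchain that this PR implements. Below is a minimal sketch of how such handlers could consume them, built on aiida-core's BaseRestartWorkChain; the workchain class name and the recovery strategies are illustrative assumptions, not code contained in this diff.

from aiida.engine import BaseRestartWorkChain, ProcessHandlerReport, process_handler
from aiida_kkr.calculations.kkr import KkrCalculation


class KkrBaseWorkChain(BaseRestartWorkChain):
    """Hypothetical restart workchain around KkrCalculation (spec/outline omitted for brevity)."""

    _process_class = KkrCalculation

    @process_handler(priority=400, exit_codes=[KkrCalculation.exit_codes.ERROR_TIME_LIMIT])
    def handle_time_limit(self, node):
        """Restart with doubled walltime; assumes self.ctx.inputs was populated in setup()."""
        options = self.ctx.inputs.metadata['options']
        options['max_wallclock_seconds'] *= 2
        self.report(f'{node.process_label}<{node.pk}> hit the time limit, doubling the walltime')
        return ProcessHandlerReport()

    @process_handler(priority=410, exit_codes=[KkrCalculation.exit_codes.ERROR_NOT_ENOUGH_MEMORY])
    def handle_out_of_memory(self, node):
        """Restart on twice as many machines to provide more total memory."""
        self.ctx.inputs.metadata['options']['resources']['num_machines'] *= 2
        return ProcessHandlerReport()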
14 changes: 6 additions & 8 deletions aiida_kkr/calculations/kkrimp.py
@@ -168,21 +168,19 @@ def define(cls, spec):
Note: The lengths of the theta, phi and fix_dir lists have to be equal to the number of atoms in the impurity cluster.
"""
)
spec.input(
'cleanup_outfiles',
valid_type=Bool,
required=False,
default=lambda: Bool(False),
help='Cleanup and compress output (works only in aiida-core<2.0 and breaks caching ability).'
)

# define outputs
spec.output('output_parameters', valid_type=Dict, required=True, help='results of the KKRimp calculation')
spec.default_output_node = 'output_parameters'
# define exit codes, also used in parser
spec.exit_code(301, 'ERROR_NO_RETRIEVED_FOLDER', message='Retrieved folder of KKRimp calculation not found.')
spec.exit_code(302, 'ERROR_PARSING_KKRIMPCALC', message='KKRimp parser returned an error.')
#TBD
spec.exit_code(303, 'ERROR_NO_OUTPUT_FILE', message='KKRimp output file not found')
spec.exit_code(304, 'ERROR_NOT_ENOUGH_MEMORY', message='KkrimpCalculation needs more memory')
spec.exit_code(305, 'ERROR_TIME_LIMIT', message='KkrimpCalculation needs more runtime')
spec.exit_code(306, 'ERROR_OPENING_OUTPUTS', message='KKRimp parser could not open an output file')
spec.exit_code(307, 'ERROR_CALCULATION_FAILED', message='KkrimpCalculation failed for an unknown reason')
spec.exit_code(308, 'ERROR_RLOG_TOO_SMALL', message='RLOG too small for Chebychev solver')

def prepare_for_submission(self, tempfolder):
"""
7 changes: 6 additions & 1 deletion aiida_kkr/calculations/voro.py
@@ -81,7 +81,12 @@ def define(cls, spec):
spec.default_output_node = 'output_parameters'
# define exit codes, also used in parser
spec.exit_code(301, 'ERROR_NO_OUTPUT_FILE', message='Voronoi output file not found')
spec.exit_code(302, 'ERROR_VORONOI_PARSING_FAILED', message='Voronoi parser retuned an error')
spec.exit_code(302, 'ERROR_NOT_ENOUGH_MEMORY', message='VoronoiCalculation needs more memory')
spec.exit_code(303, 'ERROR_TIME_LIMIT', message='VoronoiCalculation needs more runtime')
spec.exit_code(304, 'ERROR_VORONOI_PARSING_FAILED', message='Voronoi parser returned an error')
spec.exit_code(305, 'ERROR_OPENING_OUTPUTS', message='Voronoi parser could not open an output file')
spec.exit_code(306, 'ERROR_CALCULATION_FAILED', message='VoronoiCalculation failed for an unknown reason')
spec.exit_code(307, 'ERROR_NACLSD_TOO_SMALL', message='NACLSD in VoronoiCalculation too small.')

def prepare_for_submission(self, tempfolder):
"""Create the input files from the input nodes passed to this instance of the `CalcJob`.
77 changes: 59 additions & 18 deletions aiida_kkr/parsers/kkr.py
@@ -222,23 +222,64 @@ def parse(self, debug=False, **kwargs):
msg = 'Automatically returned success=True for KKR importer although some parsing errors occurred'
self.logger.warning(msg)

# return an exit code if parsing fails
if not success:
# check error file
exit_code = self.check_error_file(out_folder)
if exit_code is not None:
return exit_code
# if nothing was returned we have a general parsing failure
return self.exit_codes.ERROR_KKR_PARSING_FAILED
else: # cleanup after parsing (only if parsing was successful)
# cleanup only works below aiida-core v2.0
if int(aiida_core_version.split('.')[0]) < 2:
# delete completely parsed output files
self.remove_unnecessary_files()
# then (maybe) tar the output to save space
# TODO needs implementing (see kkrimp parser)

def remove_unnecessary_files(self):
"""
Remove files that are not needed anymore after parsing
The information is completely parsed (i.e. in outdict of calculation)
and keeping the file would just be a duplication.
"""
files_to_delete = [KkrCalculation._POTENTIAL, KkrCalculation._SHAPEFUN]
for fileid in files_to_delete:
if fileid in self.retrieved.list_object_names():
self.retrieved.delete_object(fileid, force=True)

def check_error_file(self, out_folder):
"""Check if anything is in the error file and get some hints for error handler in restart workchain"""

# check if something was written to the error file
errorfile = self.node.attributes['scheduler_stderr']

if errorfile in out_folder.list_object_names():
# read
try:
with out_folder.open(errorfile, 'r') as efile:
error_file_lines = efile.read() # Note: read(), not readlines()
except OSError:
self.logger.error(f'Failed to open error file: {errorfile}.')
return self.exit_codes.ERROR_OPENING_OUTPUTS

# check lines in the errorfile
if error_file_lines:

if isinstance(error_file_lines, bytes):
error_file_lines = error_file_lines.replace(b'\x00', b' ')
else:
error_file_lines = error_file_lines.replace('\x00', ' ')

print(f'The following was written into std error and piped to {errorfile} : \n {error_file_lines}')
self.logger.warning(
f'The following was written into std error and piped to {errorfile} : \n {error_file_lines}'
)

# check for some errors which we can fix automatically
if 'STOP Error creating newmesh!' in error_file_lines:
return self.exit_codes.ERROR_RLOG_TOO_SMALL

# here we estimate how much walltime was available and consumed
try:
time_avail_sec = self.node.attributes['last_job_info']['requested_wallclock_time_seconds']
time_calculated = self.node.attributes['last_job_info']['wallclock_time_seconds']
if 0.97 * time_avail_sec < time_calculated:
return self.exit_codes.ERROR_TIME_LIMIT
except KeyError:
if 'TIME LIMIT' in error_file_lines.upper():
return self.exit_codes.ERROR_TIME_LIMIT

# check for out of memory errors
OUT_OF_MEMORY_PHRASES = [
'cgroup out-of-memory handler',
'Out Of Memory',
]
if any(phrase in error_file_lines for phrase in OUT_OF_MEMORY_PHRASES):
return self.exit_codes.ERROR_NOT_ENOUGH_MEMORY

# Catch all exit code for an unknown failure
return self.exit_codes.ERROR_CALCULATION_FAILED
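
For reference, the 97% walltime heuristic above works out as follows (the numbers are illustrative):

time_avail_sec = 3600    # requested_wallclock_time_seconds reported by the scheduler
time_calculated = 3540   # wallclock_time_seconds the job actually consumed
# 0.97 * 3600 = 3492 < 3540: the job used more than 97% of its allocation,
# so the failure is classified as ERROR_TIME_LIMIT.
assert 0.97 * time_avail_sec < time_calculated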
142 changes: 59 additions & 83 deletions aiida_kkr/parsers/kkrimp.py
@@ -132,22 +132,13 @@ def parse(self, debug=False, **kwargs):
# create output node and link
self.out('output_parameters', Dict(dict=out_dict))

# cleanup after parsing (only if parsing was successful), only works below aiida-core v2.0
if success:
if int(aiida_core_version.split('.')[0]) < 2:
# check if we should do the cleanup or not
cleanup_outfiles = False
if 'cleanup_outfiles' in self.node.inputs:
cleanup_outfiles = self.node.inputs.cleanup_outfiles.value
if cleanup_outfiles:
# reduce size of timing file
self.cleanup_outfiles(files['out_timing'], ['Iteration number', 'time until scf starts'])
# reduce size of out_log file
self.cleanup_outfiles(files['out_log'], ['Iteration Number'])
# delete completely parsed output files and create a tar ball to reduce size
self.remove_unnecessary_files()
self.final_cleanup()
else:
# return an exit code if parsing fails
if not success:
# check error file
exit_code = self.check_error_file(out_folder)
if exit_code is not None:
return exit_code
# if nothing was returned here we mark the calculation with the general parsing failure
return self.exit_codes.ERROR_PARSING_KKRIMPCALC

def _check_file_existance(self, files, keyname, fname, icrit, file_errors):
@@ -169,70 +160,55 @@ def _check_file_existance(self, files, keyname, fname, icrit, file_errors):
file_errors.append((icrit, crit_level + f" File '{fname}' not found."))
files[keyname] = None

def cleanup_outfiles(self, fileidentifier, keyslist):
"""open file and remove unneeded output"""
if fileidentifier is not None:
lineids = []
with self.retrieved.open(fileidentifier) as tfile:
txt = tfile.readlines()
for iline in range(len(txt)):
for key in keyslist: # go through all keys
if key in txt[iline]: # add line id to list if key has been found
lineids.append(iline)
# rewrite file deleting the middle part
if len(lineids) > 1: # cut only if more than one iteration was found
txt = txt[:lineids[0]] + \
['# ... [removed output except for last iteration] ...\n'] + \
txt[lineids[-1]:]
with self.retrieved.open(fileidentifier, 'w') as tfilenew:
tfilenew.writelines(txt)

def remove_unnecessary_files(self):
"""
Remove files that are not needed anymore after parsing
The information is completely parsed (i.e. in outdict of calculation)
and keeping the file would just be a duplication.
"""
# first delete unused files (completely in parsed output)
files_to_delete = [
KkrimpCalculation._OUT_ENERGYSP_PER_ATOM, KkrimpCalculation._OUT_ENERGYTOT_PER_ATOM,
KkrimpCalculation._SHAPEFUN
]
for fileid in files_to_delete:
if fileid in self.retrieved.list_object_names():
self.retrieved.delete_object(fileid, force=True)

def final_cleanup(self):
"""Create a tarball of the rest."""

# short name for retrieved folder
ret = self.retrieved

# Now create tarball of output
#
# check if output has been packed to tarfile already
# only if tarfile is not there we create the output tar file
if KkrimpCalculation._FILENAME_TAR not in ret.list_object_names():
# first create dummy file which is used to extract the full path that is given to tarfile.open
with ret.open(KkrimpCalculation._FILENAME_TAR, 'w') as f:
filepath_tar = f.name

# now create tarfile and loop over content of retrieved directory
to_delete = []
with tarfile.open(filepath_tar, 'w:gz') as tf:
for f in ret.list_object_names():
with ret.open(f) as ftest:
filesize = os.stat(ftest.name).st_size
ffull = ftest.name
if (
f != KkrimpCalculation._FILENAME_TAR # ignore tar file
and filesize > 0 # ignore empty files
# ignore files starting with '.' like '.nfs...'
and f[0] != '.'
):
tf.add(ffull, arcname=os.path.basename(ffull))
to_delete.append(f)

# finally delete files that have been added to tarfile
for f in to_delete:
ret.delete_object(f, force=True)
def check_error_file(self, out_folder):
"""Check if anything is in the error file and get some hints for error handler in restart workchain"""

# check if something was written to the error file
errorfile = self.node.attributes['scheduler_stderr']

if errorfile in out_folder.list_object_names():
# read
try:
with out_folder.open(errorfile, 'r') as efile:
error_file_lines = efile.read() # Note: read(), not readlines()
except OSError:
self.logger.error(f'Failed to open error file: {errorfile}.')
return self.exit_codes.ERROR_OPENING_OUTPUTS

# check lines in the errorfile
if error_file_lines:

if isinstance(error_file_lines, bytes):
error_file_lines = error_file_lines.replace(b'\x00', b' ')
else:
error_file_lines = error_file_lines.replace('\x00', ' ')

print(f'The following was written into std error and piped to {errorfile} : \n {error_file_lines}')
self.logger.warning(
f'The following was written into std error and piped to {errorfile} : \n {error_file_lines}'
)

# check for some errors which we can fix automatically
if 'STOP Error creating newmesh!' in error_file_lines:
return self.exit_codes.ERROR_RLOG_TOO_SMALL

# here we estimate how much walltime was available and consumed
try:
time_avail_sec = self.node.attributes['last_job_info']['requested_wallclock_time_seconds']
time_calculated = self.node.attributes['last_job_info']['wallclock_time_seconds']
if 0.97 * time_avail_sec < time_calculated:
return self.exit_codes.ERROR_TIME_LIMIT
except KeyError:
if 'TIME LIMIT' in error_file_lines.upper():
return self.exit_codes.ERROR_TIME_LIMIT

# check for out of memory errors
OUT_OF_MEMORY_PHRASES = [
'cgroup out-of-memory handler',
'Out Of Memory',
]
if any(phrase in error_file_lines for phrase in OUT_OF_MEMORY_PHRASES):
return self.exit_codes.ERROR_NOT_ENOUGH_MEMORY

# Catch all exit code for an unknown failure
return self.exit_codes.ERROR_CALCULATION_FAILED
59 changes: 59 additions & 0 deletions aiida_kkr/parsers/voro.py
@@ -125,5 +125,64 @@ def parse(self, debug=False, **kwargs):
# create output node and link
self.out('output_parameters', Dict(dict=out_dict))

# return an exit code if parsing fails
if not success:
# check error file
exit_code = self.check_error_file(out_folder)
if exit_code is not None:
return exit_code
# if nothing was returned so far we have an unidentified failure of the parser
return self.exit_codes.ERROR_VORONOI_PARSING_FAILED

def check_error_file(self, out_folder):
"""Check if anything is in the error file and get some hints for error handler in restart workchain"""

# check if something was written to the error file
errorfile = self.node.attributes['scheduler_stderr']

if errorfile in out_folder.list_object_names():
# read
try:
with out_folder.open(errorfile, 'r') as efile:
error_file_lines = efile.read() # Note: read(), not readlines()
except OSError:
self.logger.error(f'Failed to open error file: {errorfile}.')
return self.exit_codes.ERROR_OPENING_OUTPUTS

# check lines in the errorfile
if error_file_lines:

if isinstance(error_file_lines, bytes):
error_file_lines = error_file_lines.replace(b'\x00', b' ')
else:
error_file_lines = error_file_lines.replace('\x00', ' ')

print(f'The following was written into std error and piped to {errorfile} : \n {error_file_lines}')
self.logger.warning(
f'The following was written into std error and piped to {errorfile} : \n {error_file_lines}'
)

# check if NACLSD is too small
if 'STOP clsgen: Dimension error (a).' in error_file_lines:
return self.exit_codes.ERROR_NACLSD_TOO_SMALL

# here we estimate how much walltime was available and consumed
try:
time_avail_sec = self.node.attributes['last_job_info']['requested_wallclock_time_seconds']
time_calculated = self.node.attributes['last_job_info']['wallclock_time_seconds']
if 0.97 * time_avail_sec < time_calculated:
return self.exit_codes.ERROR_TIME_LIMIT
except KeyError:
if 'TIME LIMIT' in error_file_lines.upper():
return self.exit_codes.ERROR_TIME_LIMIT

# check for out of memory errors
OUT_OF_MEMORY_PHRASES = [
'cgroup out-of-memory handler',
'Out Of Memory',
]
if any(phrase in error_file_lines for phrase in OUT_OF_MEMORY_PHRASES):
return self.exit_codes.ERROR_NOT_ENOUGH_MEMORY

# Catch all exit code for an unknown failure
return self.exit_codes.ERROR_CALCULATION_FAILED
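
With these exit codes in place, a failed calculation can be triaged directly from the verdi shell before any workchain logic gets involved; a small illustrative example (the PK is made up):

from aiida.orm import load_node

calc = load_node(1234)  # PK of a failed VoronoiCalculation (illustrative)
if not calc.is_finished_ok:
    # exit_status and exit_message correspond to the spec.exit_code definitions above,
    # e.g. 307 / 'NACLSD in VoronoiCalculation too small.'
    print(calc.exit_status, calc.exit_message)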