Skip to content

Commit

Permalink
Merge pull request #177 from woook/master
Browse files Browse the repository at this point in the history
upgrade integrity check
  • Loading branch information
andyb3 authored Jan 24, 2018
2 parents 82f5907 + acd2a23 commit 95ea3f2
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 57 deletions.
3 changes: 2 additions & 1 deletion automate_demultiplex_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
debug = False

# =====git release for the automate_demultiplexing repo=====
script_release = "v8.0"
script_release = "v9.0"


# =====location of input/output files=====
# path to run folders
Expand Down
87 changes: 31 additions & 56 deletions sequencer_checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,10 @@

class Nextseq_Integrity_Check():
def __init__(self):
# path to the temp runfolder to be populated depending on sequencer
self.temp_folder = ""

# use the existence of expected folders to determine the sequencer
# temp folder on the nextseq
self.nextseqtemp_folder = "D://Illumina//NextSeq Control Software Temp"
# temp folder on the miseq
self.miseqtemp_folder = "D://Illumina//MiSeqTemp"
# flag to be used if it's the nextseq
self.nextseq=""



# path to the mapped workstation share
self.mapped_workstation_folder = "Z://"

Expand All @@ -42,17 +34,17 @@ def __init__(self):
self.workstation_runfolder = ""
self.sequencer_runfolder = ""

# files to ignore from checksum
self.exclude = ["RTAStart.bat", "CorrectedIntMetrics.bin", "EmpiricalPhasingMetrics.bin", "ErrorMetrics.bin", "EventMetrics.bin", "ExtractionMetrics.bin", "PFGridMetrics.bin", "QMetrics.bin", "RegistrationMetrics.bin", "TileMetrics.bin", "000_000_000_na_rtabat.trans", "FilesAdded.csv", "FilesCopied.csv", "md5checksum.txt"]
# flag to set testing
self.testing=False
self.testing = False

# if testing overwrite the paths to that of the testing
if self.testing:
# drive letter given to usb stick
self.mapped_drive = "E://"
# path to the fake nextseqtemp folder
self.nextseqtemp_folder = self.mapped_drive + "integrity_testing//sequencer_temp"
# path to the fake miseqtemp folder
self.miseqtemp_folder = self.mapped_drive + "integrity_testing//sequencer_temp"
# path to the fake workstation folder
self.mapped_workstation_folder = self.mapped_drive + "integrity_testing//workstation"
# path to the fake checksums_inprogress folder
Expand All @@ -64,28 +56,12 @@ def look_for_folder(self):
"""
This script runs every hour.
The script needs to detect when a run has started, and display a window which remains until the integrity test has been performed.
This function identifies which sequencer the script is running on and then looks to see if a run has finished which is not already being monitored.
If it's a newly started run, display a window to say not to do anything until sequencing is complete and integrity checks done.
Display a window to say not to do anything until sequencing is complete and integrity checks done.
When checksums are done, display a message box displaying pass/fail messages.
"""
# check what sequencer it is.
# look if the nextseq temp folder exists
if os.path.isdir(self.nextseqtemp_folder):
# set the temp folder to the nextseq temp folder path
self.temp_folder = self.nextseqtemp_folder
# set flag to denote nextseq
self.nextseq = True
# if the nextseq folder doesn't exist the miseq temp folder should
elif os.path.isdir(self.miseqtemp_folder):
# set the temp folder to the miseq temp folder path
self.temp_folder = self.miseqtemp_folder
else:
# if neither path exists raise an error
raise UserWarning("Can't determine what sequencer this is!")


# for each runfolder in temp folder
for temp_runfolder in os.listdir(self.temp_folder):
for temp_runfolder in os.listdir(self.nextseqtemp_folder):
# look to see if the run has already been caught by this script at a previous time
if temp_runfolder in os.listdir(self.run_in_progress):
# skip this temp_runfolder
Expand All @@ -104,15 +80,23 @@ def look_for_folder(self):
new_run_marker.write(str(datetime.datetime.now()))

# call function which opens a window to say run in progress - don't do anything until a message box appears denoting integrity check has been performed
# this function will close when the run ends and (if nextseq) the checksum has been calculated
# this function will close when the run ends and the checksum has been calculated
self.open_window()

# call function to assess result of checksum and display message box
# if checksums match (integrity test pass) return an info box
if self.check_checksums():
# create root window which can then be hidden
root = Tkinter.Tk()
# hide
root.withdraw()
tkMessageBox.showinfo("Integrity check complete","Integrity check passed")
# if checksums don't match (integrity test FAIL) return an error box
else:
# create root window which can then be hidden
root = Tkinter.Tk()
# hide
root.withdraw()
tkMessageBox.showerror("Integrity check complete","Integrity check failed - please do not use this sequencer and inform the Bioinformatics team immediately")


Expand Down Expand Up @@ -152,7 +136,7 @@ def run_has_finished(self):
If required the checksums are generated, or if not the script waits until the checksums have been generated (by the demultiplexing script).
"""
# build path to the runfolder
self.sequencer_runfolder = os.path.join(self.temp_folder, self.runfolder)
self.sequencer_runfolder = os.path.join(self.nextseqtemp_folder, self.runfolder)
# build paths on the workstation
self.workstation_runfolder = os.path.join(self.mapped_workstation_folder, self.runfolder)
#flag to denote run and data transfer has finished
Expand All @@ -163,25 +147,13 @@ def run_has_finished(self):
if self.RTA_complete in os.listdir(self.sequencer_runfolder) and self.RTA_complete in os.listdir(self.workstation_runfolder):
# if it's a testing run print a message
if self.testing:
print "run finished"
# now run has finished and is transfered can calculate the checksum (if required) - do this here so window remains until checksum has finished.
if self.nextseq:
# if it's a testing run print a message
if self.testing:
print "NextSeq"
# call function which triggers the checksum calculations
self.prepare_checksum_calculations()
print "run finished - skipping 2 hour wait"
else:
# if it's a miseq run wait to see if checksum has been done
while not self.checksums_done():
# if it's a testing run don't wait as long and print a message
if self.testing:
print "miseq run, waiting for checksums"
time.sleep(20)
else:
# wait 5 minutes
time.sleep(300)

# sleep 2 hours to ensure all file transfers are done
time.sleep(7200)

# call function which triggers the checksum calculations
self.prepare_checksum_calculations()
# now all checksums are done change flag to true so the loop finishes and the window is closed
finished = True

Expand All @@ -201,7 +173,7 @@ def run_has_finished(self):

def prepare_checksum_calculations(self):
"""
On the nextseq the checksums are calculated by this script.
The checksums are calculated by this script.
This function checks the runfolder has not already been checksummed, marks the folder as being checksummed and then calls the function to generate the checksums.
"""
if self.testing:
Expand All @@ -227,21 +199,24 @@ def prepare_checksum_calculations(self):
def run_integrity_check(self):
"""
This function uses self variables for the filepaths and calculates the checksums on directories.
It looks for the presence of any files which should be ignored as they are not copied from temp to output.
The checksums are written to a file on the workstation for the demultiplexing script.
"""
if self.testing:
print "starting integrity checking"
# calculate md5 checksums using dirhash package for both folders
workstation_checksum = dirhash(self.workstation_runfolder, 'md5')
sequencer_checksum = dirhash(self.sequencer_runfolder, 'md5')

# calculate the checksums, skipping the files listed in self.exclude
workstation_checksum = dirhash(self.workstation_runfolder, 'md5',excluded_files=self.exclude)
sequencer_checksum = dirhash(self.sequencer_runfolder, 'md5',excluded_files=self.exclude)

if self.testing:
print "workstation checksum = " + workstation_checksum
print "sequencer checksum = " + sequencer_checksum

# write the checksums to the output file (on workstation)
with open(os.path.join(self.workstation_runfolder, self.output_file), 'w') as outputfile:
outputfile.write("workstation checksum (" + self.workstation_runfolder + ")=" + workstation_checksum+"\n")
outputfile.write("sequencer checksum (" + self.sequencer_runfolder + ")=" + sequencer_checksum)
outputfile.write("workstation checksum (" + self.workstation_runfolder + ")=" + workstation_checksum + "\n")
outputfile.write("sequencer checksum (" + self.sequencer_runfolder + ")=" + sequencer_checksum + "\n")


def checksums_done(self):
Expand Down

0 comments on commit 95ea3f2

Please sign in to comment.