From fb7b91db620d160e8f6e440a8edb242d5f9f9ea0 Mon Sep 17 00:00:00 2001 From: Hasindu Gamaarachchi Date: Sun, 24 Jul 2022 15:36:48 +1000 Subject: [PATCH 01/18] lazy_thread API with zstd writing support --- .github/workflows/c-cpp.yml | 4 +- .github/workflows/python.yml | 4 +- Makefile | 9 + examples/lazymt/lazymt.c | 258 ++++++++++++++++ include/slow5/slow5.h | 4 +- .../slow5/slow5_lazymt.h | 8 +- python/pyslow5.h | 1 + python/pyslow5.pxd | 3 - setup.py | 7 +- python/slow5threads.c => src/slow5_lazymt.c | 281 +++--------------- 10 files changed, 321 insertions(+), 258 deletions(-) create mode 100644 examples/lazymt/lazymt.c rename python/slow5threads.h => include/slow5/slow5_lazymt.h (92%) rename python/slow5threads.c => src/slow5_lazymt.c (58%) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 256e3d7c..a77aa358 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -2,9 +2,9 @@ name: C/C++ CI on: push: - branches: [ master, dev, cleanup ] + branches: [ '*' ] pull_request: - branches: [ master, dev, cleanup ] + branches: [ '*' ] jobs: ubuntu_14: diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index a5a1f92c..cb1b55b6 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -2,9 +2,9 @@ name: Python CI on: push: - branches: [ master, dev, cleanup ] + branches: [ '*' ] pull_request: - branches: [ master, dev, cleanup ] + branches: [ '*' ] jobs: ubuntu_14: diff --git a/Makefile b/Makefile index 2c5632fa..ea97d9ec 100644 --- a/Makefile +++ b/Makefile @@ -19,6 +19,11 @@ else CFLAGS += -DSLOW5_USE_ZSTD CPPFLAGS += -I $(zstd_local) endif +ifeq ($(slow5_lazymt),1) +CFLAGS += -DSLOW5_ENABLE_LAZYMT +LDFLAGS += -lpthread +endif + BUILD_DIR = lib STATICLIB = $(BUILD_DIR)/libslow5.a @@ -28,6 +33,7 @@ OBJ = $(BUILD_DIR)/slow5.o \ $(BUILD_DIR)/slow5_idx.o \ $(BUILD_DIR)/slow5_misc.o \ $(BUILD_DIR)/slow5_press.o \ + $(BUILD_DIR)/slow5_lazymt.o \ PREFIX = /usr/local VERSION = `git 
describe --tags` @@ -61,6 +67,9 @@ $(BUILD_DIR)/slow5_misc.o: src/slow5_misc.c src/slow5_misc.h include/slow5/slow5 $(BUILD_DIR)/slow5_press.o: src/slow5_press.c include/slow5/slow5_press.h src/slow5_misc.h include/slow5/slow5_error.h $(CC) $(CFLAGS) $(CPPFLAGS) $< -c -fpic -o $@ +$(BUILD_DIR)/slow5_lazymt.o: src/slow5_lazymt.c include/slow5/slow5_lazymt.h $(SLOW5_H) + $(CC) $(CFLAGS) $(CPPFLAGS) $< -c -fpic -o $@ + clean: rm -rf $(OBJ) $(STATICLIB) $(SHAREDLIB) $(SHAREDLIBV) make -C $(SVB) clean diff --git a/examples/lazymt/lazymt.c b/examples/lazymt/lazymt.c new file mode 100644 index 00000000..81fe1ae2 --- /dev/null +++ b/examples/lazymt/lazymt.c @@ -0,0 +1,258 @@ + +#include +#include +#include +#include +#include +#include +#include +#include + + +#define FILE_PATH "test.blow5" //for reading +#define FILE_PATH_WRITE "test.blow5" +//#define FILE_PATH "/home/jamfer/Data/SK/multi_fast5/s5/FAK40634_d1cc054609fe2c5fcdeac358864f9dc81c8bb793_95.blow5" + +int read_func(){ + + slow5_file_t *sp = slow5_open(FILE_PATH,"r"); + if(sp==NULL){ + fprintf(stderr,"Error in opening file\n"); + exit(EXIT_FAILURE); + } + slow5_rec_t **rec = NULL; + int ret=0; + int batch_size = 4096; + int num_thread = 8; + while((ret = slow5_get_next_batch(&rec,sp,batch_size,num_thread)) > 0){ + + for(int i=0;ilen_raw_signal; + printf("%s\t%ld\n",rec[i]->read_id,len_raw_signal); + } + slow5_free_batch(&rec,ret); + + if(retlen_raw_signal; + printf("%s\t%ld\n",rec[i]->read_id,len_raw_signal); + } + slow5_free_batch(&rec,ret); + + slow5_idx_unload(sp); + slow5_close(sp); + + return 0; +} + + +int write_func(){ + + slow5_file_t *sf = slow5_open(FILE_PATH_WRITE,"w"); + if(sf==NULL){ + fprintf(stderr,"Error in opening file\n"); + exit(EXIT_FAILURE); + } + + //set zstd record compression, svb-zd signal compression + // if(slow5_set_press(sf, SLOW5_COMPRESS_ZSTD, SLOW5_COMPRESS_SVB_ZD) < 0){ // + // fprintf(stderr,"Error setting compression method!\n"); + // exit(EXIT_FAILURE); + // } + + 
/*********************** Header ******************/ + + slow5_hdr_t *header=sf->header; + //add a header group attribute called run_id + if (slow5_hdr_add_attr("run_id", header) != 0){ + fprintf(stderr,"Error adding run_id attribute\n"); + exit(EXIT_FAILURE); + } + //add another header group attribute called asic_id + if (slow5_hdr_add_attr("asic_id", header) != 0){ + fprintf(stderr,"Error adding asic_id attribute\n"); + exit(EXIT_FAILURE); + } + + //set the run_id attribute to "run_0" for read group 0 + if (slow5_hdr_set("run_id", "run_0", 0, header) != 0){ + fprintf(stderr,"Error setting run_id attribute in read group 0\n"); + exit(EXIT_FAILURE); + } + //set the asic_id attribute to "asic_0" for read group 1 + if (slow5_hdr_set("asic_id", "asic_id_0", 0, header) != 0){ + fprintf(stderr,"Error setting asic_id attribute in read group 0\n"); + exit(EXIT_FAILURE); + } + + //add auxilliary field: channel number + if (slow5_aux_add("channel_number", SLOW5_STRING, sf->header)!=0){ + fprintf(stderr,"Error adding channel_number auxilliary field\n"); + exit(EXIT_FAILURE); + } + + //add axuilliary field: median_before + if (slow5_aux_add("median_before", SLOW5_DOUBLE, sf->header)!=0){ + fprintf(stderr,"Error adding median_before auxilliary field\n"); + exit(EXIT_FAILURE); + } + + //add axuilliary field: read_number + if(slow5_aux_add("read_number", SLOW5_INT32_T, sf->header)!=0){ + fprintf(stderr,"Error adding read_number auxilliary field\n"); + exit(EXIT_FAILURE); + } + //add axuilliary field: start_mux + if(slow5_aux_add("start_mux", SLOW5_UINT8_T, sf->header)!=0){ + fprintf(stderr,"Error adding start_mux auxilliary field\n"); + exit(EXIT_FAILURE); + } + //add auxilliary field: start_time + if(slow5_aux_add("start_time", SLOW5_UINT64_T, sf->header)!=0){ + fprintf(stderr,"Error adding start_time auxilliary field\n"); + exit(EXIT_FAILURE); + } + + if(slow5_hdr_write(sf) < 0){ + fprintf(stderr,"Error writing header!\n"); + exit(EXIT_FAILURE); + } + + + slow5_rec_t 
*rec[4000]; + int ret=0; + int batch_size = 4000; + int num_thread = 8; + + + /******************* SLOW5 records ************************/ + for(int i=0;i read_id = strdup(tmp_read_id); + if(slow5_record->read_id == NULL){ + fprintf(stderr,"Could not allocate space for strdup."); + exit(EXIT_FAILURE); + } + slow5_record -> read_id_len = strlen(slow5_record -> read_id); + slow5_record -> read_group = 0; + slow5_record -> digitisation = 4096.0; + slow5_record -> offset = 3.0+i; + slow5_record -> range = 10.0+i; + slow5_record -> sampling_rate = 4000.0; + slow5_record -> len_raw_signal = 10+i; + slow5_record -> raw_signal = malloc(sizeof(int16_t)*(10+i)); + if(slow5_record->raw_signal == NULL){ + fprintf(stderr,"Could not allocate space for raw signal."); + exit(EXIT_FAILURE); + } + for(int j=0;j<10+i;j++){ + slow5_record->raw_signal[j] = j+i; + } + + //auxiliary fileds + char *channel_number = "channel_number"; + double median_before = 0.1+i; + int32_t read_number = 10+i; + uint8_t start_mux = (1+i)%4; + uint64_t start_time = 100+i; + + if(slow5_aux_set_string(slow5_record, "channel_number", channel_number, sf->header)!=0){ + fprintf(stderr,"Error setting channel_number auxilliary field\n"); + exit(EXIT_FAILURE); + } + if(slow5_aux_set(slow5_record, "median_before", &median_before, sf->header)!=0){ + fprintf(stderr,"Error setting median_before auxilliary field\n"); + exit(EXIT_FAILURE); + } + if(slow5_aux_set(slow5_record, "read_number", &read_number, sf->header)!=0){ + fprintf(stderr,"Error setting read_number auxilliary field\n"); + exit(EXIT_FAILURE); + } + + if(slow5_aux_set(slow5_record, "start_mux", &start_mux, sf->header)!=0){ + fprintf(stderr,"Error setting start_mux auxilliary field\n"); + exit(EXIT_FAILURE); + } + + if(slow5_aux_set(slow5_record, "start_time", &start_time, sf->header)!=0){ + fprintf(stderr,"Error setting start_time auxilliary field\n"); + exit(EXIT_FAILURE); + } + } + //end of record setup + + ret = 
slow5_write_batch(rec,sf,batch_size,num_thread); + + if(ret +#include diff --git a/python/pyslow5.pxd b/python/pyslow5.pxd index 4cd3c685..0ed53ef8 100644 --- a/python/pyslow5.pxd +++ b/python/pyslow5.pxd @@ -136,9 +136,6 @@ cdef extern from "pyslow5.h": int slow5_aux_set(slow5_rec_t *read, const char *attr, const void *data, slow5_hdr_t *header); int slow5_aux_set_string(slow5_rec_t *read, const char *attr, const char *data, slow5_hdr_t *header); - -cdef extern from "slow5threads.h": - int slow5_get_batch(slow5_rec_t ***read, slow5_file_t *s5p, char **rid, int num_rid, int num_threads); int slow5_get_next_batch(slow5_rec_t ***read, slow5_file_t *s5p, int batch_size, int num_threads); int slow5_write_batch(slow5_rec_t **read, slow5_file_t *s5p, int batch_size, int num_threads); diff --git a/setup.py b/setup.py index f67af694..ac6e6bb7 100644 --- a/setup.py +++ b/setup.py @@ -37,15 +37,14 @@ def build_ext(*args, ** kwargs ): #adapted from https://github.com/lh3/minimap2/blob/master/setup.py sources=['python/pyslow5.pyx', 'src/slow5.c', 'src/slow5_press.c', 'src/slow5_misc.c', 'src/slow5_idx.c', - 'python/slow5threads.c', + 'src/slow5_lazymt.c', 'thirdparty/streamvbyte/src/streamvbyte_zigzag.c', 'thirdparty/streamvbyte/src/streamvbyte_decode.c', 'thirdparty/streamvbyte/src/streamvbyte_encode.c'] depends=['python/pyslow5.pxd', 'python/pyslow5.h', - 'python/slow5threads.h', - 'slow5/slow5.h', 'slow5/slow5_defs.h', 'slow5/slow5_error.h', 'slow5/slow5_press.h', + 'slow5/slow5.h', 'slow5/slow5_defs.h', 'slow5/slow5_error.h', 'slow5/slow5_press.h', 'slow5/slow5_lazymt.h', 'slow5/klib/khash.h', 'slow5/klib/kvec.h', 'src/slow5_extra.h', 'src/slow5_idx.h', 'src/slow5_misc.h', 'src/klib/ksort.h', 'thirdparty/streamvbyte/include/streamvbyte.h', 'thirdparty/streamvbyte/include/streamvbyte_zigzag.h'] -extra_compile_args = ['-g', '-Wall', '-O2', '-std=c99'] +extra_compile_args = ['-g', '-Wall', '-O2', '-std=c99', '-DSLOW5_ENABLE_LAZYMT=1' ] # extra_compile_args = [] # 
os.environ["CFLAGS"] = '-g -Wall -O2 -std=c99' diff --git a/python/slow5threads.c b/src/slow5_lazymt.c similarity index 58% rename from python/slow5threads.c rename to src/slow5_lazymt.c index afad1af0..b8c389a6 100644 --- a/python/slow5threads.c +++ b/src/slow5_lazymt.c @@ -1,8 +1,10 @@ -/* @file slow5threads.c +/* @file slow5_lazymt.c ** ** @@ ******************************************************************************/ +#ifdef SLOW5_ENABLE_LAZYMT + #include #include #include @@ -10,8 +12,7 @@ #include #include #include -#include "../src/slow5_extra.h" - +#include #define SLOW5_WORK_STEAL 1 //simple work stealing enabled or not (no work stealing mean no load balancing) #define SLOW5_STEAL_THRESH 1 //stealing threshold @@ -45,7 +46,7 @@ typedef struct { //slow5 slow5_file_t *sf; int num_thread; - int batch_size; + int32_t batch_size; } slow5_core_t; @@ -178,12 +179,22 @@ static void slow5_work_per_single_read3(slow5_core_t* core,slow5_db_t* db, int32 assert(db->slow5_rec[i]!=NULL); slow5_file_t *sf = core->sf; //fprintf(stderr,"Here %d\n",i); - slow5_press_method_t press_out = {SLOW5_COMPRESS_ZLIB, SLOW5_COMPRESS_SVB_ZD}; - slow5_press_t *press_ptr = slow5_press_init(press_out); - if(!press_ptr){ - SLOW5_ERROR("Could not initialize the slow5 compression method%s",""); - exit(EXIT_FAILURE); + slow5_press_t *press_ptr = NULL; + + if(sf->compress){ + assert(sf->compress->record_press!=NULL); + assert(sf->compress->signal_press!=NULL); + + slow5_press_method_t press_out = {sf->compress->record_press->method, sf->compress->signal_press->method}; + press_ptr = slow5_press_init(press_out); + if(!press_ptr){ + SLOW5_ERROR("Could not initialize the slow5 compression method%s",""); + exit(EXIT_FAILURE); + } } + + //TODO: check if ASCII if press_ptr is still NULL + db->mem_records[i] = slow5_rec_to_mem(db->slow5_rec[i], sf->header->aux_meta, sf->format, press_ptr, &(db->mem_bytes[i])); //fprintf(stderr,"Here 2 %d\n",i); slow5_press_free(press_ptr); @@ -393,8 +404,6 @@ int 
slow5_write_batch(slow5_rec_t **read, slow5_file_t *s5p, int batch_size, int return num_wr; } - - void slow5_free_batch(slow5_rec_t ***read, int num_rec){ slow5_rec_t **reads = *read; @@ -406,241 +415,31 @@ void slow5_free_batch(slow5_rec_t ***read, int num_rec){ *read = NULL; } -#ifdef PYSLOW5_DEBUG_THREAD - -#define FILE_PATH "test.blow5" //for reading -#define FILE_PATH_WRITE "test.blow5" -//#define FILE_PATH "/home/jamfer/Data/SK/multi_fast5/s5/FAK40634_d1cc054609fe2c5fcdeac358864f9dc81c8bb793_95.blow5" - -int read_func(){ - - slow5_file_t *sp = slow5_open(FILE_PATH,"r"); - if(sp==NULL){ - fprintf(stderr,"Error in opening file\n"); - exit(EXIT_FAILURE); - } - slow5_rec_t **rec = NULL; - int ret=0; - int batch_size = 4096; - int num_thread = 8; - while((ret = slow5_get_next_batch(&rec,sp,batch_size,num_thread)) > 0){ - - for(int i=0;ilen_raw_signal; - printf("%s\t%ld\n",rec[i]->read_id,len_raw_signal); - } - slow5_free_batch(&rec,ret); - - if(retlen_raw_signal; - printf("%s\t%ld\n",rec[i]->read_id,len_raw_signal); - } - slow5_free_batch(&rec,ret); +#else - slow5_idx_unload(sp); - slow5_close(sp); +#include +#include +#include - return 0; +int slow5_get_next_batch(slow5_rec_t ***read, slow5_file_t *s5p, int batch_size, int num_threads){ + fprintf(stderr,"slow5lib has not been compiled with lazy multithreading support\n"); + exit(EXIT_FAILURE); + return -1; } - - -int write_func(){ - - slow5_file_t *sf = slow5_open(FILE_PATH_WRITE,"w"); - if(sf==NULL){ - fprintf(stderr,"Error in opening file\n"); - exit(EXIT_FAILURE); - } - - /*********************** Header ******************/ - - slow5_hdr_t *header=sf->header; - //add a header group attribute called run_id - if (slow5_hdr_add_attr("run_id", header) != 0){ - fprintf(stderr,"Error adding run_id attribute\n"); - exit(EXIT_FAILURE); - } - //add another header group attribute called asic_id - if (slow5_hdr_add_attr("asic_id", header) != 0){ - fprintf(stderr,"Error adding asic_id attribute\n"); - exit(EXIT_FAILURE); - 
} - - //set the run_id attribute to "run_0" for read group 0 - if (slow5_hdr_set("run_id", "run_0", 0, header) != 0){ - fprintf(stderr,"Error setting run_id attribute in read group 0\n"); - exit(EXIT_FAILURE); - } - //set the asic_id attribute to "asic_0" for read group 1 - if (slow5_hdr_set("asic_id", "asic_id_0", 0, header) != 0){ - fprintf(stderr,"Error setting asic_id attribute in read group 0\n"); - exit(EXIT_FAILURE); - } - - //add auxilliary field: channel number - if (slow5_aux_add("channel_number", SLOW5_STRING, sf->header)!=0){ - fprintf(stderr,"Error adding channel_number auxilliary field\n"); - exit(EXIT_FAILURE); - } - - //add axuilliary field: median_before - if (slow5_aux_add("median_before", SLOW5_DOUBLE, sf->header)!=0){ - fprintf(stderr,"Error adding median_before auxilliary field\n"); - exit(EXIT_FAILURE); - } - - //add axuilliary field: read_number - if(slow5_aux_add("read_number", SLOW5_INT32_T, sf->header)!=0){ - fprintf(stderr,"Error adding read_number auxilliary field\n"); - exit(EXIT_FAILURE); - } - //add axuilliary field: start_mux - if(slow5_aux_add("start_mux", SLOW5_UINT8_T, sf->header)!=0){ - fprintf(stderr,"Error adding start_mux auxilliary field\n"); - exit(EXIT_FAILURE); - } - //add auxilliary field: start_time - if(slow5_aux_add("start_time", SLOW5_UINT64_T, sf->header)!=0){ - fprintf(stderr,"Error adding start_time auxilliary field\n"); - exit(EXIT_FAILURE); - } - - if(slow5_hdr_write(sf) < 0){ - fprintf(stderr,"Error writing header!\n"); - exit(EXIT_FAILURE); - } - - - slow5_rec_t *rec[4000]; - int ret=0; - int batch_size = 4000; - int num_thread = 8; - - - /******************* SLOW5 records ************************/ - for(int i=0;i read_id = strdup(tmp_read_id); - if(slow5_record->read_id == NULL){ - fprintf(stderr,"Could not allocate space for strdup."); - exit(EXIT_FAILURE); - } - slow5_record -> read_id_len = strlen(slow5_record -> read_id); - slow5_record -> read_group = 0; - slow5_record -> digitisation = 4096.0; - 
slow5_record -> offset = 3.0+i; - slow5_record -> range = 10.0+i; - slow5_record -> sampling_rate = 4000.0; - slow5_record -> len_raw_signal = 10+i; - slow5_record -> raw_signal = malloc(sizeof(int16_t)*(10+i)); - if(slow5_record->raw_signal == NULL){ - fprintf(stderr,"Could not allocate space for raw signal."); - exit(EXIT_FAILURE); - } - for(int j=0;j<10+i;j++){ - slow5_record->raw_signal[j] = j+i; - } - - //auxiliary fileds - char *channel_number = "channel_number"; - double median_before = 0.1+i; - int32_t read_number = 10+i; - uint8_t start_mux = (1+i)%4; - uint64_t start_time = 100+i; - - if(slow5_aux_set_string(slow5_record, "channel_number", channel_number, sf->header)!=0){ - fprintf(stderr,"Error setting channel_number auxilliary field\n"); - exit(EXIT_FAILURE); - } - if(slow5_aux_set(slow5_record, "median_before", &median_before, sf->header)!=0){ - fprintf(stderr,"Error setting median_before auxilliary field\n"); - exit(EXIT_FAILURE); - } - if(slow5_aux_set(slow5_record, "read_number", &read_number, sf->header)!=0){ - fprintf(stderr,"Error setting read_number auxilliary field\n"); - exit(EXIT_FAILURE); - } - - if(slow5_aux_set(slow5_record, "start_mux", &start_mux, sf->header)!=0){ - fprintf(stderr,"Error setting start_mux auxilliary field\n"); - exit(EXIT_FAILURE); - } - - if(slow5_aux_set(slow5_record, "start_time", &start_time, sf->header)!=0){ - fprintf(stderr,"Error setting start_time auxilliary field\n"); - exit(EXIT_FAILURE); - } - } - //end of record setup - - ret = slow5_write_batch(rec,sf,batch_size,num_thread); - - if(ret Date: Tue, 26 Jul 2022 00:27:48 +1000 Subject: [PATCH 02/18] nonlazy version --- Makefile | 8 +- examples/{lazymt => mt}/lazymt.c | 14 +- examples/mt/mt.c | 278 +++++++++++++++++++++++++++++ include/slow5/slow5_lazymt.h | 17 -- include/slow5/slow5_mt.h | 48 +++++ python/pyslow5.h | 2 +- python/pyslow5.pxd | 8 +- python/pyslow5.pyx | 12 +- setup.py | 4 +- src/{slow5_lazymt.c => slow5_mt.c} | 226 +++++++++++++++-------- 10 files 
changed, 498 insertions(+), 119 deletions(-) rename examples/{lazymt => mt}/lazymt.c (94%) create mode 100644 examples/mt/mt.c delete mode 100644 include/slow5/slow5_lazymt.h create mode 100644 include/slow5/slow5_mt.h rename src/{slow5_lazymt.c => slow5_mt.c} (62%) diff --git a/Makefile b/Makefile index ea97d9ec..33c6db00 100644 --- a/Makefile +++ b/Makefile @@ -19,8 +19,8 @@ else CFLAGS += -DSLOW5_USE_ZSTD CPPFLAGS += -I $(zstd_local) endif -ifeq ($(slow5_lazymt),1) -CFLAGS += -DSLOW5_ENABLE_LAZYMT +ifeq ($(slow5_mt),1) +CFLAGS += -DSLOW5_ENABLE_MT LDFLAGS += -lpthread endif @@ -33,7 +33,7 @@ OBJ = $(BUILD_DIR)/slow5.o \ $(BUILD_DIR)/slow5_idx.o \ $(BUILD_DIR)/slow5_misc.o \ $(BUILD_DIR)/slow5_press.o \ - $(BUILD_DIR)/slow5_lazymt.o \ + $(BUILD_DIR)/slow5_mt.o \ PREFIX = /usr/local VERSION = `git describe --tags` @@ -67,7 +67,7 @@ $(BUILD_DIR)/slow5_misc.o: src/slow5_misc.c src/slow5_misc.h include/slow5/slow5 $(BUILD_DIR)/slow5_press.o: src/slow5_press.c include/slow5/slow5_press.h src/slow5_misc.h include/slow5/slow5_error.h $(CC) $(CFLAGS) $(CPPFLAGS) $< -c -fpic -o $@ -$(BUILD_DIR)/slow5_lazymt.o: src/slow5_lazymt.c include/slow5/slow5_lazymt.h $(SLOW5_H) +$(BUILD_DIR)/slow5_mt.o: src/slow5_mt.c include/slow5/slow5_mt.h $(SLOW5_H) $(CC) $(CFLAGS) $(CPPFLAGS) $< -c -fpic -o $@ clean: diff --git a/examples/lazymt/lazymt.c b/examples/mt/lazymt.c similarity index 94% rename from examples/lazymt/lazymt.c rename to examples/mt/lazymt.c index 81fe1ae2..01f06812 100644 --- a/examples/lazymt/lazymt.c +++ b/examples/mt/lazymt.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #define FILE_PATH "test.blow5" //for reading @@ -24,13 +24,13 @@ int read_func(){ int ret=0; int batch_size = 4096; int num_thread = 8; - while((ret = slow5_get_next_batch(&rec,sp,batch_size,num_thread)) > 0){ + while((ret = slow5_get_next_batch_lazy(&rec,sp,batch_size,num_thread)) > 0){ for(int i=0;ilen_raw_signal; printf("%s\t%ld\n",rec[i]->read_id,len_raw_signal); } - 
slow5_free_batch(&rec,ret); + slow5_free_batch_lazy(&rec,ret); if(retlen_raw_signal; printf("%s\t%ld\n",rec[i]->read_id,len_raw_signal); } - slow5_free_batch(&rec,ret); + slow5_free_batch_lazy(&rec,ret); slow5_idx_unload(sp); slow5_close(sp); @@ -229,7 +229,7 @@ int write_func(){ } //end of record setup - ret = slow5_write_batch(rec,sf,batch_size,num_thread); + ret = slow5_write_batch_lazy(rec,sf,batch_size,num_thread); if(ret +#include +#include +#include +#include +#include +#include +#include + + +#define FILE_PATH "test.blow5" //for reading +#define FILE_PATH_WRITE "test.blow5" +//#define FILE_PATH "/home/jamfer/Data/SK/multi_fast5/s5/FAK40634_d1cc054609fe2c5fcdeac358864f9dc81c8bb793_95.blow5" + +int read_func(){ + + slow5_file_t *sp = slow5_open(FILE_PATH,"r"); + if(sp==NULL){ + fprintf(stderr,"Error in opening file\n"); + exit(EXIT_FAILURE); + } + slow5_rec_t **rec = NULL; + int ret=0; + int batch_size = 4096; + int num_thread = 8; + + slow5_mt_t *mt = slow5_init_mt(num_thread,sp); + slow5_batch_t *read_batch = slow5_init_batch(batch_size); + + while((ret = slow5_get_next_batch(mt,read_batch,batch_size)) > 0){ + + for(int i=0;islow5_rec; + uint64_t len_raw_signal = rec[i]->len_raw_signal; + printf("%s\t%ld\n",rec[i]->read_id,len_raw_signal); + } + + if(retslow5_rec; + uint64_t len_raw_signal = rec[i]->len_raw_signal; + printf("%s\t%ld\n",rec[i]->read_id,len_raw_signal); + } + slow5_free_batch(read_batch); + + slow5_free_mt(mt); + + slow5_idx_unload(sp); + slow5_close(sp); + + return 0; +} + + +int write_func(){ + + slow5_file_t *sf = slow5_open(FILE_PATH_WRITE,"w"); + if(sf==NULL){ + fprintf(stderr,"Error in opening file\n"); + exit(EXIT_FAILURE); + } + + //set zstd record compression, svb-zd signal compression + // if(slow5_set_press(sf, SLOW5_COMPRESS_ZSTD, SLOW5_COMPRESS_SVB_ZD) < 0){ // + // fprintf(stderr,"Error setting compression method!\n"); + // exit(EXIT_FAILURE); + // } + + /*********************** Header ******************/ + + slow5_hdr_t 
*header=sf->header; + //add a header group attribute called run_id + if (slow5_hdr_add_attr("run_id", header) != 0){ + fprintf(stderr,"Error adding run_id attribute\n"); + exit(EXIT_FAILURE); + } + //add another header group attribute called asic_id + if (slow5_hdr_add_attr("asic_id", header) != 0){ + fprintf(stderr,"Error adding asic_id attribute\n"); + exit(EXIT_FAILURE); + } + + //set the run_id attribute to "run_0" for read group 0 + if (slow5_hdr_set("run_id", "run_0", 0, header) != 0){ + fprintf(stderr,"Error setting run_id attribute in read group 0\n"); + exit(EXIT_FAILURE); + } + //set the asic_id attribute to "asic_0" for read group 1 + if (slow5_hdr_set("asic_id", "asic_id_0", 0, header) != 0){ + fprintf(stderr,"Error setting asic_id attribute in read group 0\n"); + exit(EXIT_FAILURE); + } + + //add auxilliary field: channel number + if (slow5_aux_add("channel_number", SLOW5_STRING, sf->header)!=0){ + fprintf(stderr,"Error adding channel_number auxilliary field\n"); + exit(EXIT_FAILURE); + } + + //add axuilliary field: median_before + if (slow5_aux_add("median_before", SLOW5_DOUBLE, sf->header)!=0){ + fprintf(stderr,"Error adding median_before auxilliary field\n"); + exit(EXIT_FAILURE); + } + + //add axuilliary field: read_number + if(slow5_aux_add("read_number", SLOW5_INT32_T, sf->header)!=0){ + fprintf(stderr,"Error adding read_number auxilliary field\n"); + exit(EXIT_FAILURE); + } + //add axuilliary field: start_mux + if(slow5_aux_add("start_mux", SLOW5_UINT8_T, sf->header)!=0){ + fprintf(stderr,"Error adding start_mux auxilliary field\n"); + exit(EXIT_FAILURE); + } + //add auxilliary field: start_time + if(slow5_aux_add("start_time", SLOW5_UINT64_T, sf->header)!=0){ + fprintf(stderr,"Error adding start_time auxilliary field\n"); + exit(EXIT_FAILURE); + } + + if(slow5_hdr_write(sf) < 0){ + fprintf(stderr,"Error writing header!\n"); + exit(EXIT_FAILURE); + } + + + + int ret=0; + int batch_size = 4000; + int num_thread = 8; + + slow5_mt_t *mt = 
slow5_init_mt(num_thread,sf); + slow5_batch_t *read_batch = slow5_init_batch(batch_size); + slow5_rec_t **rec = read_batch->slow5_rec; + + /******************* SLOW5 records ************************/ + for(int i=0;i read_id = strdup(tmp_read_id); + if(slow5_record->read_id == NULL){ + fprintf(stderr,"Could not allocate space for strdup."); + exit(EXIT_FAILURE); + } + slow5_record -> read_id_len = strlen(slow5_record -> read_id); + slow5_record -> read_group = 0; + slow5_record -> digitisation = 4096.0; + slow5_record -> offset = 3.0+i; + slow5_record -> range = 10.0+i; + slow5_record -> sampling_rate = 4000.0; + slow5_record -> len_raw_signal = 10+i; + slow5_record -> raw_signal = malloc(sizeof(int16_t)*(10+i)); + if(slow5_record->raw_signal == NULL){ + fprintf(stderr,"Could not allocate space for raw signal."); + exit(EXIT_FAILURE); + } + for(int j=0;j<10+i;j++){ + slow5_record->raw_signal[j] = j+i; + } + + //auxiliary fileds + char *channel_number = "channel_number"; + double median_before = 0.1+i; + int32_t read_number = 10+i; + uint8_t start_mux = (1+i)%4; + uint64_t start_time = 100+i; + + if(slow5_aux_set_string(slow5_record, "channel_number", channel_number, sf->header)!=0){ + fprintf(stderr,"Error setting channel_number auxilliary field\n"); + exit(EXIT_FAILURE); + } + if(slow5_aux_set(slow5_record, "median_before", &median_before, sf->header)!=0){ + fprintf(stderr,"Error setting median_before auxilliary field\n"); + exit(EXIT_FAILURE); + } + if(slow5_aux_set(slow5_record, "read_number", &read_number, sf->header)!=0){ + fprintf(stderr,"Error setting read_number auxilliary field\n"); + exit(EXIT_FAILURE); + } + + if(slow5_aux_set(slow5_record, "start_mux", &start_mux, sf->header)!=0){ + fprintf(stderr,"Error setting start_mux auxilliary field\n"); + exit(EXIT_FAILURE); + } + + if(slow5_aux_set(slow5_record, "start_time", &start_time, sf->header)!=0){ + fprintf(stderr,"Error setting start_time auxilliary field\n"); + exit(EXIT_FAILURE); + } + } + //end of 
record setup + + ret = slow5_write_batch(mt, read_batch,batch_size); + + if(ret -#include +#include diff --git a/python/pyslow5.pxd b/python/pyslow5.pxd index 0ed53ef8..a26f4755 100644 --- a/python/pyslow5.pxd +++ b/python/pyslow5.pxd @@ -136,7 +136,7 @@ cdef extern from "pyslow5.h": int slow5_aux_set(slow5_rec_t *read, const char *attr, const void *data, slow5_hdr_t *header); int slow5_aux_set_string(slow5_rec_t *read, const char *attr, const char *data, slow5_hdr_t *header); - int slow5_get_batch(slow5_rec_t ***read, slow5_file_t *s5p, char **rid, int num_rid, int num_threads); - int slow5_get_next_batch(slow5_rec_t ***read, slow5_file_t *s5p, int batch_size, int num_threads); - int slow5_write_batch(slow5_rec_t **read, slow5_file_t *s5p, int batch_size, int num_threads); - void slow5_free_batch(slow5_rec_t ***read, int num_rec); + int slow5_get_batch_lazy(slow5_rec_t ***read, slow5_file_t *s5p, char **rid, int num_rid, int num_threads); + int slow5_get_next_batch_lazy(slow5_rec_t ***read, slow5_file_t *s5p, int batch_size, int num_threads); + int slow5_write_batch_lazy(slow5_rec_t **read, slow5_file_t *s5p, int batch_size, int num_threads); + void slow5_free_batch_lazy(slow5_rec_t ***read, int num_rec); diff --git a/python/pyslow5.pyx b/python/pyslow5.pyx index 16bb6bb0..c8728530 100644 --- a/python/pyslow5.pyx +++ b/python/pyslow5.pyx @@ -453,7 +453,7 @@ cdef class Open: self.logger.debug("slow5_get_batch: num_reads: {}".format(batch_len)) - ret = slow5_get_batch(&self.trec, self.s5, self.rid, batch_len, threads); + ret = slow5_get_batch_lazy(&self.trec, self.s5, self.rid, batch_len, threads); self.logger.debug("get_read_multi slow5_get_batch ret: {}".format(ret)) if ret < 0: self.logger.error("slow5_get_next error code: {}: {}".format(ret, self.error_codes[ret])) @@ -538,7 +538,7 @@ cdef class Open: dic.update(aux_dic) yield dic - slow5_free_batch(&self.trec, ret) + slow5_free_batch_lazy(&self.trec, ret) for i in range(batch_len): free(self.rid[i]) 
free(self.rid) @@ -1123,7 +1123,7 @@ cdef class Open: # While loops check ret of previous read for errors as fail safe while ret > 0: start_slow5_get_next = time.time() - ret = slow5_get_next_batch(&self.trec, self.s5, batchsize, threads) + ret = slow5_get_next_batch_lazy(&self.trec, self.s5, batchsize, threads) self.total_time_slow5_get_next = self.total_time_slow5_get_next + (time.time() - start_slow5_get_next) self.logger.debug("slow5_get_next_multi return: {}".format(ret)) # check for EOF or other errors @@ -1209,7 +1209,7 @@ cdef class Open: row.update(aux_dic) self.total_time_yield_reads = self.total_time_yield_reads + (time.time() - python_parse_read_start) yield row - slow5_free_batch(&self.trec, ret) + slow5_free_batch_lazy(&self.trec, ret) if ret < batchsize: self.logger.debug("slow5_get_next_multi has no more batches - batchsize:{} ret:{}".format(batchsize, ret)) break @@ -1947,14 +1947,14 @@ cdef class Open: self.logger.debug("write_record_batch: aux stuff done") - self.logger.debug("write_record_batch: slow5_write_batch()") + self.logger.debug("write_record_batch: slow5_write_batch_lazy()") # write the record if batch_len <= 0: self.logger.debug("write_record_batch: batch_len 0 or less") break - ret = slow5_write_batch(self.twrite, self.s5, batch_len, threads) + ret = slow5_write_batch_lazy(self.twrite, self.s5, batch_len, threads) if ret < batch_len: self.logger.error("write_record_batch: write failed") return -1 diff --git a/setup.py b/setup.py index ac6e6bb7..730c1f93 100644 --- a/setup.py +++ b/setup.py @@ -37,14 +37,14 @@ def build_ext(*args, ** kwargs ): #adapted from https://github.com/lh3/minimap2/blob/master/setup.py sources=['python/pyslow5.pyx', 'src/slow5.c', 'src/slow5_press.c', 'src/slow5_misc.c', 'src/slow5_idx.c', - 'src/slow5_lazymt.c', + 'src/slow5_mt.c', 'thirdparty/streamvbyte/src/streamvbyte_zigzag.c', 'thirdparty/streamvbyte/src/streamvbyte_decode.c', 'thirdparty/streamvbyte/src/streamvbyte_encode.c'] 
depends=['python/pyslow5.pxd', 'python/pyslow5.h', 'slow5/slow5.h', 'slow5/slow5_defs.h', 'slow5/slow5_error.h', 'slow5/slow5_press.h', 'slow5/slow5_lazymt.h', 'slow5/klib/khash.h', 'slow5/klib/kvec.h', 'src/slow5_extra.h', 'src/slow5_idx.h', 'src/slow5_misc.h', 'src/klib/ksort.h', 'thirdparty/streamvbyte/include/streamvbyte.h', 'thirdparty/streamvbyte/include/streamvbyte_zigzag.h'] -extra_compile_args = ['-g', '-Wall', '-O2', '-std=c99', '-DSLOW5_ENABLE_LAZYMT=1' ] +extra_compile_args = ['-g', '-Wall', '-O2', '-std=c99', '-DSLOW5_ENABLE_MT=1' ] # extra_compile_args = [] # os.environ["CFLAGS"] = '-g -Wall -O2 -std=c99' diff --git a/src/slow5_lazymt.c b/src/slow5_mt.c similarity index 62% rename from src/slow5_lazymt.c rename to src/slow5_mt.c index b8c389a6..badea54e 100644 --- a/src/slow5_lazymt.c +++ b/src/slow5_mt.c @@ -1,9 +1,8 @@ -/* @file slow5_lazymt.c +/* @file slow5_mt.c ** ** @@ ******************************************************************************/ - -#ifdef SLOW5_ENABLE_LAZYMT +#ifdef SLOW5_ENABLE_MT #include #include @@ -12,7 +11,7 @@ #include #include #include -#include +#include #define SLOW5_WORK_STEAL 1 //simple work stealing enabled or not (no work stealing mean no load balancing) #define SLOW5_STEAL_THRESH 1 //stealing threshold @@ -27,36 +26,14 @@ extern enum slow5_exit_condition_opt slow5_exit_condition; } \ } -/* a batch of read data (dynamic data based on the reads) */ -typedef struct { - int32_t n_rec; - int32_t capacity_rec; - - char **mem_records; //unused in get() - size_t *mem_bytes; - - slow5_rec_t **slow5_rec; - char **rid; //only used in get() - -} slow5_db_t; - - -/* core data structure (mostly static data throughout the program lifetime) */ -typedef struct { - //slow5 - slow5_file_t *sf; - int num_thread; - int32_t batch_size; -} slow5_core_t; - /* argument wrapper for the multithreaded framework used for data processing */ typedef struct { - slow5_core_t* core; - slow5_db_t* db; + slow5_mt_t* core; + slow5_batch_t* db; 
int32_t starti; int32_t endi; - void (*func)(slow5_core_t*,slow5_db_t*,int); + void (*func)(slow5_mt_t*,slow5_batch_t*,int); int32_t thread_index; #ifdef SLOW5_WORK_STEAL void *all_pthread_args; @@ -65,29 +42,28 @@ typedef struct { /* initialise the core data structure */ -static slow5_core_t* slow5_init_core(slow5_file_t *s5p, int batch_size, int num_thread) { +slow5_mt_t *slow5_init_mt(int num_thread, slow5_file_t *s5p) { - slow5_core_t* core = (slow5_core_t*)malloc(sizeof(slow5_core_t)); + slow5_mt_t* core = (slow5_mt_t*)malloc(sizeof(slow5_mt_t)); SLOW5_MALLOC_CHK_LAZY_EXIT(core); core->sf = s5p; - core->batch_size = batch_size; core->num_thread = num_thread; return core; } /* free the core data structure */ -static void slow5_free_core(slow5_core_t* core) { +void slow5_free_mt(slow5_mt_t* core) { free(core); } /* initialise a data batch */ -static slow5_db_t* slow5_init_db(slow5_core_t* core) { - slow5_db_t* db = (slow5_db_t*)(malloc(sizeof(slow5_db_t))); +slow5_batch_t* slow5_init_batch(int batch_capacity){ + slow5_batch_t* db = (slow5_batch_t*)(malloc(sizeof(slow5_batch_t))); SLOW5_MALLOC_CHK_LAZY_EXIT(db); - db->capacity_rec = core->batch_size; + db->capacity_rec = batch_capacity; db->n_rec = 0; db->mem_records = (char**)(calloc(db->capacity_rec,sizeof(char*))); @@ -102,7 +78,7 @@ static slow5_db_t* slow5_init_db(slow5_core_t* core) { } /* load a data batch from disk */ -static int slow5_load_db(slow5_core_t* core, slow5_db_t* db) { +static int slow5_load_db(slow5_mt_t* core, slow5_batch_t* db) { db->n_rec = 0; @@ -130,7 +106,7 @@ static int slow5_load_db(slow5_core_t* core, slow5_db_t* db) { } -static int slow5_write_db(slow5_core_t* core, slow5_db_t* db) { +static int slow5_write_db(slow5_mt_t* core, slow5_batch_t* db) { int32_t i = 0; @@ -147,7 +123,7 @@ static int slow5_write_db(slow5_core_t* core, slow5_db_t* db) { } -static void slow5_parse_single(slow5_core_t* core,slow5_db_t* db, int32_t i){ +static void slow5_parse_single(slow5_mt_t* 
core,slow5_batch_t* db, int32_t i){ assert(db->mem_bytes[i]>0); assert(db->mem_records[i]!=NULL); @@ -160,11 +136,11 @@ static void slow5_parse_single(slow5_core_t* core,slow5_db_t* db, int32_t i){ } -static void slow5_work_per_single_read(slow5_core_t* core,slow5_db_t* db, int32_t i){ +static void slow5_work_per_single_read(slow5_mt_t* core,slow5_batch_t* db, int32_t i){ slow5_parse_single(core,db,i); } -static void slow5_work_per_single_read2(slow5_core_t* core,slow5_db_t* db, int32_t i){ +static void slow5_work_per_single_read2(slow5_mt_t* core,slow5_batch_t* db, int32_t i){ assert(db->rid[i]!=NULL); int ret = slow5_get(db->rid[i],&db->slow5_rec[i], core->sf); if(ret<0){ @@ -175,7 +151,7 @@ static void slow5_work_per_single_read2(slow5_core_t* core,slow5_db_t* db, int32 } -static void slow5_work_per_single_read3(slow5_core_t* core,slow5_db_t* db, int32_t i){ +static void slow5_work_per_single_read3(slow5_mt_t* core,slow5_batch_t* db, int32_t i){ assert(db->slow5_rec[i]!=NULL); slow5_file_t *sf = core->sf; //fprintf(stderr,"Here %d\n",i); @@ -208,7 +184,7 @@ static void slow5_work_per_single_read3(slow5_core_t* core,slow5_db_t* db, int32 /* partially free a data batch - only the read dependent allocations are freed */ -static void slow5_free_db_tmp(slow5_db_t* db) { +static void slow5_free_db_tmp(slow5_batch_t* db) { int32_t i = 0; for (i = 0; i < db->n_rec; ++i) { free(db->mem_records[i]); @@ -216,7 +192,7 @@ static void slow5_free_db_tmp(slow5_db_t* db) { } /* completely free a data batch */ -static void slow5_free_db(slow5_db_t* db) { +static void slow5_free_db(slow5_batch_t* db) { free(db->mem_records); free(db->mem_bytes);; @@ -249,8 +225,8 @@ static inline int32_t steal_work(slow5_pt_arg_t* all_args, int32_t num_thread) { static void* slow5_pthread_single(void* voidargs) { int32_t i; slow5_pt_arg_t* args = (slow5_pt_arg_t*)voidargs; - slow5_db_t* db = args->db; - slow5_core_t* core = args->core; + slow5_batch_t* db = args->db; + slow5_mt_t* core = 
args->core; #ifndef SLOW5_WORK_STEAL for (i = args->starti; i < args->endi; i++) { @@ -275,7 +251,7 @@ static void* slow5_pthread_single(void* voidargs) { pthread_exit(0); } -static void slow5_pthread_db(slow5_core_t* core, slow5_db_t* db, void (*func)(slow5_core_t*,slow5_db_t*,int)){ +static void slow5_pthread_db(slow5_mt_t* core, slow5_batch_t* db, void (*func)(slow5_mt_t*,slow5_batch_t*,int)){ //create threads pthread_t tids[core->num_thread]; slow5_pt_arg_t pt_args[core->num_thread]; @@ -327,7 +303,7 @@ static void slow5_pthread_db(slow5_core_t* core, slow5_db_t* db, void (*func)(sl } /* process all reads in the given batch db */ -static void slow5_work_db(slow5_core_t* core, slow5_db_t* db, void (*func)(slow5_core_t*,slow5_db_t*,int)){ +static void slow5_work_db(slow5_mt_t* core, slow5_batch_t* db, void (*func)(slow5_mt_t*,slow5_batch_t*,int)){ if (core->num_thread == 1) { int32_t i=0; @@ -342,69 +318,135 @@ static void slow5_work_db(slow5_core_t* core, slow5_db_t* db, void (*func)(slow5 } } -int slow5_get_batch(slow5_rec_t ***read, slow5_file_t *s5p, char **rid, int num_rid, int num_threads){ +int slow5_get_batch(slow5_mt_t *core, slow5_batch_t *db, char **rid, int num_rid){ - slow5_core_t *core = slow5_init_core(s5p,num_rid,num_threads); - slow5_db_t* db = slow5_init_db(core); + //slow5_mt_t *core = slow5_init_core(s5p,num_rid,num_threads); + //slow5_batch_t* db = slow5_init_db(core); db->rid = rid; db->n_rec = num_rid; slow5_work_db(core,db,slow5_work_per_single_read2); SLOW5_LOG_DEBUG("loaded and parsed %d recs\n",num_rid); - *read = db->slow5_rec; + //*read = db->slow5_rec; - slow5_free_db_tmp(db); - slow5_free_db(db); - slow5_free_core(core); + // slow5_free_db_tmp(db); + // slow5_free_db(db); + // slow5_free_core(core); return num_rid; } -int slow5_get_next_batch(slow5_rec_t ***read, slow5_file_t *s5p, int batch_size, int num_threads){ +int slow5_get_next_batch(slow5_mt_t *core, slow5_batch_t *db, int batch_size){ - slow5_core_t *core = 
slow5_init_core(s5p,batch_size,num_threads); - slow5_db_t* db = slow5_init_db(core); + // slow5_mt_t *core = slow5_init_core(s5p,batch_size,num_threads); + // slow5_batch_t* db = slow5_init_db(core); + db->n_rec = batch_size; int num_read=slow5_load_db(core,db); SLOW5_LOG_DEBUG("Loaded %d recs\n",num_read); slow5_work_db(core,db,slow5_work_per_single_read); SLOW5_LOG_DEBUG("Parsed %d recs\n",num_read); - *read = db->slow5_rec; + //*read = db->slow5_rec; - slow5_free_db_tmp(db); - slow5_free_db(db); - slow5_free_core(core); + // slow5_free_db_tmp(db); + // slow5_free_db(db); + // slow5_free_core(core); return num_read; } -int slow5_write_batch(slow5_rec_t **read, slow5_file_t *s5p, int batch_size, int num_threads){ +int slow5_write_batch(slow5_mt_t *core, slow5_batch_t *db, int batch_size){ - slow5_core_t *core = slow5_init_core(s5p,batch_size,num_threads); - slow5_db_t* db = slow5_init_db(core); + // slow5_mt_t *core = slow5_init_core(s5p,batch_size,num_threads); + // slow5_batch_t* db = slow5_init_db(core); db->n_rec = batch_size; - free(db->slow5_rec); //stupid lazy for now - db->slow5_rec = read; + // free(db->slow5_rec); //stupid lazy for now + // db->slow5_rec = read; slow5_work_db(core,db,slow5_work_per_single_read3); SLOW5_LOG_DEBUG("Processed %d recs\n",batch_size); int num_wr=slow5_write_db(core,db); SLOW5_LOG_DEBUG("Written %d recs\n",num_wr); - db->slow5_rec = NULL; + // db->slow5_rec = NULL; + // slow5_free_db_tmp(db); + // slow5_free_db(db); + // slow5_free_core(core); + + return num_wr; +} + +void slow5_free_batch(slow5_batch_t *db){ + + slow5_rec_t **reads = db->slow5_rec; + if(reads != NULL){ + for(int i=0;icapacity_rec;i++){ + slow5_rec_free(reads[i]); + } + } + + free(reads); + //*read = NULL; + slow5_free_db_tmp(db); slow5_free_db(db); - slow5_free_core(core); +} + + +int slow5_get_next_batch_lazy(slow5_rec_t ***read, slow5_file_t *s5p, int batch_size, int num_threads){ + + slow5_mt_t *core = slow5_init_mt(num_threads, s5p); + slow5_batch_t* db = 
slow5_init_batch(batch_size); + + int ret = slow5_get_next_batch(core,db,batch_size); + + *read = db->slow5_rec; + db->slow5_rec = NULL; + + slow5_free_batch(db); + slow5_free_mt(core); + + return ret; - return num_wr; } +int slow5_get_batch_lazy(slow5_rec_t ***read, slow5_file_t *s5p, char **rid, int num_rid, int num_threads){ -void slow5_free_batch(slow5_rec_t ***read, int num_rec){ + slow5_mt_t *core = slow5_init_mt(num_threads, s5p); + slow5_batch_t* db = slow5_init_batch(num_rid); + + int ret = slow5_get_batch(core,db,rid,num_rid); + + *read = db->slow5_rec; + db->slow5_rec = NULL; + + slow5_free_batch(db); + slow5_free_mt(core); + + return ret; + +} +int slow5_write_batch_lazy(slow5_rec_t **read, slow5_file_t *s5p, int batch_size, int num_threads){ + slow5_mt_t *core = slow5_init_mt(num_threads, s5p); + slow5_batch_t* db = slow5_init_batch(batch_size); + + + free(db->slow5_rec); + db->slow5_rec = read; + int ret = slow5_write_batch(core,db,batch_size); + db->slow5_rec = NULL; + + slow5_free_batch(db); + slow5_free_mt(core); + + return ret; +} + +void slow5_free_batch_lazy(slow5_rec_t ***read, int num_rec){ slow5_rec_t **reads = *read; for(int i=0;i #include #include +#include -int slow5_get_next_batch(slow5_rec_t ***read, slow5_file_t *s5p, int batch_size, int num_threads){ +int slow5_get_next_batch_lazy(slow5_rec_t ***read, slow5_file_t *s5p, int batch_size, int num_threads){ fprintf(stderr,"slow5lib has not been compiled with lazy multithreading support\n"); exit(EXIT_FAILURE); - return -1; } -int slow5_get_batch(slow5_rec_t ***read, slow5_file_t *s5p, char **rid, int num_rid, int num_threads){ +int slow5_get_batch_lazy(slow5_rec_t ***read, slow5_file_t *s5p, char **rid, int num_rid, int num_threads){ fprintf(stderr,"slow5lib has not been compiled with lazy multithreading support\n"); exit(EXIT_FAILURE); - return -1; } -int slow5_write_batch(slow5_rec_t **read, slow5_file_t *s5p, int batch_size, int num_threads){ +int slow5_write_batch_lazy(slow5_rec_t 
**read, slow5_file_t *s5p, int batch_size, int num_threads){ fprintf(stderr,"slow5lib has not been compiled with lazy multithreading support\n"); exit(EXIT_FAILURE); - return -1; } -void slow5_free_batch(slow5_rec_t ***read, int num_rec){ +void slow5_free_batch_lazy(slow5_rec_t ***read, int num_rec){ + fprintf(stderr,"slow5lib has not been compiled with lazy multithreading support\n"); + exit(EXIT_FAILURE); +} + +slow5_mt_t *slow5_init_mt(int num_thread, slow5_file_t *s5p){ fprintf(stderr,"slow5lib has not been compiled with lazy multithreading support\n"); exit(EXIT_FAILURE); } +slow5_batch_t* slow5_init_batch(int batch_capacity){ + fprintf(stderr,"slow5lib has not been compiled with lazy multithreading support\n"); + exit(EXIT_FAILURE); +} +int slow5_get_next_batch(slow5_mt_t *mt, slow5_batch_t *read_batch, int batch_size){ + fprintf(stderr,"slow5lib has not been compiled with lazy multithreading support\n"); + exit(EXIT_FAILURE); +} +int slow5_get_batch(slow5_mt_t *mt, slow5_batch_t *read_batch, char **rid, int num_rid){ + fprintf(stderr,"slow5lib has not been compiled with lazy multithreading support\n"); + exit(EXIT_FAILURE); +} +int slow5_write_batch(slow5_mt_t *mt, slow5_batch_t *read_batch, int batch_size){ + fprintf(stderr,"slow5lib has not been compiled with lazy multithreading support\n"); + exit(EXIT_FAILURE); +} +void slow5_free_batch(slow5_batch_t *read_batch){ + fprintf(stderr,"slow5lib has not been compiled with lazy multithreading support\n"); + exit(EXIT_FAILURE); +} +void slow5_free_mt(slow5_mt_t *mt){ + fprintf(stderr,"slow5lib has not been compiled with lazy multithreading support\n"); + exit(EXIT_FAILURE); +} + #endif \ No newline at end of file From ea8c034572798d2d20cc7fd4da9c97f9802aac18 Mon Sep 17 00:00:00 2001 From: Hasindu Gamaarachchi Date: Tue, 2 Aug 2022 16:51:32 +1000 Subject: [PATCH 03/18] fix the potential sigsev --- src/slow5_mt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/slow5_mt.c b/src/slow5_mt.c index 
badea54e..f7b98492 100644 --- a/src/slow5_mt.c +++ b/src/slow5_mt.c @@ -19,6 +19,9 @@ extern enum slow5_log_level_opt slow5_log_level; extern enum slow5_exit_condition_opt slow5_exit_condition; +void *slow5_get_next_mem(size_t *n, const slow5_file_t *s5p); +int slow5_rec_depress_parse(char **mem, size_t *bytes, const char *read_id, slow5_rec_t **read, slow5_file_t *s5p); + #define SLOW5_MALLOC_CHK_LAZY_EXIT(ret) { \ SLOW5_MALLOC_CHK(ret) \ if (ret == NULL) { \ From 2c50ac61a7b3284dd8fb9a096a803d295a9647d5 Mon Sep 17 00:00:00 2001 From: Hasindu Gamaarachchi Date: Tue, 2 Aug 2022 17:01:47 +1000 Subject: [PATCH 04/18] fix memleak in example --- examples/mt/lazymt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/mt/lazymt.c b/examples/mt/lazymt.c index 01f06812..35c8443c 100644 --- a/examples/mt/lazymt.c +++ b/examples/mt/lazymt.c @@ -22,10 +22,12 @@ int read_func(){ } slow5_rec_t **rec = NULL; int ret=0; - int batch_size = 4096; + int batch_size = 2048; int num_thread = 8; - while((ret = slow5_get_next_batch_lazy(&rec,sp,batch_size,num_thread)) > 0){ + + while(1){ + ret = slow5_get_next_batch_lazy(&rec,sp,batch_size,num_thread); for(int i=0;ilen_raw_signal; printf("%s\t%ld\n",rec[i]->read_id,len_raw_signal); @@ -165,7 +167,6 @@ int write_func(){ /******************* SLOW5 records ************************/ for(int i=0;i Date: Thu, 15 Sep 2022 01:16:20 +1000 Subject: [PATCH 05/18] update bench --- test/bench/build.sh | 8 ++++---- test/bench/get_all_samples.c | 4 ++-- test/bench/get_selected_read_ids_read_number.c | 10 +++++----- test/bench/get_selected_read_ids_sample_count.c | 10 +++++----- test/bench/get_selected_read_ids_samples.c | 10 +++++----- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/test/bench/build.sh b/test/bench/build.sh index b4ee4736..e66c559f 100755 --- a/test/bench/build.sh +++ b/test/bench/build.sh @@ -4,7 +4,7 @@ set -x set -e gcc -Wall -O2 -g -I include/ -o test/bench/get_all_read_ids 
test/bench/get_all_read_ids.c lib/libslow5.a -lm -lz -lzstd -gcc -Wall -O2 -g -I include/ -o test/bench/get_all_samples test/bench/get_all_samples.c lib/libslow5.a python/slow5threads.c -lm -lz -lzstd -lpthread -fopenmp -gcc -Wall -O2 -g -I include/ -o test/bench/get_selected_read_ids_samples test/bench/get_selected_read_ids_samples.c lib/libslow5.a python/slow5threads.c -lm -lz -lzstd -lpthread -fopenmp -gcc -Wall -O2 -g -I include/ -o test/bench/get_selected_read_ids_sample_count test/bench/get_selected_read_ids_sample_count.c lib/libslow5.a python/slow5threads.c -lm -lz -lzstd -lpthread -gcc -Wall -O2 -g -I include/ -o test/bench/get_selected_read_ids_read_number test/bench/get_selected_read_ids_read_number.c lib/libslow5.a python/slow5threads.c -lm -lz -lzstd -lpthread +gcc -Wall -O2 -g -I include/ -o test/bench/get_all_samples test/bench/get_all_samples.c lib/libslow5.a -lm -lz -lzstd -lpthread -fopenmp +gcc -Wall -O2 -g -I include/ -o test/bench/get_selected_read_ids_samples test/bench/get_selected_read_ids_samples.c lib/libslow5.a -lm -lz -lzstd -lpthread -fopenmp +gcc -Wall -O2 -g -I include/ -o test/bench/get_selected_read_ids_sample_count test/bench/get_selected_read_ids_sample_count.c lib/libslow5.a -lm -lz -lzstd -lpthread +gcc -Wall -O2 -g -I include/ -o test/bench/get_selected_read_ids_read_number test/bench/get_selected_read_ids_read_number.c lib/libslow5.a -lm -lz -lzstd -lpthread diff --git a/test/bench/get_all_samples.c b/test/bench/get_all_samples.c index 298112a2..05955ec1 100644 --- a/test/bench/get_all_samples.c +++ b/test/bench/get_all_samples.c @@ -1,6 +1,6 @@ //get all the samples and sum them to stdout //make zstd=1 -//gcc -Wall -O2 -I include/ -o get_all_samples test/bench/get_all_samples.c lib/libslow5.a python/slow5threads.c -lm -lz -lzstd -lpthread -fopenmp +//gcc -Wall -O2 -I include/ -o get_all_samples test/bench/get_all_samples.c lib/libslow5.a -lm -lz -lzstd -lpthread -fopenmp #include #include @@ -8,7 +8,7 @@ #include #include 
#include -#include "../../python/slow5threads.h" +//#include "../../python/slow5threads.h" #include "../../src/slow5_extra.h" int threads = 10; diff --git a/test/bench/get_selected_read_ids_read_number.c b/test/bench/get_selected_read_ids_read_number.c index a0113085..85e503f9 100644 --- a/test/bench/get_selected_read_ids_read_number.c +++ b/test/bench/get_selected_read_ids_read_number.c @@ -1,13 +1,13 @@ //get all the samples and sum them to stdout -//make zstd=1 -//gcc -Wall -O2 -I include/ -o get_selected_read_ids_read_number test/bench/get_selected_read_ids_read_number.c lib/libslow5.a python/slow5threads.c -lm -lz -lzstd -lpthread +//make zstd=1 slow5_mt=1 +//gcc -Wall -O2 -I include/ -o get_selected_read_ids_read_number test/bench/get_selected_read_ids_read_number.c lib/libslow5.a -lm -lz -lzstd -lpthread #include #include #include #include #include -#include "../../python/slow5threads.h" +#include static inline double realtime(void) { struct timeval tp; @@ -84,7 +84,7 @@ int main(int argc, char *argv[]) { int num_rid = i; t0 = realtime(); - ret = slow5_get_batch(&rec, sp, rid, num_rid, num_thread); + ret = slow5_get_batch_lazy(&rec, sp, rid, num_rid, num_thread); tot_time += realtime() - t0; if(ret!=num_rid){ @@ -107,7 +107,7 @@ int main(int argc, char *argv[]) { fprintf(stderr,"batch printed with %d reads\n",ret); t0 = realtime(); - slow5_free_batch(&rec,ret); + slow5_free_batch_lazy(&rec,ret); tot_time += realtime() - t0; for(int i=0; i #include #include #include #include -#include "../../python/slow5threads.h" +#include static inline double realtime(void) { struct timeval tp; @@ -84,7 +84,7 @@ int main(int argc, char *argv[]) { int num_rid = i; t0 = realtime(); - ret = slow5_get_batch(&rec, sp, rid, num_rid, num_thread); + ret = slow5_get_batch_lazy(&rec, sp, rid, num_rid, num_thread); tot_time += realtime() - t0; if(ret!=num_rid){ @@ -99,7 +99,7 @@ int main(int argc, char *argv[]) { fprintf(stderr,"batch printed with %d reads\n",ret); t0 = realtime(); - 
slow5_free_batch(&rec,ret); + slow5_free_batch_lazy(&rec,ret); tot_time += realtime() - t0; for(int i=0; i #include #include #include #include -#include "../../python/slow5threads.h" +#include static inline double realtime(void) { struct timeval tp; @@ -85,7 +85,7 @@ int main(int argc, char *argv[]) { int num_rid = i; t0 = realtime(); - ret = slow5_get_batch(&rec, sp, rid, num_rid, num_thread); + ret = slow5_get_batch_lazy(&rec, sp, rid, num_rid, num_thread); tot_time += realtime() - t0; if(ret!=num_rid){ @@ -110,7 +110,7 @@ int main(int argc, char *argv[]) { fprintf(stderr,"batch printed with %d reads\n",ret); t0 = realtime(); - slow5_free_batch(&rec,ret); + slow5_free_batch_lazy(&rec,ret); tot_time += realtime() - t0; for(int i=0; i Date: Fri, 14 Oct 2022 17:43:35 +1100 Subject: [PATCH 06/18] read enums pyslow5 --- docs/pyslow5_api/pyslow5.md | 25 +++++++++++++++++++++++++ python/README.md | 25 +++++++++++++++++++++++++ python/example.py | 11 +++++++++++ python/pyslow5.pxd | 8 ++++++-- python/pyslow5.pyx | 23 ++++++++++++++++++++++- 5 files changed, 89 insertions(+), 3 deletions(-) diff --git a/docs/pyslow5_api/pyslow5.md b/docs/pyslow5_api/pyslow5.md index eeb702ee..9cb0818a 100644 --- a/docs/pyslow5_api/pyslow5.md +++ b/docs/pyslow5_api/pyslow5.md @@ -311,6 +311,31 @@ Returns an ordered list of auxiliary attribute types (same order as get_aux_name This can mostly be ignored, but will be used in error tracing in the future, as auxiliary field requests have multiple types, each with their own calls, and not all are used. It could be the case a call for an auxiliary filed fails, and knowing which type the field is requesting is very helpful in understanding which function in C is being called, that could be causing the error. +#### `get_aux_enum_labels(label)`: + +Returns an ordered list representing the values in the enum struct in the type header. + +The value in the read can then be used to access the labels as an index to the list. 
+ +Example: + +```python +s5 = slow5.Open(file,'r') +end_reason_labels = s5.get_aux_enum_labels('end_reason') +print(end_reason_labels) + +> ['unknown', 'partial', 'mux_change', 'unblock_mux_change', 'signal_positive', 'signal_negative'] + +readID = "r1" +read = s5.get_read(readID, aux='all') +er_index = read['end_reason'] +er = end_reason_labels[er_index] + +print("{}: {}".format(er_index, er)) + +> 4: signal_positive +``` + ### Writing a file To write a file, `mode` in `Open()` must be set to `'w'` and when appending, `'a'` diff --git a/python/README.md b/python/README.md index eeb702ee..9cb0818a 100644 --- a/python/README.md +++ b/python/README.md @@ -311,6 +311,31 @@ Returns an ordered list of auxiliary attribute types (same order as get_aux_name This can mostly be ignored, but will be used in error tracing in the future, as auxiliary field requests have multiple types, each with their own calls, and not all are used. It could be the case a call for an auxiliary filed fails, and knowing which type the field is requesting is very helpful in understanding which function in C is being called, that could be causing the error. +#### `get_aux_enum_labels(label)`: + +Returns an ordered list representing the values in the enum struct in the type header. + +The value in the read can then be used to access the labels as an index to the list. 
+ +Example: + +```python +s5 = slow5.Open(file,'r') +end_reason_labels = s5.get_aux_enum_labels('end_reason') +print(end_reason_labels) + +> ['unknown', 'partial', 'mux_change', 'unblock_mux_change', 'signal_positive', 'signal_negative'] + +readID = "r1" +read = s5.get_read(readID, aux='all') +er_index = read['end_reason'] +er = end_reason_labels[er_index] + +print("{}: {}".format(er_index, er)) + +> 4: signal_positive +``` + ### Writing a file To write a file, `mode` in `Open()` must be set to `'w'` and when appending, `'a'` diff --git a/python/example.py b/python/example.py index 2196cd40..37f038f8 100644 --- a/python/example.py +++ b/python/example.py @@ -459,6 +459,17 @@ s58.close() +print("==============================================") +print("get enum fields") + +s59 = slow5.Open('examples/adv/example3.blow5','r', DEBUG=debug) + +e = s59.get_aux_enum_labels('end_reason') + +print(e) + +s59.close() + print("==============================================") # print("seq_reads with big file:") # start_time = time.time() diff --git a/python/pyslow5.pxd b/python/pyslow5.pxd index 703bf3a4..ad5ff874 100644 --- a/python/pyslow5.pxd +++ b/python/pyslow5.pxd @@ -93,8 +93,7 @@ cdef extern from "pyslow5.h": char **slow5_get_rids(const slow5_file_t *s5p, uint64_t *len); - - + # get aux fields int8_t slow5_aux_get_int8(const slow5_rec_t *read, const char *attr, int *err); int16_t slow5_aux_get_int16(const slow5_rec_t *read, const char *attr, int *err); int32_t slow5_aux_get_int32(const slow5_rec_t *read, const char *attr, int *err); @@ -107,6 +106,8 @@ cdef extern from "pyslow5.h": double slow5_aux_get_double(const slow5_rec_t *read, const char *attr, int *err); char slow5_aux_get_char(const slow5_rec_t *read, const char *attr, int *err); uint8_t slow5_aux_get_enum(const slow5_rec_t *read, const char *field, int *err); + + # get aux arrays int8_t *slow5_aux_get_int8_array(const slow5_rec_t *read, const char *attr, uint64_t *len, int *err); int16_t 
*slow5_aux_get_int16_array(const slow5_rec_t *read, const char *attr, uint64_t *len, int *err); int32_t *slow5_aux_get_int32_array(const slow5_rec_t *read, const char *attr, uint64_t *len, int *err); @@ -120,6 +121,9 @@ cdef extern from "pyslow5.h": char *slow5_aux_get_string(const slow5_rec_t *read, const char *attr, uint64_t *len, int *err); uint8_t *slow5_aux_get_enum_array(const slow5_rec_t *read, const char *field, uint64_t *len, int *err); + # get aux enum labels + char **slow5_get_aux_enum_labels(const slow5_hdr_t *header, const char *field, uint8_t *n); + # Write slow5 file diff --git a/python/pyslow5.pyx b/python/pyslow5.pyx index 50231999..66a36e32 100644 --- a/python/pyslow5.pyx +++ b/python/pyslow5.pyx @@ -38,7 +38,9 @@ cdef class Open: cdef bint close_state cdef pyslow5.uint64_t head_len cdef pyslow5.uint64_t aux_len + cdef pyslow5.uint8_t enum_len cdef pyslow5.slow5_aux_type *s5_aux_type + cdef char **s5_aux_enum cdef int aux_get_err cdef pyslow5.uint64_t aux_get_len cdef np.npy_intp shape_get[1] @@ -126,10 +128,12 @@ cdef class Open: self.header_add_attr_state = False self.close_state = False self.s5_aux_type = NULL + self.s5_aux_enum = NULL self.aux_get_err = 1 self.aux_get_len = 0 self.head_len = 0 self.aux_len = 0 + self.enum_len = 0 self.shape_seq[0] = 0 self.shape_get[0] = 0 self.e0 = -1 @@ -1345,6 +1349,23 @@ cdef class Open: aux_types = [self.s5_aux_type[i] for i in range(self.aux_len)] return aux_types + + + def get_aux_enum_labels(self, label): + ''' + get the labels for an enum aux field + ''' + a = str.encode(label) + labels = [] + self.s5_aux_enum = slow5_get_aux_enum_labels(self.s5.header, a, &self.enum_len) + + if self.s5_aux_enum == NULL: + self.logger.warning("get_aux_enum_labels enum_labels is NULL") + return labels + + labels = [self.s5_aux_enum[i].decode() for i in range(self.enum_len)] + return labels + # ========================================================================== # Write SLOW5 file @@ -1822,7 +1843,7 @@ cdef class 
Open: "start_mux": type(1), "start_time": type(100), "end_reason": None} - # check an empty dic wasn't given + # check if empty dic was given if aux is not None: if len(aux) == 0: aux = None From 5504d7305a1ad598361b30233a46c3715a82e1d1 Mon Sep 17 00:00:00 2001 From: Psy-Fer Date: Fri, 14 Oct 2022 18:04:31 +1100 Subject: [PATCH 07/18] update enum read examples --- python/example.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/python/example.py b/python/example.py index 37f038f8..c212aed4 100644 --- a/python/example.py +++ b/python/example.py @@ -470,6 +470,19 @@ s59.close() +print("==============================================") +print("check can read enum aux field") + +s510 = slow5.Open('examples/adv/example3.blow5','r', DEBUG=debug) +reads = s510.seq_reads(aux='all') + +e = s510.get_aux_enum_labels('end_reason') + +for read in reads: + print(read['read_id'], read['end_reason'], e[read['end_reason']]) + +s510.close() + print("==============================================") # print("seq_reads with big file:") # start_time = time.time() From 665fe646ce583b2be8860b7668f949520016f889 Mon Sep 17 00:00:00 2001 From: Hasindu Gamaarachchi Date: Mon, 17 Oct 2022 10:49:14 +1100 Subject: [PATCH 08/18] aux_enum_add and polished mt examples --- .../low_level_api/slow5_aux_add_enum.md | 148 ++++++++++++++++++ docs/slow5_api/slow5_low_level_api.md | 2 + examples/.gitignore | 4 + examples/adv/README.md | 1 + examples/adv/auxiliary_field_enum_write.c | 125 +++++++++++++++ examples/adv/build.sh | 1 + examples/mt/README.md | 13 ++ examples/mt/build.sh | 12 ++ examples/mt/lazymt.c | 9 +- examples/mt/mt.c | 16 +- include/slow5/slow5.h | 2 + include/slow5/slow5_mt.h | 32 +++- src/slow5.c | 5 + 13 files changed, 345 insertions(+), 25 deletions(-) create mode 100644 docs/slow5_api/low_level_api/slow5_aux_add_enum.md create mode 100644 examples/adv/auxiliary_field_enum_write.c create mode 100755 examples/mt/README.md create mode 100755 examples/mt/build.sh diff 
--git a/docs/slow5_api/low_level_api/slow5_aux_add_enum.md b/docs/slow5_api/low_level_api/slow5_aux_add_enum.md new file mode 100644 index 00000000..917ae42f --- /dev/null +++ b/docs/slow5_api/low_level_api/slow5_aux_add_enum.md @@ -0,0 +1,148 @@ +# slow5_aux_add_enum + +## NAME + +slow5_aux_add_enum - adds an auxiliary field of type enum to a SLOW5 header + +## SYNOPSYS + +`int slow5_aux_add_enum(const char *field, const char **enum_labels, uint8_t num_labels, slow5_hdr_t *header)` + +## DESCRIPTION +`slow5_aux_add_enum()` adds an auxiliary field named *field* of the datatype *enum* whose enum labels are pointed by *enum_labels* to a SLOW5 file header pointed by *header*. + +The number of enum labels should be given in *num_labels*. + +The argument *header* points to a SLOW5 header of type *slow5_hdr_t* and typically this is the *s5p->header* member inside the *slow5_file_t \*s5p* returned by `slow5_open()`. + +## RETURN VALUE +Upon successful completion, `slow5_aux_add_enum()` returns a non negative integer (>=0). Otherwise, a negative value is returned. + +## ERRORS + +A negative value is returned when an error occurs and can be due to following occasions (not an exhaustive list): + +- input parameter is NULL +- enum value is invalid +- other error + +## NOTES + +In the future `slow5_errno` will be set to indicate the error. 
+ +## EXAMPLES + +``` +#include +#include +#include +#include + +#define FILE_PATH "test.blow5" + +int main(){ + + //open the SLOW5 file for writing + slow5_file_t *sp = slow5_open(FILE_PATH, "w"); + if(sp==NULL){ + fprintf(stderr,"Error opening file!\n"); + exit(EXIT_FAILURE); + } + + /*********************** Header ******************/ + /* + @run_id run_id_0 + */ + slow5_hdr_t *header=sp->header; //pointer to the SLOW5 header + + //add a header group attribute called run_id + if (slow5_hdr_add("run_id", header) < 0){ + fprintf(stderr,"Error adding run_id attribute\n"); + exit(EXIT_FAILURE); + } + + //set the run_id attribute to "run_0" for read group 0 + if (slow5_hdr_set("run_id", "run_0", 0, header) < 0){ + fprintf(stderr,"Error setting run_id attribute in read group 0\n"); + exit(EXIT_FAILURE); + } + + /* + enum{unknown,partial,mux_change,unblock_mux_change,signal_positive,signal_negative} + end_reason + */ + const char *enum_labels[] = {"unknown", "partial", "mux_change", "unblock_mux_change", "signal_positive", "signal_negative"}; + uint8_t num_labels = 6; + if (slow5_aux_add_enum("end_reason", enum_labels, num_labels, sp->header) < 0){ + fprintf(stderr,"Error adding end_reason auxilliary field\n"); + exit(EXIT_FAILURE); + } + + if(slow5_hdr_write(sp) < 0){ + fprintf(stderr,"Error writing header!\n"); + exit(EXIT_FAILURE); + } + + + /******************* A SLOW5 record ************************/ + slow5_rec_t *slow5_record = slow5_rec_init(); + if(slow5_record == NULL){ + fprintf(stderr,"Could not allocate space for a slow5 record."); + exit(EXIT_FAILURE); + } + + /* primary fields + #read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal + read_0 0 4096 3 10 4000 10 0,1,2,3,4,5,6,7,8,9 + */ + slow5_record -> read_id = strdup("read_0"); + if(slow5_record->read_id == NULL){ + fprintf(stderr,"Could not do strdup."); + exit(EXIT_FAILURE); + } + slow5_record-> read_id_len = strlen(slow5_record -> read_id); + slow5_record -> read_group 
= 0; + slow5_record -> digitisation = 4096.0; + slow5_record -> offset = 3.0; + slow5_record -> range = 10.0; + slow5_record -> sampling_rate = 4000.0; + slow5_record -> len_raw_signal = 10; + slow5_record -> raw_signal = (int16_t *)malloc(sizeof(int16_t) * slow5_record->len_raw_signal); + if(slow5_record->raw_signal == NULL){ + fprintf(stderr,"Could not allocate space for raw signal."); + exit(EXIT_FAILURE); + } + for(int i=0; ilen_raw_signal; i++){ + slow5_record->raw_signal[i] = i; + } + + /* auxiliary fileds + end_reason + 1 + */ + uint8_t end_reason = 1; + if(slow5_aux_set(slow5_record, "end_reason", &end_reason, sp->header) < 0){ + fprintf(stderr,"Error setting end_reason auxilliary field\n"); + exit(EXIT_FAILURE); + } + + //write to file + if(slow5_write(slow5_record, sp) < 0){ + fprintf(stderr,"Error writing record!\n"); + exit(EXIT_FAILURE); + } + + //free the slow5 record + slow5_rec_free(slow5_record); + + //close the SLOW5 file + slow5_close(sp); + + return 0; + +} + +``` + +## SEE ALSO +[`slow5_aux_set()`](../slow5_aux_set.md). \ No newline at end of file diff --git a/docs/slow5_api/slow5_low_level_api.md b/docs/slow5_api/slow5_low_level_api.md index d12ba0d9..a7636830 100755 --- a/docs/slow5_api/slow5_low_level_api.md +++ b/docs/slow5_api/slow5_low_level_api.md @@ -33,6 +33,8 @@ Low-level API allows much more efficient access to BLOW5 files compared to the h ### Writing and editing +* [slow5_aux_add_enum](low_level_api/slow5_aux_add_enum.md)
+     adds an auxiliary field of type enum to a SLOW5 header * [slow5_encode](low_level_api/slow5_encode.md)
    encodes a SLOW5 record * [slow5_write_bytes](low_level_api/slow5_write_bytes.md)
diff --git a/examples/.gitignore b/examples/.gitignore index e8360e0a..e98e7de9 100644 --- a/examples/.gitignore +++ b/examples/.gitignore @@ -17,6 +17,10 @@ example_write_aux.blow5 write append adv/auxiliary_field_enum +adv/auxiliary_field_enum_write adv/sequential_read_pthreads adv/sequential_read_openmp adv/get_all_read_ids +mt/mt +mt/lazymt + diff --git a/examples/adv/README.md b/examples/adv/README.md index 26420820..88200693 100755 --- a/examples/adv/README.md +++ b/examples/adv/README.md @@ -5,6 +5,7 @@ This directory contains following advanced examples that uses low-level API. - *sequential_read_pthreads.c* demonstrates how to sequentially read raw SLOW5 records from a slow5/blow5 file using a single thread and then decode those in parallel using *pthreads*. - *sequential_read_openmp.c* demonstrates how to sequentially read raw SLOW5 records from a slow5/blow5 file using a single thread and then decode those in parallel using *openMP*. - *get_all_read_ids.c* demonstrates how to get the list of all read IDs from a slow5/blow5 file. +- *auxiliary_field_enum_write.c* demonstrates how to write a slow5/blow5 file containing an auxiliary field of type enum. You can invoke [build.sh](build.sh) from slow5lib directory as `examples/adv/build.sh` to compile the example programmes. Have a look at the script to see the commands used for compiling and linking. Also make sure you get familiar with the basic examples first, before trying these advanced examples. 
diff --git a/examples/adv/auxiliary_field_enum_write.c b/examples/adv/auxiliary_field_enum_write.c new file mode 100644 index 00000000..05ba38c1 --- /dev/null +++ b/examples/adv/auxiliary_field_enum_write.c @@ -0,0 +1,125 @@ +// an example programme that uses slow5lib to write a SLOW5 file with an enum data type + +/* +A BLOW5 file with following content will be written in this example +#num_read_groups 1 +@run_id run_0 +#char* uint32_t double double double double uint64_t int16_t* enum{unknown,partial,mux_change,unblock_mux_change,signal_positive,signal_negative} +#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal end_reason +read_0 0 4096 3 10 4000 10 0,1,2,3,4,5,6,7,8,9 1 +*/ + +#include +#include +#include +#include + +#define FILE_PATH "test.blow5" + +int main(){ + + //open the SLOW5 file for writing + slow5_file_t *sp = slow5_open(FILE_PATH, "w"); + if(sp==NULL){ + fprintf(stderr,"Error opening file!\n"); + exit(EXIT_FAILURE); + } + + // //This section can be optionally uncommented to set non-default compression methods for blow5 (the recommended default is zlib+svb-zd) + // if(slow5_set_press(sp, SLOW5_COMPRESS_NONE, SLOW5_COMPRESS_NONE) < 0){ // no record compression, no signal compression + // fprintf(stderr,"Error setting compression method!\n"); + // exit(EXIT_FAILURE); + // } + + /*********************** Header ******************/ + /* + @run_id run_id_0 + */ + slow5_hdr_t *header=sp->header; //pointer to the SLOW5 header + + //add a header group attribute called run_id + if (slow5_hdr_add("run_id", header) < 0){ + fprintf(stderr,"Error adding run_id attribute\n"); + exit(EXIT_FAILURE); + } + + //set the run_id attribute to "run_0" for read group 0 + if (slow5_hdr_set("run_id", "run_0", 0, header) < 0){ + fprintf(stderr,"Error setting run_id attribute in read group 0\n"); + exit(EXIT_FAILURE); + } + + /* + enum{unknown,partial,mux_change,unblock_mux_change,signal_positive,signal_negative} + end_reason + */ + const char 
*enum_labels[] = {"unknown", "partial", "mux_change", "unblock_mux_change", "signal_positive", "signal_negative"}; + uint8_t num_labels = 6; + if (slow5_aux_add_enum("end_reason", enum_labels, num_labels, sp->header) < 0){ + fprintf(stderr,"Error adding end_reason auxilliary field\n"); + exit(EXIT_FAILURE); + } + + if(slow5_hdr_write(sp) < 0){ + fprintf(stderr,"Error writing header!\n"); + exit(EXIT_FAILURE); + } + + + /******************* A SLOW5 record ************************/ + slow5_rec_t *slow5_record = slow5_rec_init(); + if(slow5_record == NULL){ + fprintf(stderr,"Could not allocate space for a slow5 record."); + exit(EXIT_FAILURE); + } + + /* primary fields + #read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal + read_0 0 4096 3 10 4000 10 0,1,2,3,4,5,6,7,8,9 + */ + slow5_record -> read_id = strdup("read_0"); + if(slow5_record->read_id == NULL){ + fprintf(stderr,"Could not do strdup."); + exit(EXIT_FAILURE); + } + slow5_record-> read_id_len = strlen(slow5_record -> read_id); + slow5_record -> read_group = 0; + slow5_record -> digitisation = 4096.0; + slow5_record -> offset = 3.0; + slow5_record -> range = 10.0; + slow5_record -> sampling_rate = 4000.0; + slow5_record -> len_raw_signal = 10; + slow5_record -> raw_signal = (int16_t *)malloc(sizeof(int16_t) * slow5_record->len_raw_signal); + if(slow5_record->raw_signal == NULL){ + fprintf(stderr,"Could not allocate space for raw signal."); + exit(EXIT_FAILURE); + } + for(int i=0; i<slow5_record->len_raw_signal; i++){ + slow5_record->raw_signal[i] = i; + } + + /* auxiliary fields + end_reason + 1 + */ + uint8_t end_reason = 1; + if(slow5_aux_set(slow5_record, "end_reason", &end_reason, sp->header) < 0){ + fprintf(stderr,"Error setting end_reason auxilliary field\n"); + exit(EXIT_FAILURE); + } + + //write to file + if(slow5_write(slow5_record, sp) < 0){ + fprintf(stderr,"Error writing record!\n"); + exit(EXIT_FAILURE); + } + + //free the slow5 record + slow5_rec_free(slow5_record); + + //close 
the SLOW5 file + slow5_close(sp); + + return 0; + +} diff --git a/examples/adv/build.sh b/examples/adv/build.sh index a3e91c42..8dfe216f 100755 --- a/examples/adv/build.sh +++ b/examples/adv/build.sh @@ -8,5 +8,6 @@ gcc -Wall -O2 -I include/ examples/adv/auxiliary_field_enum.c lib/libslow5.a -o gcc -Wall -O2 -I include/ examples/adv/sequential_read_openmp.c lib/libslow5.a -o examples/adv/sequential_read_openmp -lm -lz -fopenmp gcc -Wall -O2 -I include/ examples/adv/sequential_read_pthreads.c lib/libslow5.a -o examples/adv/sequential_read_pthreads -lm -lz -lpthread gcc -Wall -O2 -I include/ examples/adv/get_all_read_ids.c lib/libslow5.a -o examples/adv/get_all_read_ids -lm -lz +gcc -Wall -O2 -I include/ examples/adv/auxiliary_field_enum_write.c lib/libslow5.a -o examples/adv/auxiliary_field_enum_write -lm -lz #append -lzstd to above commands if your slow5lib is built with zstd support \ No newline at end of file diff --git a/examples/mt/README.md b/examples/mt/README.md new file mode 100755 index 00000000..5c82e30d --- /dev/null +++ b/examples/mt/README.md @@ -0,0 +1,13 @@ +# slow5lib mt Examples + +** WARNING: This API is under development. There could be bugs **/ + +** This is not meant to be used by a programmer who has the expertise to write efficient multi-threaded code and use the slow5 low-level API directly. Such advanced examples are at [advanced examples](../adv/) ** + +This directory contains examples that uses the built in multi-threaded API in slow5lib. For these examples to work you must build slow5lib with multi-threaded API as `make slow5_mt=1`. To link against slow5lib built with multi-threaded API, make sure to append `-lpthread` flag. + +- *mt.c* demonstrates how to use multi-threaded API to first write and then fetch a batch of slow5/blow5 records in parallel. +- *lazymt.c* demonstrates how to use the lazy function to do the same as above, which is meant for a lazy programmer. 
+ +You can invoke [build.sh](build.sh) from slow5lib directory as `examples/mt/build.sh` to compile the example programmes. Have a look at the script to see the commands used for compiling and linking. Also make sure you get familiar with the basic examples first, before trying these. + diff --git a/examples/mt/build.sh b/examples/mt/build.sh new file mode 100755 index 00000000..728045cc --- /dev/null +++ b/examples/mt/build.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +#exit on error +set -x +#prints the command to the console +set -e +#make sure slow5lib is built with multi-threading API as `make slow5_mt=1` +make clean && make slow5_mt=1 +gcc -Wall -O2 -I include/ examples/mt/mt.c lib/libslow5.a -o examples/mt/mt -lm -lz -lpthread +gcc -Wall -O2 -I include/ examples/mt/lazymt.c lib/libslow5.a -o examples/mt/lazymt -lm -lz -lpthread + +#append -lzstd to above commands if your slow5lib is built with zstd support \ No newline at end of file diff --git a/examples/mt/lazymt.c b/examples/mt/lazymt.c index 35c8443c..de513e8c 100644 --- a/examples/mt/lazymt.c +++ b/examples/mt/lazymt.c @@ -1,4 +1,8 @@ +// an example programme is for a lazy programmer. see mt.c instead. 
+// uses the optional multi-threaded API under the lazy mode to fetch batches of records in parallel +// this is under construction and is yet beta + #include #include #include @@ -8,10 +12,8 @@ #include #include - #define FILE_PATH "test.blow5" //for reading #define FILE_PATH_WRITE "test.blow5" -//#define FILE_PATH "/home/jamfer/Data/SK/multi_fast5/s5/FAK40634_d1cc054609fe2c5fcdeac358864f9dc81c8bb793_95.blow5" int read_func(){ @@ -24,7 +26,7 @@ int read_func(){ int ret=0; int batch_size = 2048; int num_thread = 8; - + while(1){ ret = slow5_get_next_batch_lazy(&rec,sp,batch_size,num_thread); @@ -255,5 +257,4 @@ int main(){ return 0; } -//gcc -Wall examples/mt/lazymt.c -I include/ lib/libslow5.a -lpthread -lz -O2 -g diff --git a/examples/mt/mt.c b/examples/mt/mt.c index 240e6169..3b939bee 100644 --- a/examples/mt/mt.c +++ b/examples/mt/mt.c @@ -1,3 +1,5 @@ +// an example programme that uses the optional multi-threaded API in slow5lib to write and fetch batches of records in parallel +// this is under construction and is yet beta #include #include @@ -8,10 +10,8 @@ #include #include - #define FILE_PATH "test.blow5" //for reading #define FILE_PATH_WRITE "test.blow5" -//#define FILE_PATH "/home/jamfer/Data/SK/multi_fast5/s5/FAK40634_d1cc054609fe2c5fcdeac358864f9dc81c8bb793_95.blow5" int read_func(){ @@ -45,8 +45,6 @@ int read_func(){ slow5_free_mt(mt); slow5_close(sp); - - //now random read fun sp = slow5_open(FILE_PATH,"r"); if(sp==NULL){ @@ -169,8 +167,6 @@ int write_func(){ exit(EXIT_FAILURE); } - - int ret=0; int batch_size = 4000; int num_thread = 8; @@ -182,7 +178,6 @@ int write_func(){ /******************* SLOW5 records ************************/ for(int i=0;iaux_meta, field, SLOW5_ENUM, enum_labels, num_labels); + return ret; +} + void slow5_aux_meta_free(struct slow5_aux_meta *aux_meta) { if (aux_meta) { if (aux_meta->attrs) { From cd0bdbc4e0062475486c5b7c1493d72ed00574af Mon Sep 17 00:00:00 2001 From: Hasindu Gamaarachchi Date: Mon, 17 Oct 2022 10:50:59 +1100 
Subject: [PATCH 09/18] Update slow5_aux_add_enum.md --- docs/slow5_api/low_level_api/slow5_aux_add_enum.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/slow5_api/low_level_api/slow5_aux_add_enum.md b/docs/slow5_api/low_level_api/slow5_aux_add_enum.md index 917ae42f..a9ffb947 100644 --- a/docs/slow5_api/low_level_api/slow5_aux_add_enum.md +++ b/docs/slow5_api/low_level_api/slow5_aux_add_enum.md @@ -1,4 +1,4 @@ -# slow5_aux_add +# slow5_aux_add_enum ## NAME @@ -145,4 +145,4 @@ int main(){ ``` ## SEE ALSO -[`slow5_aux_set()`](../slow5_aux_set.md). \ No newline at end of file +[`slow5_aux_set()`](../slow5_aux_set.md). From 114cfae1b3d2f66cfc5fd6f16da48eb57848e038 Mon Sep 17 00:00:00 2001 From: Hasindu Gamaarachchi Date: Mon, 17 Oct 2022 10:53:29 +1100 Subject: [PATCH 10/18] Update README.md --- examples/mt/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/mt/README.md b/examples/mt/README.md index 5c82e30d..504b5e2e 100755 --- a/examples/mt/README.md +++ b/examples/mt/README.md @@ -1,8 +1,8 @@ # slow5lib mt Examples -** WARNING: This API is under development. There could be bugs **/ +**WARNING: This API is under development. There could be bugs** -** This is not meant to be used by a programmer who has the expertise to write efficient multi-threaded code and use the slow5 low-level API directly. Such advanced examples are at [advanced examples](../adv/) ** +**This is not meant to be used by a programmer who has the expertise to write efficient multi-threaded code and use the slow5 low-level API directly. Such advanced examples are at [advanced examples](../adv/)** This directory contains examples that uses the built in multi-threaded API in slow5lib. For these examples to work you must build slow5lib with multi-threaded API as `make slow5_mt=1`. To link against slow5lib built with multi-threaded API, make sure to append `-lpthread` flag. 
From d0674db5e30a4f0f40172ba75a016692c7a0699d Mon Sep 17 00:00:00 2001 From: Hasindu Gamaarachchi Date: Mon, 17 Oct 2022 11:07:30 +1100 Subject: [PATCH 11/18] clean up the mt examples a bit --- examples/mt/lazymt.c | 54 +++++++++++++++++++------------------- examples/mt/mt.c | 62 +++++++++++++++++++++++--------------------- 2 files changed, 60 insertions(+), 56 deletions(-) diff --git a/examples/mt/lazymt.c b/examples/mt/lazymt.c index de513e8c..c9738d40 100644 --- a/examples/mt/lazymt.c +++ b/examples/mt/lazymt.c @@ -12,10 +12,22 @@ #include #include -#define FILE_PATH "test.blow5" //for reading -#define FILE_PATH_WRITE "test.blow5" +#define FILE_PATH "test.blow5" //for writing and reading -int read_func(){ +void sequential_read_func(); //function to read records sequentially +void random_read_func(); //function to read records randomly (lot of read IDs) +void write_func(); //function to write records + +int main(){ + + write_func(); + sequential_read_func(); + random_read_func(); + + return 0; +} + +void sequential_read_func(){ slow5_file_t *sp = slow5_open(FILE_PATH,"r"); if(sp==NULL){ @@ -43,29 +55,27 @@ int read_func(){ slow5_close(sp); + return; +} + - //now random read fun - sp = slow5_open(FILE_PATH,"r"); +void random_read_func(){ + + slow5_file_t *sp = slow5_open(FILE_PATH,"r"); if(sp==NULL){ fprintf(stderr,"Error in opening file\n"); exit(EXIT_FAILURE); } - rec = NULL; - - ret = slow5_idx_create(sp); - if(ret<0){ - fprintf(stderr,"Error in creating index\n"); - exit(EXIT_FAILURE); - } + slow5_rec_t **rec = NULL; - ret = slow5_idx_load(sp); + int ret = slow5_idx_load(sp); if(ret<0){ fprintf(stderr,"Error in loading index\n"); exit(EXIT_FAILURE); } int num_rid = 4; - num_thread = 2; + int num_thread = 2; char *rid[num_rid]; rid[0]="read_id_50"; rid[1]="read_id_3999", @@ -83,13 +93,13 @@ int read_func(){ slow5_idx_unload(sp); slow5_close(sp); - return 0; + return; } -int write_func(){ +void write_func(){ - slow5_file_t *sf = slow5_open(FILE_PATH_WRITE,"w"); 
+ slow5_file_t *sf = slow5_open(FILE_PATH,"w"); if(sf==NULL){ fprintf(stderr,"Error in opening file\n"); exit(EXIT_FAILURE); @@ -245,16 +255,8 @@ int write_func(){ slow5_rec_free(rec[i]); } - return 0; + return; } -int main(){ - - write_func(); - read_func(); - - - return 0; -} diff --git a/examples/mt/mt.c b/examples/mt/mt.c index 3b939bee..470d2022 100644 --- a/examples/mt/mt.c +++ b/examples/mt/mt.c @@ -10,10 +10,22 @@ #include #include -#define FILE_PATH "test.blow5" //for reading -#define FILE_PATH_WRITE "test.blow5" +#define FILE_PATH "test.blow5" //for writing and then reading -int read_func(){ +void sequential_read_func(); //function to read records sequentially +void random_read_func(); //function to read records randomly (lot of read IDs) +void write_func(); //function to write records + +int main(){ + + write_func(); + sequential_read_func(); + random_read_func(); + + return 0; +} + +void sequential_read_func(){ slow5_file_t *sp = slow5_open(FILE_PATH,"r"); if(sp==NULL){ @@ -45,37 +57,37 @@ int read_func(){ slow5_free_mt(mt); slow5_close(sp); - //now random read fun - sp = slow5_open(FILE_PATH,"r"); + return; +} + + +void random_read_func(){ + + slow5_file_t *sp = slow5_open(FILE_PATH,"r"); if(sp==NULL){ fprintf(stderr,"Error in opening file\n"); exit(EXIT_FAILURE); } - rec = NULL; - - ret = slow5_idx_create(sp); - if(ret<0){ - fprintf(stderr,"Error in creating index\n"); - exit(EXIT_FAILURE); - } + slow5_rec_t **rec = NULL; - ret = slow5_idx_load(sp); + int ret = slow5_idx_load(sp); if(ret<0){ fprintf(stderr,"Error in loading index\n"); exit(EXIT_FAILURE); } int num_rid = 4; - num_thread = 2; + int num_thread = 2; + int batch_size = 4096; char *rid[num_rid]; rid[0]="read_id_50"; rid[1]="read_id_3999", rid[2]="read_id_0"; rid[3]="read_id_4"; - mt = slow5_init_mt(num_thread,sp); + slow5_mt_t *mt = slow5_init_mt(num_thread,sp); - read_batch = slow5_init_batch(batch_size); + slow5_batch_t *read_batch = slow5_init_batch(batch_size); ret = slow5_get_batch(mt, 
read_batch, rid, num_rid); assert(ret==num_rid); @@ -91,13 +103,13 @@ int read_func(){ slow5_idx_unload(sp); slow5_close(sp); - return 0; + return; } -int write_func(){ +void write_func(){ - slow5_file_t *sf = slow5_open(FILE_PATH_WRITE,"w"); + slow5_file_t *sf = slow5_open(FILE_PATH,"w"); if(sf==NULL){ fprintf(stderr,"Error in opening file\n"); exit(EXIT_FAILURE); @@ -252,15 +264,5 @@ int write_func(){ slow5_free_mt(mt); slow5_close(sf); - return 0; -} - -int main(){ - - write_func(); - read_func(); - - return 0; + return; } - - From bcdfb5001f04c67e90404fcba3ed9e8770904cbe Mon Sep 17 00:00:00 2001 From: Psy-Fer Date: Mon, 17 Oct 2022 17:45:05 +1100 Subject: [PATCH 12/18] add write for end_reason enum --- python/example.py | 76 +++++++++++++++++++++++++++ python/pyslow5.pxd | 1 + python/pyslow5.pyx | 125 ++++++++++++++++++++++++++++++++++----------- 3 files changed, 173 insertions(+), 29 deletions(-) diff --git a/python/example.py b/python/example.py index c212aed4..1f5434bc 100644 --- a/python/example.py +++ b/python/example.py @@ -483,6 +483,82 @@ s510.close() +print("==============================================") +print("write reads with aux and end_reason enum") + +F = slow5.Open('examples/example_write_aux_enum.blow5','w', DEBUG=debug) +header, end_reason_labels = F.get_empty_header(aux=True) + +counter = 0 +for i in header: + header[i] = "test_{}".format(counter) + counter += 1 + +ret = F.write_header(header, end_reason_labels=end_reason_labels) +print("ret: write_header(): {}".format(ret)) + +s58 = slow5.Open('examples/adv/example3.blow5','r', DEBUG=debug) +reads = s58.seq_reads(aux='all') + +for read in reads: + record, aux = F.get_empty_record(aux=True) + for i in read: + if i in record: + record[i] = read[i] + if i in aux: + aux[i] = read[i] + ret = F.write_record(record, aux) + print("ret: write_record(): {}".format(ret)) + +s58.close() +F.close() + +print("==============================================") +print("write reads with aux multi") + +F = 
slow5.Open('examples/example_write_aux_enum_multi.blow5','w', DEBUG=debug) +header, end_reason_labels = F.get_empty_header(aux=True) +header2 = F.get_empty_header() + +counter = 0 +for i in header: + header[i] = "test_{}".format(counter) + counter += 1 + +for i in header2: + header2[i] = "test_{}".format(counter) + counter += 1 + +ret = F.write_header(header, end_reason_labels=end_reason_labels) +print("ret: write_header(): {}".format(ret)) +ret = F.write_header(header2, read_group=1) +print("ret: write_header(): {}".format(ret)) + +s58 = slow5.Open('examples/adv/example3.blow5','r', DEBUG=debug) +reads = s58.seq_reads(aux='all') + +records = {} +auxs = {} +for read in reads: + record, aux = F.get_empty_record(aux=True) + # record = F.get_empty_record() + for i in read: + if i == "read_id": + readID = read[i] + if i in record: + record[i] = read[i] + if i in aux: + aux[i] = read[i] + records[readID] = record + auxs[readID] = aux +print(records) +print(auxs) +ret = F.write_record_batch(records, threads=2, batchsize=3, aux=auxs) +print("ret: write_record(): {}".format(ret)) + +s58.close() +F.close() + print("==============================================") # print("seq_reads with big file:") # start_time = time.time() diff --git a/python/pyslow5.pxd b/python/pyslow5.pxd index ad5ff874..2c3d435f 100644 --- a/python/pyslow5.pxd +++ b/python/pyslow5.pxd @@ -138,6 +138,7 @@ cdef extern from "pyslow5.h": int slow5_hdr_write(slow5_file_t *sf); int slow5_write(slow5_rec_t *rec, slow5_file_t *sf); int slow5_aux_add(const char *attr, slow5_aux_type type, slow5_hdr_t *header); + int slow5_aux_add_enum(const char *field, const char **enum_labels, uint8_t num_labels, slow5_hdr_t *header) int slow5_aux_set(slow5_rec_t *read, const char *attr, const void *data, slow5_hdr_t *header); int slow5_aux_set_string(slow5_rec_t *read, const char *attr, const char *data, slow5_hdr_t *header); diff --git a/python/pyslow5.pyx b/python/pyslow5.pyx index 66a36e32..649f66f6 100644 --- 
a/python/pyslow5.pyx +++ b/python/pyslow5.pyx @@ -86,17 +86,21 @@ cdef class Open: cdef char *read_number cdef char *start_mux cdef char *start_time - cdef char *end_reason # need to add end_reason_val + cdef char *end_reason + cdef char **end_reason_labels + cdef pyslow5.uint8_t end_reason_labels_len cdef char *channel_number_val cdef double median_before_val cdef pyslow5.int32_t read_number_val cdef pyslow5.uint8_t start_mux_val cdef pyslow5.uint64_t start_time_val + cdef pyslow5.uint8_t end_reason_val cdef char **channel_number_val_array cdef double *median_before_val_array cdef pyslow5.int32_t *read_number_val_array cdef pyslow5.uint8_t *start_mux_val_array cdef pyslow5.uint64_t *start_time_val_array + cdef pyslow5.uint8_t *end_reason_val_array cdef pyslow5.float total_time_slow5_get_next cdef pyslow5.float total_time_yield_reads @@ -165,19 +169,22 @@ cdef class Open: self.start_mux = strdup("start_mux") self.start_time = strdup("start_time") self.end_reason = strdup("end_reason") + self.end_reason_labels = NULL + self.end_reason_labels_len = 0 channel_number_val = NULL median_before_val = -1.0 read_number_val = -1 start_mux_val = -1 start_time_val = -1 + end_reason_val = -1 channel_number_val_array = NULL median_before_val_array = NULL read_number_val_array = NULL start_mux_val_array = NULL start_time_val_array = NULL + end_reason_val_array = NULL - # cdef something end_reason # some enum self.total_time_slow5_get_next = 0.0 self.total_time_yield_reads = 0.0 self.total_single_write_time = 0.0 @@ -314,6 +321,10 @@ cdef class Open: free(self.start_mux) free(self.start_time) free(self.end_reason) + if self.end_reason_labels is not NULL: + for i in range(self.end_reason_labels_len): + free(self.end_reason_labels[i]) + free(self.end_reason_labels) self.logger.debug("pathname: {}".format(self.path)) self.logger.debug("total_time_slow5_get_next: {} seconds".format(self.total_time_slow5_get_next)) @@ -1371,7 +1382,7 @@ cdef class Open: # Write SLOW5 file # 
========================================================================== - def get_empty_header(self): + def get_empty_header(self, aux=False): ''' returns example empty header dic for user to populate Any values not populated will be skipped @@ -1415,6 +1426,10 @@ cdef class Open: "hublett_board_id": None, "satellite_firmware_version": None} + end_reason_labels = ['unknown', 'partial', 'mux_change', 'unblock_mux_change', 'signal_positive', 'signal_negative'] + + if aux: + return header, end_reason_labels return header def get_empty_record(self, aux=False): @@ -1435,8 +1450,9 @@ cdef class Open: "median_before": None, "read_number": None, "start_mux": None, - "start_time": None} - + "start_time": None, + "end_reason": None} + if aux: return record, aux_rec return record @@ -1473,14 +1489,14 @@ cdef class Open: "read_number": type(10), "start_mux": type(1), "start_time": type(100), - "end_reason": None} + "end_reason": type(["a", "b", "c"])} C_aux_types = {"channel_number": SLOW5_STRING, "median_before":SLOW5_DOUBLE, "read_number": SLOW5_INT32_T, "start_mux": SLOW5_UINT8_T, "start_time": SLOW5_UINT64_T, - "end_reason": None} + "end_reason": SLOW5_ENUM} for a in user_aux_types: if user_aux_types[a] is None: @@ -1516,7 +1532,7 @@ cdef class Open: "read_number": type(10), "start_mux": type(1), "start_time": type(100), - "end_reason": None} + "end_reason": type(1)} new_aux = {} @@ -1561,8 +1577,11 @@ cdef class Open: self.start_time_val = aux[a] new_aux[a] = aux[a] elif a == "end_reason": - continue - self.logger.debug("_record_type_validation: doing aux stuff...") + self.end_reason_val = aux[a] + new_aux[a] = aux[a] + else: + self.logger.error("_record_type_validation {}: {} user aux field unknown?".format(a, aux[a])) + return user_record, new_aux @@ -1587,7 +1606,7 @@ cdef class Open: "read_number": type(10), "start_mux": type(1), "start_time": type(100), - "end_reason": None} + "end_reason": type(1)} new_aux = {} @@ -1627,7 +1646,9 @@ cdef class Open: elif a == 
"start_time": new_aux[a] = aux[a] elif a == "end_reason": - continue + new_aux[a] = aux[a] + else: + self.logger.error("_record_type_validation {}: {} user aux field unknown".format(a, aux[a])) self.logger.debug("_record_type_validation: aux stuff done") @@ -1635,7 +1656,7 @@ cdef class Open: return user_record, new_aux - def write_header(self, header, read_group=0): + def write_header(self, header, read_group=0, end_reason_labels=None): ''' write slow5 header to file. takes header dic for attributes, then write once. @@ -1672,6 +1693,27 @@ cdef class Open: if ret < 0: self.logger.error("write_header: slow5_hdr_set {}: {} could not set to C s5.header struct".format(h, checked_header[h])) errors = True + + # check end_reason_labels type + if end_reason_labels is not None: + erl = [] + for i in end_reason_labels: + t = type(i) + if t is not type("string"): + self.logger.error("write_header: end_reason_labels: {} not type: string, trying to convert".format(i)) + try: + s = str(i) + erl.append(s) + self.logger.warning("write_header end_reason_labels: {} conversion successful".format(s)) + except: + self.logger.error("write_header end_reason_labels: {} could not convert value to string".format(i)) + errors = True + else: + erl.append(i) + self.end_reason_labels = malloc(sizeof(char*)*len(erl)) + for i in range(len(erl)): + self.end_reason_labels[i] = strdup(erl[i].encode()) + self.end_reason_labels_len = len(erl) if not errors: return 0 @@ -1693,7 +1735,7 @@ cdef class Open: "read_number": type(10), "start_mux": type(1), "start_time": type(100), - "end_reason": None} + "end_reason": type(["a", "b", "c"])} self.logger.debug("write_record: _record_type_validation running") checked_record, checked_aux = self._record_type_validation(record, aux) @@ -1712,12 +1754,20 @@ cdef class Open: if checked_aux is not None: slow5_aux_types = self._aux_header_type_validation(aux_types) for a in slow5_aux_types: - if slow5_aux_types[a] is None: + if a not in checked_aux: continue - ret 
= slow5_aux_add(a.encode(), slow5_aux_types[a], self.s5.header) - if ret < 0: - self.logger.error("write_record: slow5_aux_add {}: {} could not set to C s5.header.aux_meta struct".format(a, checked_aux[a])) - error = True + elif checked_aux[a] is None: + continue + elif slow5_aux_types[a] == SLOW5_ENUM: + ret = slow5_aux_add_enum(a.encode(), self.end_reason_labels, self.end_reason_labels_len, self.s5.header) + if ret < 0: + self.logger.error("write_record: slow5_aux_add_enum {}: {} could not set to C s5.header.aux_meta struct".format(a, checked_aux[a])) + error = True + else: + ret = slow5_aux_add(a.encode(), slow5_aux_types[a], self.s5.header) + if ret < 0: + self.logger.error("write_record: slow5_aux_add {}: {} could not set to C s5.header.aux_meta struct".format(a, checked_aux[a])) + error = True else: error = True @@ -1787,6 +1837,7 @@ cdef class Open: self.logger.error("write_record: slow5_aux_set_string could not write aux value {}: {}".format(a, checked_aux[a])) #### We should free here return -1 + elif a == "median_before": ret = slow5_aux_set(self.write, self.median_before, &self.median_before_val, self.s5.header) elif a == "read_number": @@ -1796,8 +1847,9 @@ cdef class Open: elif a == "start_time": ret = slow5_aux_set(self.write, self.start_time, &self.start_time_val, self.s5.header) elif a == "end_reason": - # not implemented yet becuase of variability in ONT versioning - ret = 0 + ret = slow5_aux_set(self.write, self.end_reason, &self.end_reason_val, self.s5.header) + else: + ret = -1 if ret < 0: self.logger.error("write_record: slow5_aux_set could not write aux value {}: {}".format(a, checked_aux[a])) return -1 @@ -1817,6 +1869,7 @@ cdef class Open: self.read_number_val = -1 self.start_mux_val = -1 self.start_time_val = -1 + self.end_reason_val = -1 # free memory @@ -1842,7 +1895,7 @@ cdef class Open: "read_number": type(10), "start_mux": type(1), "start_time": type(100), - "end_reason": None} + "end_reason": type(["a", "b", "c"])} # check if empty 
dic was given if aux is not None: if len(aux) == 0: @@ -1868,6 +1921,7 @@ cdef class Open: self.read_number_val_array = malloc(sizeof(int32_t)*batch_len) self.start_mux_val_array = malloc(sizeof(uint8_t)*batch_len) self.start_time_val_array = malloc(sizeof(uint64_t)*batch_len) + self.end_reason_val_array = malloc(sizeof(uint8_t)*batch_len) for i, idx in enumerate(batch): if aux is not None: checked_record, checked_aux = self._multi_record_type_validation(records[idx], aux[idx]) @@ -1883,6 +1937,8 @@ cdef class Open: self.start_mux_val_array[i] = checked_aux[a] elif a == "start_time": self.start_time_val_array[i] = checked_aux[a] + elif a == "end_reason": + self.end_reason_val_array[i] = checked_aux[a] checked_auxs[idx] = checked_aux else: checked_record, checked_aux = self._record_type_validation(records[idx], aux) @@ -1905,12 +1961,20 @@ cdef class Open: if checked_aux is not None: slow5_aux_types = self._aux_header_type_validation(aux_types) for a in slow5_aux_types: - if slow5_aux_types[a] is None: + if a not in checked_aux: + continue + elif checked_aux[a] is None: continue - ret = slow5_aux_add(a.encode(), slow5_aux_types[a], self.s5.header) - if ret < 0: - self.logger.error("write_record_batch: slow5_aux_add {}: {} could not set to C s5.header.aux_meta struct".format(a, checked_aux[a])) - error = True + elif slow5_aux_types[a] == SLOW5_ENUM: + ret = slow5_aux_add_enum(a.encode(), self.end_reason_labels, self.end_reason_labels_len, self.s5.header) + if ret < 0: + self.logger.error("write_record: slow5_aux_add_enum {}: {} could not set to C s5.header.aux_meta struct".format(a, checked_aux[a])) + error = True + else: + ret = slow5_aux_add(a.encode(), slow5_aux_types[a], self.s5.header) + if ret < 0: + self.logger.error("write_record_batch: slow5_aux_add {}: {} could not set to C s5.header.aux_meta struct".format(a, checked_aux[a])) + error = True else: error = True @@ -1982,6 +2046,7 @@ cdef class Open: self.logger.error("write_record_batch: slow5_aux_set_string 
could not write aux value {}: {}".format(a, checked_auxs[batch[idx]][a])) #### We should free here return -1 + elif a == "median_before": ret = slow5_aux_set(self.twrite[idx], self.median_before, &self.median_before_val_array[idx], self.s5.header) elif a == "read_number": @@ -1991,8 +2056,9 @@ cdef class Open: elif a == "start_time": ret = slow5_aux_set(self.twrite[idx], self.start_time, &self.start_time_val_array[idx], self.s5.header) elif a == "end_reason": - # not implemented yet becuase of variability in ONT versioning - ret = 0 + ret = slow5_aux_set(self.twrite[idx], self.end_reason, &self.end_reason_val_array[idx], self.s5.header) + else: + ret = -1 if ret < 0: self.logger.error("write_record_batch: slow5_aux_set could not write aux value {}: {}".format(a, checked_aux[a])) return -1 @@ -2027,6 +2093,7 @@ cdef class Open: free(self.read_number_val_array) free(self.start_mux_val_array) free(self.start_time_val_array) + free(self.end_reason_val_array) end_multi_write = time.time() - start_multi_write self.total_multi_write_time = self.total_multi_write_time + end_multi_write From 5e9cddc6f1eec47e8b32d021f27bb68fbb1c6d68 Mon Sep 17 00:00:00 2001 From: Hasindu Gamaarachchi Date: Mon, 17 Oct 2022 18:15:16 +1100 Subject: [PATCH 13/18] scripts for testing --- python/pydebug.sh | 11 ++++++++--- python/pytest.sh | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 3 deletions(-) create mode 100755 python/pytest.sh diff --git a/python/pydebug.sh b/python/pydebug.sh index 923a73a0..e7e5265a 100755 --- a/python/pydebug.sh +++ b/python/pydebug.sh @@ -1,11 +1,16 @@ #!/bin/bash +die() { + echo "$1" >&2 + echo + exit 1 +} make clean rm -rf *.so python/pyslow5.cpp python/pyslow5.c build/lib.* build/temp.* GCC_ASAN_PRELOAD=$(gcc -print-file-name=libasan.so) -CFLAGS="-fsanitize=address -fno-omit-frame-pointer" python3 setup.py build -cp build/lib.*/*.so ./ +CFLAGS="-fsanitize=address -fno-omit-frame-pointer" python3 setup.py build || die "Failed to 
build pyslow5" +cp build/lib.*/*.so ./ || die "Failed to copy .so file" echo $GCC_ASAN_PRELOAD -LD_PRELOAD=$GCC_ASAN_PRELOAD python3 < python/example.py +LD_PRELOAD=$GCC_ASAN_PRELOAD python3 < python/example.py || die "Failed to run example.py" # LD_PRELOAD=$GCC_ASAN_PRELOAD python3 -m unittest -v python/test.py diff --git a/python/pytest.sh b/python/pytest.sh new file mode 100755 index 00000000..adc606e9 --- /dev/null +++ b/python/pytest.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# terminate script +die() { + echo "$1" >&2 + echo + exit 1 +} + + +python/pydebug.sh || die "pydebug.sh failed" +rm *.so + +make clean +rm -f dist/* valgrind.log +test -d ./venv && rm -r ./venv + +make pyslow5 || die "make pyslow5 failed" +python3.7 -m venv ./venv || die "Failed to create virtual environment" +source ./venv/bin/activate || die "Failed to activate virtual environment" +pip install --upgrade pip || die "Failed to update pip" +pip install dist/*.tar.gz +python3 python/example.py || die "Failed to run example.py" +python3 -m unittest -v python/test.py || die "Failed to run test.py" +valgrind --log-file=valgrind.log --leak-check=full --track-origins=yes --suppressions=python/valgrind-python.supp python3 python/example.py || die "Failed to run example.py under valgrind" +tail valgrind.log || die "Failed to tail valgrind.log" +def_lost=$(grep "definitely lost:" valgrind_lazymt_1.log | awk '{print $4}') +ind_lost=$(grep "indirectly lost:" valgrind_lazymt_1.log | awk '{print $4}') +if [ $def_lost -gt 40 ] || [ $ind_lost -gt 0 ]; then + die "Memory leak detected" +fi +rm -r ./venv + +python3.7 -m venv ./venv || die "Failed to create virtual environment" +source ./venv/bin/activate || die "Failed to activate virtual environment" +pip install --upgrade pip || die "Failed to update pip" +pip3 install twine || die "Failed to install twine" +twine check dist/* || die "Failed to check dist/*" +rm -r ./venv + +echo "All tests passed" \ No newline at end of file From 
5952e05273fc92ce9089bebe0da328b8901535c8 Mon Sep 17 00:00:00 2001 From: Hasindu Gamaarachchi Date: Mon, 17 Oct 2022 20:55:33 +1100 Subject: [PATCH 14/18] add more valgrind exceptions --- python/pydebug.sh | 9 +++++++-- python/pyslow5.pyx | 12 ++++++------ python/pytest.sh | 17 ++++++++++------- python/valgrind-python.supp | 29 +++++++++++++++++++++++++++++ 4 files changed, 52 insertions(+), 15 deletions(-) diff --git a/python/pydebug.sh b/python/pydebug.sh index e7e5265a..440eb439 100755 --- a/python/pydebug.sh +++ b/python/pydebug.sh @@ -7,10 +7,15 @@ die() { } make clean -rm -rf *.so python/pyslow5.cpp python/pyslow5.c build/lib.* build/temp.* +rm -rf *.so python/pyslow5.cpp python/pyslow5.c build/lib.* build/temp.* asan.log GCC_ASAN_PRELOAD=$(gcc -print-file-name=libasan.so) CFLAGS="-fsanitize=address -fno-omit-frame-pointer" python3 setup.py build || die "Failed to build pyslow5" cp build/lib.*/*.so ./ || die "Failed to copy .so file" echo $GCC_ASAN_PRELOAD -LD_PRELOAD=$GCC_ASAN_PRELOAD python3 < python/example.py || die "Failed to run example.py" +LD_PRELOAD=$GCC_ASAN_PRELOAD python3 < python/example.py &> asan.log +tail asan.log || die "Failed to tail asan.log" +leaks=$(tail asan.log | grep "SUMMARY: AddressSanitizer" | awk '{print $3}') +# if [ $leaks -gt 1607006 ]; then +# die "Memory leak detected" +# fi # LD_PRELOAD=$GCC_ASAN_PRELOAD python3 -m unittest -v python/test.py diff --git a/python/pyslow5.pyx b/python/pyslow5.pyx index 649f66f6..0a3140dd 100644 --- a/python/pyslow5.pyx +++ b/python/pyslow5.pyx @@ -1360,7 +1360,7 @@ cdef class Open: aux_types = [self.s5_aux_type[i] for i in range(self.aux_len)] return aux_types - + def get_aux_enum_labels(self, label): ''' @@ -1373,7 +1373,7 @@ cdef class Open: if self.s5_aux_enum == NULL: self.logger.warning("get_aux_enum_labels enum_labels is NULL") return labels - + labels = [self.s5_aux_enum[i].decode() for i in range(self.enum_len)] return labels @@ -1452,7 +1452,7 @@ cdef class Open: "start_mux": None, 
"start_time": None, "end_reason": None} - + if aux: return record, aux_rec return record @@ -1581,7 +1581,7 @@ cdef class Open: new_aux[a] = aux[a] else: self.logger.error("_record_type_validation {}: {} user aux field unknown?".format(a, aux[a])) - + return user_record, new_aux @@ -1693,7 +1693,7 @@ cdef class Open: if ret < 0: self.logger.error("write_header: slow5_hdr_set {}: {} could not set to C s5.header struct".format(h, checked_header[h])) errors = True - + # check end_reason_labels type if end_reason_labels is not None: erl = [] @@ -2046,7 +2046,7 @@ cdef class Open: self.logger.error("write_record_batch: slow5_aux_set_string could not write aux value {}: {}".format(a, checked_auxs[batch[idx]][a])) #### We should free here return -1 - + elif a == "median_before": ret = slow5_aux_set(self.twrite[idx], self.median_before, &self.median_before_val_array[idx], self.s5.header) elif a == "read_number": diff --git a/python/pytest.sh b/python/pytest.sh index adc606e9..30b29f95 100755 --- a/python/pytest.sh +++ b/python/pytest.sh @@ -7,6 +7,7 @@ die() { exit 1 } +test -z "$PYTHON" && PYTHON=python3 python/pydebug.sh || die "pydebug.sh failed" rm *.so @@ -16,26 +17,28 @@ rm -f dist/* valgrind.log test -d ./venv && rm -r ./venv make pyslow5 || die "make pyslow5 failed" -python3.7 -m venv ./venv || die "Failed to create virtual environment" +${PYTHON} -m venv ./venv || die "Failed to create virtual environment" source ./venv/bin/activate || die "Failed to activate virtual environment" -pip install --upgrade pip || die "Failed to update pip" +pip install --upgrade pip wheel numpy || die "Failed to update pip" pip install dist/*.tar.gz python3 python/example.py || die "Failed to run example.py" python3 -m unittest -v python/test.py || die "Failed to run test.py" -valgrind --log-file=valgrind.log --leak-check=full --track-origins=yes --suppressions=python/valgrind-python.supp python3 python/example.py || die "Failed to run example.py under valgrind" +valgrind 
--error-exitcode=1 --log-file=valgrind.log --leak-check=full --track-origins=yes --suppressions=python/valgrind-python.supp python3 python/example.py || die "Failed to run example.py under valgrind" tail valgrind.log || die "Failed to tail valgrind.log" -def_lost=$(grep "definitely lost:" valgrind_lazymt_1.log | awk '{print $4}') -ind_lost=$(grep "indirectly lost:" valgrind_lazymt_1.log | awk '{print $4}') +def_lost=$(grep "definitely lost:" valgrind.log | awk '{print $4}') +ind_lost=$(grep "indirectly lost:" valgrind.log | awk '{print $4}') if [ $def_lost -gt 40 ] || [ $ind_lost -gt 0 ]; then die "Memory leak detected" fi +deactivate rm -r ./venv -python3.7 -m venv ./venv || die "Failed to create virtual environment" +${PYTHON} -m venv ./venv || die "Failed to create virtual environment" source ./venv/bin/activate || die "Failed to activate virtual environment" -pip install --upgrade pip || die "Failed to update pip" +pip install --upgrade pip wheel numpy || die "Failed to update pip" pip3 install twine || die "Failed to install twine" twine check dist/* || die "Failed to check dist/*" +deactivate rm -r ./venv echo "All tests passed" \ No newline at end of file diff --git a/python/valgrind-python.supp b/python/valgrind-python.supp index 487f8dc8..0c8f1fba 100644 --- a/python/valgrind-python.supp +++ b/python/valgrind-python.supp @@ -497,3 +497,32 @@ fun:PyUnicode_FSConverter } +## Custom added - might be wrong + +{ + Suppress leaking the dlopen + Memcheck:Leak + fun:malloc + fun:_dl_map_object_deps + fun:dl_open_worker + fun:_dl_catch_error + fun:_dl_open +} + +{ + Suppress leaking the PyMem_RawMalloc + Memcheck:Leak + fun:malloc + fun:PyMem_RawMalloc + fun:_PyObject_Malloc +} + +{ + Suppress leaking the PyMem_RawMalloc + Memcheck:Leak + fun:realloc + fun:_PyObject_GC_Resize + fun:_PyFrame_New_NoTrack +} + + From 4520b9a2b816e9ee2669002c6f336a59a33b89d0 Mon Sep 17 00:00:00 2001 From: Psy-Fer Date: Mon, 17 Oct 2022 23:42:58 +1100 Subject: [PATCH 15/18] add docs for 
writing end_reason enum --- docs/pyslow5_api/pyslow5.md | 31 +++++++++++++++++++++++++++++-- python/README.md | 31 +++++++++++++++++++++++++++++-- 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/docs/pyslow5_api/pyslow5.md b/docs/pyslow5_api/pyslow5.md index 9cb0818a..31864edd 100644 --- a/docs/pyslow5_api/pyslow5.md +++ b/docs/pyslow5_api/pyslow5.md @@ -340,7 +340,7 @@ print("{}: {}".format(er_index, er)) To write a file, `mode` in `Open()` must be set to `'w'` and when appending, `'a'` -#### `get_empty_header()`: +#### `get_empty_header(aux=False)`: Returns a dictionary containing all known header attributes with their values set to `None`. @@ -348,6 +348,9 @@ User can modify each value, and add or remove attributes to be used has header i All values end up stored as strings, and anything left as `None` will be skipped. To write header, see `write_header()` +If `aux=True`, an ordered list of strings for the enum `end_reason` will be returned. +This can be modified depending on the end reason. + Example: ```python @@ -355,12 +358,19 @@ s5 = slow5.Open(file,'w') header = s5.get_empty_header() ``` -#### `write_header(header, read_group=0)`: +`end_reason` enum example + +```python +s5 = slow5.Open(file, w) +header, end_reason_labels = s5.get_empty_header(aux=True) + +#### `write_header(header, read_group=0, end_reason_labels=None)`: Write header to file + `header` = populated dictionary from `get_empty_header()` + read_group = read group integer for when multiple runs are written to the same slow5 file ++ end_reason_labels = ordered list used for end_reason enum + returns 0 on success, <0 on error with error code You must write `read_group=0` (default) first before writing any other read_groups, and it is advised to write read_groups in sequential order. 
@@ -390,6 +400,23 @@ ret = s5.write_header(header2, read_group=1) print("ret: write_header(): {}".format(ret)) ``` +`end_reason` example: + +```python +# Get some empty headers +header, end_reason_labels = s5.get_empty_header(aux=True) + +# Populate headers with some test data +counter = 0 +for i in header: + header[i] = "test_{}".format(counter) + counter += 1 + +# Write first read group +ret = s5.write_header(header, end_reason_labels=end_reason_labels) +print("ret: write_header(): {}".format(ret)) +``` + #### `get_empty_record(aux=False)`: Get empty read record for populating with data. Use with `write_record()` diff --git a/python/README.md b/python/README.md index 9cb0818a..31864edd 100644 --- a/python/README.md +++ b/python/README.md @@ -340,7 +340,7 @@ print("{}: {}".format(er_index, er)) To write a file, `mode` in `Open()` must be set to `'w'` and when appending, `'a'` -#### `get_empty_header()`: +#### `get_empty_header(aux=False)`: Returns a dictionary containing all known header attributes with their values set to `None`. @@ -348,6 +348,9 @@ User can modify each value, and add or remove attributes to be used has header i All values end up stored as strings, and anything left as `None` will be skipped. To write header, see `write_header()` +If `aux=True`, an ordered list of strings for the enum `end_reason` will be returned. +This can be modified depending on the end reason. 
+ Example: ```python @@ -355,12 +358,19 @@ s5 = slow5.Open(file,'w') header = s5.get_empty_header() ``` -#### `write_header(header, read_group=0)`: +`end_reason` enum example + +```python +s5 = slow5.Open(file, w) +header, end_reason_labels = s5.get_empty_header(aux=True) + +#### `write_header(header, read_group=0, end_reason_labels=None)`: Write header to file + `header` = populated dictionary from `get_empty_header()` + read_group = read group integer for when multiple runs are written to the same slow5 file ++ end_reason_labels = ordered list used for end_reason enum + returns 0 on success, <0 on error with error code You must write `read_group=0` (default) first before writing any other read_groups, and it is advised to write read_groups in sequential order. @@ -390,6 +400,23 @@ ret = s5.write_header(header2, read_group=1) print("ret: write_header(): {}".format(ret)) ``` +`end_reason` example: + +```python +# Get some empty headers +header, end_reason_labels = s5.get_empty_header(aux=True) + +# Populate headers with some test data +counter = 0 +for i in header: + header[i] = "test_{}".format(counter) + counter += 1 + +# Write first read group +ret = s5.write_header(header, end_reason_labels=end_reason_labels) +print("ret: write_header(): {}".format(ret)) +``` + #### `get_empty_record(aux=False)`: Get empty read record for populating with data. 
Use with `write_record()` From a82880ac9340ffe059300dee2b17996a276fe0b9 Mon Sep 17 00:00:00 2001 From: Hasindu Gamaarachchi Date: Tue, 18 Oct 2022 00:29:09 +1100 Subject: [PATCH 16/18] update suppression --- python/valgrind-python.supp | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/python/valgrind-python.supp b/python/valgrind-python.supp index 0c8f1fba..10c942c9 100644 --- a/python/valgrind-python.supp +++ b/python/valgrind-python.supp @@ -503,9 +503,7 @@ Suppress leaking the dlopen Memcheck:Leak fun:malloc - fun:_dl_map_object_deps - fun:dl_open_worker - fun:_dl_catch_error + ... fun:_dl_open } @@ -525,4 +523,27 @@ fun:_PyFrame_New_NoTrack } +{ + Suppress leaking the _dl_find_object_update + Memcheck:Leak + fun:malloc + fun:_dl_find_object_update + fun:dl_open_worker_begin +} +{ + Suppress leaking the _PyEval_EvalFrameDefault + Memcheck:Leak + fun:malloc + ... + fun:_PyEval_EvalFrameDefault + fun:_PyFunction_Vectorcall +} + +{ + Suppress leaking the _PyEval_EvalFrameDefault + Memcheck:Leak + fun:malloc + ... 
+ fun:_PyEval_EvalFrameDefault +} From acadc74daa76d2119aff5bedc74262e7798aa2ba Mon Sep 17 00:00:00 2001 From: Psy-Fer Date: Tue, 18 Oct 2022 00:42:04 +1100 Subject: [PATCH 17/18] fix readme --- docs/pyslow5_api/pyslow5.md | 3 ++- python/README.md | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/pyslow5_api/pyslow5.md b/docs/pyslow5_api/pyslow5.md index 31864edd..11c71679 100644 --- a/docs/pyslow5_api/pyslow5.md +++ b/docs/pyslow5_api/pyslow5.md @@ -328,7 +328,7 @@ print(end_reason_labels) readID = "r1" read = s5.get_read(readID, aux='all') -er_index = read['end_reason] +er_index = read['end_reason'] er = end_reason_labels[er_index] print("{}: {}".format(er_index, er)) @@ -363,6 +363,7 @@ header = s5.get_empty_header() ```python s5 = slow5.Open(file, w) header, end_reason_labels = s5.get_empty_header(aux=True) +``` #### `write_header(header, read_group=0, end_reason_labels=None)`: diff --git a/python/README.md b/python/README.md index 31864edd..11c71679 100644 --- a/python/README.md +++ b/python/README.md @@ -328,7 +328,7 @@ print(end_reason_labels) readID = "r1" read = s5.get_read(readID, aux='all') -er_index = read['end_reason] +er_index = read['end_reason'] er = end_reason_labels[er_index] print("{}: {}".format(er_index, er)) @@ -363,6 +363,7 @@ header = s5.get_empty_header() ```python s5 = slow5.Open(file, w) header, end_reason_labels = s5.get_empty_header(aux=True) +``` #### `write_header(header, read_group=0, end_reason_labels=None)`: From d814e568c00ecae0307e922e03b75d4559b5822a Mon Sep 17 00:00:00 2001 From: Hasindu Gamaarachchi Date: Tue, 18 Oct 2022 01:21:29 +1100 Subject: [PATCH 18/18] do make pyslow5 inside venv --- python/README.md | 2 +- python/pytest.sh | 3 ++- python/valgrind-python.supp | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/python/README.md b/python/README.md index 11c71679..3f9debd5 100644 --- a/python/README.md +++ b/python/README.md @@ -79,7 +79,7 @@ git clone 
git@github.com:hasindu2008/slow5lib.git cd slow5lib make -# CHOOSE A OR B: +# CHOOSE A OR B: # (B is the cleanest method) # |=======================================================================| # |A. Install with pip if wheel is present, otherwise it uses setuptools | diff --git a/python/pytest.sh b/python/pytest.sh index 30b29f95..55948693 100755 --- a/python/pytest.sh +++ b/python/pytest.sh @@ -16,10 +16,11 @@ make clean rm -f dist/* valgrind.log test -d ./venv && rm -r ./venv -make pyslow5 || die "make pyslow5 failed" + ${PYTHON} -m venv ./venv || die "Failed to create virtual environment" source ./venv/bin/activate || die "Failed to activate virtual environment" pip install --upgrade pip wheel numpy || die "Failed to update pip" +make pyslow5 || die "make pyslow5 failed" pip install dist/*.tar.gz python3 python/example.py || die "Failed to run example.py" python3 -m unittest -v python/test.py || die "Failed to run test.py" diff --git a/python/valgrind-python.supp b/python/valgrind-python.supp index 10c942c9..43f60ead 100644 --- a/python/valgrind-python.supp +++ b/python/valgrind-python.supp @@ -544,6 +544,7 @@ Suppress leaking the _PyEval_EvalFrameDefault Memcheck:Leak fun:malloc + fun:_PyObject_GC_NewVar ... fun:_PyEval_EvalFrameDefault }