-
Notifications
You must be signed in to change notification settings - Fork 442
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This is like bcf_sr_add_reader but the caller supplies an existing open htsFile instead of a filename. Fixes #1862
- Loading branch information
1 parent
329e794
commit dac8f29
Showing
4 changed files
with
139 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
/// @file htslib/synced_bcf_reader.h | ||
/// Stream through multiple VCF files. | ||
/* | ||
Copyright (C) 2012-2017, 2019-2024 Genome Research Ltd. | ||
Copyright (C) 2012-2017, 2019-2025 Genome Research Ltd. | ||
Author: Petr Danecek <[email protected]> | ||
|
@@ -233,10 +233,30 @@ void bcf_sr_destroy_threads(bcf_srs_t *files); | |
* | ||
* See also the bcf_srs_t data structure for parameters controlling | ||
* the reader's logic. | ||
* Invokes bcf_sr_add_hreader with opened file | ||
*/ | ||
HTSLIB_EXPORT | ||
int bcf_sr_add_reader(bcf_srs_t *readers, const char *fname); | ||
|
||
/** | ||
* bcf_sr_add_hreader() - open new reader using htsfile | ||
* @readers: holder of the open readers | ||
* @file_ptr: htsfile already opened | ||
* @autoclose: close file along with reader or not, 1 - close, 0 - do not close | ||
* @idxname: index file name for file in @file_ptr | ||
* | ||
* Returns 1 if the call succeeded, or 0 on error. | ||
* | ||
* See also the bcf_srs_t data structure for parameters controlling | ||
* the reader's logic. | ||
* If idxname is NULL, uses file_ptr->fn to find index file. | ||
* With idxname as NULL, index file must be present along with the file with | ||
* default name | ||
*/ | ||
HTSLIB_EXPORT | ||
int bcf_sr_add_hreader(bcf_srs_t *readers, htsFile *file_ptr, int autoclose, | ||
const char *idxname); | ||
|
||
HTSLIB_EXPORT | ||
void bcf_sr_remove_reader(bcf_srs_t *files, int i); | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
/* synced_bcf_reader.c -- stream through multiple VCF files. | ||
Copyright (C) 2012-2023 Genome Research Ltd. | ||
Copyright (C) 2012-2023, 2025 Genome Research Ltd. | ||
Author: Petr Danecek <[email protected]> | ||
|
@@ -69,6 +69,7 @@ typedef struct | |
{ | ||
sr_sort_t sort; | ||
int regions_overlap, targets_overlap; | ||
int *closefile; // close htsfile with sync reader close or not | ||
} | ||
aux_t; | ||
|
||
|
@@ -251,13 +252,32 @@ void bcf_sr_destroy_threads(bcf_srs_t *files) { | |
int bcf_sr_add_reader(bcf_srs_t *files, const char *fname) | ||
{ | ||
char fmode[5]; | ||
int ret = 0; | ||
const char *idxname = NULL; | ||
|
||
strcpy(fmode, "r"); | ||
vcf_open_mode(fmode+1, fname, NULL); | ||
htsFile* file_ptr = hts_open(fname, fmode); | ||
if ( ! file_ptr ) { | ||
files->errnum = open_failed; | ||
return 0; | ||
} | ||
//get idx name and pass to add_hreader | ||
idxname = strstr(fname, HTS_IDX_DELIM); | ||
idxname += idxname ? sizeof(HTS_IDX_DELIM) - 1 : 0; | ||
if (!(ret = bcf_sr_add_hreader(files, file_ptr, 1, idxname))) { | ||
hts_close(file_ptr); //failed, close the file | ||
} | ||
return ret; | ||
} | ||
|
||
int bcf_sr_add_hreader(bcf_srs_t *files, htsFile *file_ptr, int autoclose, const char *idxname) | ||
{ | ||
aux_t *auxdata = NULL; | ||
if ( ! file_ptr ) { | ||
files->errnum = open_failed; | ||
return 0; | ||
} | ||
|
||
files->has_line = (int*) realloc(files->has_line, sizeof(int)*(files->nreaders+1)); | ||
files->has_line[files->nreaders] = 0; | ||
|
@@ -274,7 +294,7 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname) | |
BGZF *bgzf = hts_get_bgzfp(reader->file); | ||
if ( bgzf && bgzf_check_EOF(bgzf) == 0 ) { | ||
files->errnum = no_eof; | ||
hts_log_warning("No BGZF EOF marker; file '%s' may be truncated", fname); | ||
hts_log_warning("No BGZF EOF marker; file '%s' may be truncated", file_ptr->fn); | ||
} | ||
if (files->p) | ||
bgzf_thread_pool(bgzf, files->p->pool, files->p->qsize); | ||
|
@@ -290,7 +310,7 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname) | |
return 0; | ||
} | ||
|
||
reader->tbx_idx = tbx_index_load(fname); | ||
reader->tbx_idx = tbx_index_load2(file_ptr->fn, idxname); | ||
if ( !reader->tbx_idx ) | ||
{ | ||
files->errnum = idx_load_failed; | ||
|
@@ -309,7 +329,7 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname) | |
|
||
reader->header = bcf_hdr_read(reader->file); | ||
|
||
reader->bcf_idx = bcf_index_load(fname); | ||
reader->bcf_idx = bcf_index_load2(file_ptr->fn, idxname); | ||
if ( !reader->bcf_idx ) | ||
{ | ||
files->errnum = idx_load_failed; | ||
|
@@ -362,7 +382,7 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname) | |
return 0; | ||
} | ||
|
||
reader->fname = strdup(fname); | ||
reader->fname = strdup(file_ptr->fn); | ||
if ( files->apply_filters ) | ||
reader->filter_ids = init_filters(reader->header, files->apply_filters, &reader->nfilter_ids); | ||
|
||
|
@@ -413,6 +433,18 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname) | |
} | ||
} | ||
|
||
if ((auxdata = BCF_SR_AUX(files))) { | ||
//store closure status for htsfile | ||
int *tmp = realloc(auxdata->closefile, sizeof(int) * files->nreaders); | ||
if (!tmp) { | ||
hts_log_error("Failed to allocate memory"); | ||
return 0; | ||
} | ||
tmp[files->nreaders - 1] = autoclose; | ||
auxdata->closefile = tmp; | ||
} | ||
|
||
|
||
return 1; | ||
} | ||
|
||
|
@@ -426,13 +458,15 @@ bcf_srs_t *bcf_sr_init(void) | |
return files; | ||
} | ||
|
||
static void bcf_sr_destroy1(bcf_sr_t *reader) | ||
static void bcf_sr_destroy1(bcf_sr_t *reader, int closefile) | ||
{ | ||
free(reader->fname); | ||
if ( reader->tbx_idx ) tbx_destroy(reader->tbx_idx); | ||
if ( reader->bcf_idx ) hts_idx_destroy(reader->bcf_idx); | ||
bcf_hdr_destroy(reader->header); | ||
hts_close(reader->file); | ||
if (closefile) { | ||
hts_close(reader->file); | ||
} | ||
if ( reader->itr ) tbx_itr_destroy(reader->itr); | ||
int j; | ||
for (j=0; j<reader->mbuffer; j++) | ||
|
@@ -445,8 +479,10 @@ static void bcf_sr_destroy1(bcf_sr_t *reader) | |
void bcf_sr_destroy(bcf_srs_t *files) | ||
{ | ||
int i; | ||
int *autoclose = BCF_SR_AUX(files)->closefile; | ||
|
||
for (i=0; i<files->nreaders; i++) | ||
bcf_sr_destroy1(&files->readers[i]); | ||
bcf_sr_destroy1(&files->readers[i], autoclose[i]); | ||
free(files->has_line); | ||
free(files->readers); | ||
for (i=0; i<files->n_smpl; i++) free(files->samples[i]); | ||
|
@@ -456,19 +492,23 @@ void bcf_sr_destroy(bcf_srs_t *files) | |
if (files->tmps.m) free(files->tmps.s); | ||
if (files->n_threads) bcf_sr_destroy_threads(files); | ||
bcf_sr_sort_destroy(&BCF_SR_AUX(files)->sort); | ||
free(autoclose); | ||
free(files->aux); | ||
free(files); | ||
} | ||
|
||
void bcf_sr_remove_reader(bcf_srs_t *files, int i) | ||
{ | ||
assert( !files->samples ); // not ready for this yet | ||
int *autoclose = BCF_SR_AUX(files)->closefile; | ||
|
||
bcf_sr_sort_remove_reader(files, &BCF_SR_AUX(files)->sort, i); | ||
bcf_sr_destroy1(&files->readers[i]); | ||
bcf_sr_destroy1(&files->readers[i], autoclose[i]); | ||
if ( i+1 < files->nreaders ) | ||
{ | ||
memmove(&files->readers[i], &files->readers[i+1], (files->nreaders-i-1)*sizeof(bcf_sr_t)); | ||
memmove(&files->has_line[i], &files->has_line[i+1], (files->nreaders-i-1)*sizeof(int)); | ||
memmove(&autoclose[i], &autoclose[i+1], (files->nreaders-i-1)*sizeof(int)); | ||
} | ||
files->nreaders--; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
/* | ||
Copyright (C) 2017, 2020, 2023 Genome Research Ltd. | ||
Copyright (C) 2017, 2020, 2023, 2025 Genome Research Ltd. | ||
Author: Petr Danecek <[email protected]> | ||
|
@@ -54,7 +54,7 @@ error(const char *format, ...) | |
void HTS_NORETURN usage(int exit_code) | ||
{ | ||
fprintf(stderr, "Usage: test-bcf-sr [OPTIONS] vcf-list.txt\n"); | ||
fprintf(stderr, " test-bcf-sr [OPTIONS] -args file1.bcf [...]\n"); | ||
fprintf(stderr, " test-bcf-sr [OPTIONS] --args file1.bcf [...]\n"); | ||
fprintf(stderr, "Options:\n"); | ||
fprintf(stderr, " --args pass filenames directly in argument list\n"); | ||
fprintf(stderr, " --no-index allow streaming\n"); | ||
|
@@ -63,6 +63,7 @@ void HTS_NORETURN usage(int exit_code) | |
fprintf(stderr, " -p, --pair <logic[+ref]> logic: snps,indels,both,snps+ref,indels+ref,both+ref,exact,some,all\n"); | ||
fprintf(stderr, " -r, --regions <reg_list> comma-separated list of regions\n"); | ||
fprintf(stderr, " -t, --targets <reg_list> comma-separated list of targets\n"); | ||
fprintf(stderr, " -u, --usefptr use hfile pointer interface on reader addition\n"); | ||
fprintf(stderr, "\n"); | ||
exit(exit_code); | ||
} | ||
|
@@ -133,13 +134,15 @@ int main(int argc, char *argv[]) | |
{"targets",required_argument,NULL,'t'}, | ||
{"no-index",no_argument,NULL,1000}, | ||
{"args",no_argument,NULL,1001}, | ||
{"usefptr",no_argument,NULL,'u'}, | ||
{NULL,0,NULL,0} | ||
}; | ||
|
||
int c, pair = 0, use_index = 1, use_fofn = 1; | ||
int c, pair = 0, use_index = 1, use_fofn = 1, usefptr = 0; | ||
enum htsExactFormat out_fmt = text_format; // for original pos + alleles | ||
const char *out_fn = NULL, *regions = NULL, *targets = NULL; | ||
while ((c = getopt_long(argc, argv, "o:O:p:r:t:h", loptions, NULL)) >= 0) | ||
htsFile **htsfp = NULL; | ||
while ((c = getopt_long(argc, argv, "o:O:p:r:t:hu", loptions, NULL)) >= 0) | ||
{ | ||
switch (c) | ||
{ | ||
|
@@ -179,6 +182,9 @@ int main(int argc, char *argv[]) | |
case 1001: | ||
use_fofn = 0; | ||
break; | ||
case 'u': | ||
usefptr = 1; //use htsfile interface instead of fname i/f | ||
break; | ||
case 'h': | ||
usage(EXIT_SUCCESS); | ||
default: usage(EXIT_FAILURE); | ||
|
@@ -218,8 +224,32 @@ int main(int argc, char *argv[]) | |
error("Failed to set targets\n"); | ||
} | ||
|
||
for (i=0; i<nvcf; i++) | ||
if ( !bcf_sr_add_reader(sr,vcfs[i]) ) error("Failed to open %s: %s\n", vcfs[i],bcf_sr_strerror(sr->errnum)); | ||
if (usefptr && !(htsfp = malloc(sizeof(htsFile*) * nvcf))) { | ||
error("Failed to allocate memory\n"); | ||
} | ||
|
||
for (i=0; i<nvcf; i++) { | ||
if (!usefptr) { | ||
if ( !bcf_sr_add_reader(sr,vcfs[i]) ) { | ||
error("Failed to open %s: %s\n", vcfs[i], | ||
bcf_sr_strerror(sr->errnum)); | ||
} | ||
} else { //use htsfile i/f | ||
if (!(htsfp[i] = hts_open(vcfs[i], "r"))) { | ||
error("Failed to open %s: %s\n", vcfs[i], | ||
bcf_sr_strerror(sr->errnum)); | ||
} | ||
/*with name, index can be anywhere, named as anything | ||
w/o name it has to be along with file with default naming*/ | ||
|
||
const char *idxname = strstr(vcfs[i], HTS_IDX_DELIM); | ||
idxname += idxname ? sizeof(HTS_IDX_DELIM) - 1 : 0; | ||
if ( !bcf_sr_add_hreader(sr, htsfp[i], 1, idxname) ) { | ||
error("Failed to add reader %s: %s\n", vcfs[i], | ||
bcf_sr_strerror(sr->errnum)); | ||
} | ||
} | ||
} | ||
|
||
if (!sr->readers || sr->nreaders < 1) | ||
error("No readers set, even though one was added\n"); | ||
|
@@ -264,6 +294,10 @@ int main(int argc, char *argv[]) | |
free(vcfs[i]); | ||
free(vcfs); | ||
} | ||
if (usefptr) { | ||
//files are closed along with sr destroy | ||
free(htsfp); | ||
} | ||
|
||
return 0; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters