Skip to content

Commit

Permalink
Add bcf_sr_add_hreader() interface
Browse files Browse the repository at this point in the history
This is like bcf_sr_add_reader but the caller supplies an existing
open htsFile instead of a filename.

Fixes #1862
  • Loading branch information
vasudeva8 authored and jkbonfield committed Jan 16, 2025
1 parent 329e794 commit dac8f29
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 16 deletions.
22 changes: 21 additions & 1 deletion htslib/synced_bcf_reader.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/// @file htslib/synced_bcf_reader.h
/// Stream through multiple VCF files.
/*
Copyright (C) 2012-2017, 2019-2024 Genome Research Ltd.
Copyright (C) 2012-2017, 2019-2025 Genome Research Ltd.
Author: Petr Danecek <[email protected]>
Expand Down Expand Up @@ -233,10 +233,30 @@ void bcf_sr_destroy_threads(bcf_srs_t *files);
*
* See also the bcf_srs_t data structure for parameters controlling
* the reader's logic.
* Invokes bcf_sr_add_hreader with opened file
*/
HTSLIB_EXPORT
int bcf_sr_add_reader(bcf_srs_t *readers, const char *fname);

/**
* bcf_sr_add_hreader() - open new reader using htsfile
* @readers: holder of the open readers
* @file_ptr: htsfile already opened
* @autoclose: close file along with reader or not, 1 - close, 0 - do not close
* @idxname: index file name for file in @file_ptr
*
* Returns 1 if the call succeeded, or 0 on error.
*
* See also the bcf_srs_t data structure for parameters controlling
* the reader's logic.
* If idxname is NULL, uses file_ptr->fn to find index file.
* With idxname as NULL, index file must be present along with the file with
* default name
*/
HTSLIB_EXPORT
int bcf_sr_add_hreader(bcf_srs_t *readers, htsFile *file_ptr, int autoclose,
const char *idxname);

HTSLIB_EXPORT
void bcf_sr_remove_reader(bcf_srs_t *files, int i);

Expand Down
58 changes: 49 additions & 9 deletions synced_bcf_reader.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* synced_bcf_reader.c -- stream through multiple VCF files.
Copyright (C) 2012-2023 Genome Research Ltd.
Copyright (C) 2012-2023, 2025 Genome Research Ltd.
Author: Petr Danecek <[email protected]>
Expand Down Expand Up @@ -69,6 +69,7 @@ typedef struct
{
sr_sort_t sort;
int regions_overlap, targets_overlap;
int *closefile; // close htsfile with sync reader close or not
}
aux_t;

Expand Down Expand Up @@ -251,13 +252,32 @@ void bcf_sr_destroy_threads(bcf_srs_t *files) {
int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
{
char fmode[5];
int ret = 0;
const char *idxname = NULL;

strcpy(fmode, "r");
vcf_open_mode(fmode+1, fname, NULL);
htsFile* file_ptr = hts_open(fname, fmode);
if ( ! file_ptr ) {
files->errnum = open_failed;
return 0;
}
//get idx name and pass to add_hreader
idxname = strstr(fname, HTS_IDX_DELIM);
idxname += idxname ? sizeof(HTS_IDX_DELIM) - 1 : 0;
if (!(ret = bcf_sr_add_hreader(files, file_ptr, 1, idxname))) {
hts_close(file_ptr); //failed, close the file
}
return ret;
}

int bcf_sr_add_hreader(bcf_srs_t *files, htsFile *file_ptr, int autoclose, const char *idxname)
{
aux_t *auxdata = NULL;
if ( ! file_ptr ) {
files->errnum = open_failed;
return 0;
}

files->has_line = (int*) realloc(files->has_line, sizeof(int)*(files->nreaders+1));
files->has_line[files->nreaders] = 0;
Expand All @@ -274,7 +294,7 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
BGZF *bgzf = hts_get_bgzfp(reader->file);
if ( bgzf && bgzf_check_EOF(bgzf) == 0 ) {
files->errnum = no_eof;
hts_log_warning("No BGZF EOF marker; file '%s' may be truncated", fname);
hts_log_warning("No BGZF EOF marker; file '%s' may be truncated", file_ptr->fn);
}
if (files->p)
bgzf_thread_pool(bgzf, files->p->pool, files->p->qsize);
Expand All @@ -290,7 +310,7 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
return 0;
}

reader->tbx_idx = tbx_index_load(fname);
reader->tbx_idx = tbx_index_load2(file_ptr->fn, idxname);
if ( !reader->tbx_idx )
{
files->errnum = idx_load_failed;
Expand All @@ -309,7 +329,7 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)

reader->header = bcf_hdr_read(reader->file);

reader->bcf_idx = bcf_index_load(fname);
reader->bcf_idx = bcf_index_load2(file_ptr->fn, idxname);
if ( !reader->bcf_idx )
{
files->errnum = idx_load_failed;
Expand Down Expand Up @@ -362,7 +382,7 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
return 0;
}

reader->fname = strdup(fname);
reader->fname = strdup(file_ptr->fn);
if ( files->apply_filters )
reader->filter_ids = init_filters(reader->header, files->apply_filters, &reader->nfilter_ids);

Expand Down Expand Up @@ -413,6 +433,18 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
}
}

if ((auxdata = BCF_SR_AUX(files))) {
//store closure status for htsfile
int *tmp = realloc(auxdata->closefile, sizeof(int) * files->nreaders);
if (!tmp) {
hts_log_error("Failed to allocate memory");
return 0;
}
tmp[files->nreaders - 1] = autoclose;
auxdata->closefile = tmp;
}


return 1;
}

Expand All @@ -426,13 +458,15 @@ bcf_srs_t *bcf_sr_init(void)
return files;
}

static void bcf_sr_destroy1(bcf_sr_t *reader)
static void bcf_sr_destroy1(bcf_sr_t *reader, int closefile)
{
free(reader->fname);
if ( reader->tbx_idx ) tbx_destroy(reader->tbx_idx);
if ( reader->bcf_idx ) hts_idx_destroy(reader->bcf_idx);
bcf_hdr_destroy(reader->header);
hts_close(reader->file);
if (closefile) {
hts_close(reader->file);
}
if ( reader->itr ) tbx_itr_destroy(reader->itr);
int j;
for (j=0; j<reader->mbuffer; j++)
Expand All @@ -445,8 +479,10 @@ static void bcf_sr_destroy1(bcf_sr_t *reader)
void bcf_sr_destroy(bcf_srs_t *files)
{
int i;
int *autoclose = BCF_SR_AUX(files)->closefile;

for (i=0; i<files->nreaders; i++)
bcf_sr_destroy1(&files->readers[i]);
bcf_sr_destroy1(&files->readers[i], autoclose[i]);
free(files->has_line);
free(files->readers);
for (i=0; i<files->n_smpl; i++) free(files->samples[i]);
Expand All @@ -456,19 +492,23 @@ void bcf_sr_destroy(bcf_srs_t *files)
if (files->tmps.m) free(files->tmps.s);
if (files->n_threads) bcf_sr_destroy_threads(files);
bcf_sr_sort_destroy(&BCF_SR_AUX(files)->sort);
free(autoclose);
free(files->aux);
free(files);
}

void bcf_sr_remove_reader(bcf_srs_t *files, int i)
{
assert( !files->samples ); // not ready for this yet
int *autoclose = BCF_SR_AUX(files)->closefile;

bcf_sr_sort_remove_reader(files, &BCF_SR_AUX(files)->sort, i);
bcf_sr_destroy1(&files->readers[i]);
bcf_sr_destroy1(&files->readers[i], autoclose[i]);
if ( i+1 < files->nreaders )
{
memmove(&files->readers[i], &files->readers[i+1], (files->nreaders-i-1)*sizeof(bcf_sr_t));
memmove(&files->has_line[i], &files->has_line[i+1], (files->nreaders-i-1)*sizeof(int));
memmove(&autoclose[i], &autoclose[i+1], (files->nreaders-i-1)*sizeof(int));
}
files->nreaders--;
}
Expand Down
46 changes: 40 additions & 6 deletions test/test-bcf-sr.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright (C) 2017, 2020, 2023 Genome Research Ltd.
Copyright (C) 2017, 2020, 2023, 2025 Genome Research Ltd.
Author: Petr Danecek <[email protected]>
Expand Down Expand Up @@ -54,7 +54,7 @@ error(const char *format, ...)
void HTS_NORETURN usage(int exit_code)
{
fprintf(stderr, "Usage: test-bcf-sr [OPTIONS] vcf-list.txt\n");
fprintf(stderr, " test-bcf-sr [OPTIONS] -args file1.bcf [...]\n");
fprintf(stderr, " test-bcf-sr [OPTIONS] --args file1.bcf [...]\n");
fprintf(stderr, "Options:\n");
fprintf(stderr, " --args pass filenames directly in argument list\n");
fprintf(stderr, " --no-index allow streaming\n");
Expand All @@ -63,6 +63,7 @@ void HTS_NORETURN usage(int exit_code)
fprintf(stderr, " -p, --pair <logic[+ref]> logic: snps,indels,both,snps+ref,indels+ref,both+ref,exact,some,all\n");
fprintf(stderr, " -r, --regions <reg_list> comma-separated list of regions\n");
fprintf(stderr, " -t, --targets <reg_list> comma-separated list of targets\n");
fprintf(stderr, " -u, --usefptr use hfile pointer interface on reader addition\n");
fprintf(stderr, "\n");
exit(exit_code);
}
Expand Down Expand Up @@ -133,13 +134,15 @@ int main(int argc, char *argv[])
{"targets",required_argument,NULL,'t'},
{"no-index",no_argument,NULL,1000},
{"args",no_argument,NULL,1001},
{"usefptr",no_argument,NULL,'u'},
{NULL,0,NULL,0}
};

int c, pair = 0, use_index = 1, use_fofn = 1;
int c, pair = 0, use_index = 1, use_fofn = 1, usefptr = 0;
enum htsExactFormat out_fmt = text_format; // for original pos + alleles
const char *out_fn = NULL, *regions = NULL, *targets = NULL;
while ((c = getopt_long(argc, argv, "o:O:p:r:t:h", loptions, NULL)) >= 0)
htsFile **htsfp = NULL;
while ((c = getopt_long(argc, argv, "o:O:p:r:t:hu", loptions, NULL)) >= 0)
{
switch (c)
{
Expand Down Expand Up @@ -179,6 +182,9 @@ int main(int argc, char *argv[])
case 1001:
use_fofn = 0;
break;
case 'u':
usefptr = 1; //use htsfile interface instead of fname i/f
break;
case 'h':
usage(EXIT_SUCCESS);
default: usage(EXIT_FAILURE);
Expand Down Expand Up @@ -218,8 +224,32 @@ int main(int argc, char *argv[])
error("Failed to set targets\n");
}

for (i=0; i<nvcf; i++)
if ( !bcf_sr_add_reader(sr,vcfs[i]) ) error("Failed to open %s: %s\n", vcfs[i],bcf_sr_strerror(sr->errnum));
if (usefptr && !(htsfp = malloc(sizeof(htsFile*) * nvcf))) {
error("Failed to allocate memory\n");
}

for (i=0; i<nvcf; i++) {
if (!usefptr) {
if ( !bcf_sr_add_reader(sr,vcfs[i]) ) {
error("Failed to open %s: %s\n", vcfs[i],
bcf_sr_strerror(sr->errnum));
}
} else { //use htsfile i/f
if (!(htsfp[i] = hts_open(vcfs[i], "r"))) {
error("Failed to open %s: %s\n", vcfs[i],
bcf_sr_strerror(sr->errnum));
}
/*with name, index can be anywhere, named as anything
w/o name it has to be along with file with default naming*/

const char *idxname = strstr(vcfs[i], HTS_IDX_DELIM);
idxname += idxname ? sizeof(HTS_IDX_DELIM) - 1 : 0;
if ( !bcf_sr_add_hreader(sr, htsfp[i], 1, idxname) ) {
error("Failed to add reader %s: %s\n", vcfs[i],
bcf_sr_strerror(sr->errnum));
}
}
}

if (!sr->readers || sr->nreaders < 1)
error("No readers set, even though one was added\n");
Expand Down Expand Up @@ -264,6 +294,10 @@ int main(int argc, char *argv[])
free(vcfs[i]);
free(vcfs);
}
if (usefptr) {
//files are closed along with sr destroy
free(htsfp);
}

return 0;
}
Expand Down
29 changes: 29 additions & 0 deletions test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
run_test('test_bcf_sr_sort',$opts);
run_test('test_bcf_sr_no_index',$opts);
run_test('test_bcf_sr_range', $opts);
run_test('test_bcf_sr_hreader', $opts);
run_test('test_command',$opts,cmd=>'test-bcf-translate -',out=>'test-bcf-translate.out');
run_test('test_convert_padded_header',$opts);
run_test('test_rebgzip',$opts);
Expand Down Expand Up @@ -1345,6 +1346,34 @@ sub test_bcf_sr_range {
}
}

sub test_bcf_sr_hreader {
#uses input file from test_bcf_sr_sort / test-bcf-sr.pl
#invokes bcf sync reader with hread method
my ($opts, %args) = @_;
my $test = "test_bcf_sr_hreader";
my $fail = 0;
my $cmd = "$$opts{path}/test-bcf-sr -p all $$opts{tmp}/list.txt -o $$opts{tmp}/file.out";
my $cmd_header = "$$opts{path}/test-bcf-sr -p all $$opts{tmp}/list.txt -o $$opts{tmp}/filenew.out -u";
my $cmd_diff = "diff $$opts{tmp}/filenew.out $$opts{tmp}/file.out";

my ($ret, $out) = _cmd($cmd);
if ($ret != 0) {
failed($opts, $test, "Failed to create reference output\n");
return;
}
($ret, $out) = _cmd($cmd_header);
if ($ret != 0) {
failed($opts, $test, "Failed to create output\n");
return;
}
($ret, $out) = _cmd($cmd_diff);
if ($ret != 0) {
failed($opts, $test, "Output differs to reference output\n");
return;
}
passed($opts, $test);
}

sub test_command
{
my ($opts, %args) = @_;
Expand Down

0 comments on commit dac8f29

Please sign in to comment.