Skip to content

Commit

Permalink
Support tabix index for collections
Browse files Browse the repository at this point in the history
  • Loading branch information
lukfor committed Dec 3, 2023
1 parent ad98b9b commit 3253ef5
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 15 deletions.
114 changes: 114 additions & 0 deletions src/main/java/genepi/riskscore/io/csv/TabixTableReader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package genepi.riskscore.io.csv;

import genepi.io.FileUtil;
import genepi.io.table.reader.AbstractTableReader;
import genepi.io.table.reader.CsvTableReader;
import genepi.io.text.LineReader;
import genepi.riskscore.io.dbsnp.DbSnpReader;
import htsjdk.tribble.readers.TabixReader;

import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;

/**
 * Table reader that serves the rows of a tab-separated file through its tabix
 * index, restricted to a single chromosome region.
 *
 * <p>
 * The file itself is scanned once from the beginning to collect the leading
 * lines starting with {@code #} (see {@link #getHeader()}) and the first line
 * that does not start with {@code #}, which is split on tabs and used as the
 * list of column names. The data rows are then obtained from
 * {@link TabixReader#query(String, int, int)}.
 */
public class TabixTableReader extends AbstractTableReader {

    /** Leading lines of the file that start with {@code #}, in file order. */
    private final List<String> headerLines = new Vector<String>();

    /** Column names taken from the first line that does not start with {@code #}. */
    private String[] columns = null;

    /** Maps a column name to its position in {@link #columns}. */
    private final Map<String, Integer> index = new HashMap<String, Integer>();

    /** Cells of the current row, or {@code null} once the region is exhausted. */
    private String[] row = null;

    private final TabixReader tabixReader;

    private final TabixReader.Iterator iterator;

    /**
     * Number of rows handed out so far. This counts rows of the queried
     * region, not physical lines of the underlying file.
     */
    private int lineNumber = 0;

    /**
     * Opens the file and queries the given chromosome with the range
     * {@code 0} to {@link Integer#MAX_VALUE}.
     *
     * @param filename   path of the tab-separated file
     * @param chromosome sequence name passed to the tabix query
     * @throws IOException if the file cannot be read or has no column line
     */
    public TabixTableReader(String filename, String chromosome) throws IOException {
        this(filename, chromosome, 0, Integer.MAX_VALUE);
    }

    /**
     * Opens the file, reads header lines and column names, and starts a tabix
     * query for the given region.
     *
     * @param filename   path of the tab-separated file
     * @param chromosome sequence name passed unchanged to
     *                   {@link TabixReader#query(String, int, int)}
     * @param start      start coordinate passed unchanged to the tabix query
     * @param end        end coordinate passed unchanged to the tabix query
     * @throws IOException if the file cannot be read or has no column line
     */
    public TabixTableReader(String filename, String chromosome, int start, int end) throws IOException {
        readHeaderAndColumns(filename);

        TabixReader openedReader = new TabixReader(filename);
        TabixReader.Iterator region = null;
        boolean queried = false;
        try {
            region = openedReader.query(chromosome, start, end);
            queried = true;
        } finally {
            // Do not leak the tabix reader when the query itself fails.
            if (!queried) {
                openedReader.close();
            }
        }
        tabixReader = openedReader;
        iterator = region;
    }

    /**
     * Collects the leading {@code #} lines and parses the first other line as
     * the tab-separated list of column names. The line reader is closed even
     * when reading fails.
     */
    private void readHeaderAndColumns(String filename) throws IOException {
        LineReader reader = new LineReader(openTxtOrGzipStream(filename));
        try {
            while (reader.next()) {
                String line = reader.get();
                if (line.startsWith("#")) {
                    headerLines.add(line);
                    continue;
                }
                columns = line.split("\t");
                for (int i = 0; i < columns.length; i++) {
                    index.put(columns[i], i);
                }
                break;
            }
        } finally {
            reader.close();
        }
        // Fail fast with a clear message instead of a NullPointerException
        // later on when rows are compared against the column count.
        if (columns == null) {
            throw new IOException("File '" + filename + "' contains no line with column names.");
        }
    }

    /**
     * @return the leading lines of the file that start with {@code #}
     */
    public List<String> getHeader() {
        return headerLines;
    }

    /**
     * @return the column names read from the file
     */
    @Override
    public String[] getColumns() {
        return columns;
    }

    /**
     * Advances to the next row of the queried region.
     *
     * @return {@code true} if a row is available through {@link #getRow()},
     *         {@code false} once the region is exhausted
     * @throws RuntimeException if reading fails or the row does not have the
     *                          same number of cells as there are columns
     */
    @Override
    public boolean next() {
        String line = null;
        try {
            // NOTE(review): defensive null check; whether query(...) can return
            // null depends on the htsjdk version in use. A missing iterator is
            // treated as an empty result.
            if (iterator != null) {
                line = iterator.next();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        if (line == null) {
            row = null;
            return false;
        }
        // Limit -1 keeps trailing empty cells so the column count check is exact.
        row = line.split("\t", -1);
        lineNumber++;
        if (row.length != columns.length) {
            throw new RuntimeException("Different number of columns in line " + lineNumber + ": " + line);
        }
        return true;
    }

    /**
     * Returns the position of the given column.
     *
     * @param column column name as it appears in the file
     * @return zero-based position of the column
     * @throws IllegalArgumentException if the file has no such column; use
     *                                  {@link #hasColumn(String)} to probe
     */
    @Override
    public int getColumnIndex(String column) {
        Integer position = index.get(column);
        if (position == null) {
            // Avoids the message-less NullPointerException that unboxing a
            // missing map entry would raise.
            throw new IllegalArgumentException("Column '" + column + "' not found.");
        }
        return position;
    }

    /**
     * @return the cells of the current row, or {@code null} if there is none
     */
    @Override
    public String[] getRow() {
        return row;
    }

    /** Closes the underlying tabix reader. */
    @Override
    public void close() {
        tabixReader.close();
    }

    /**
     * @return {@code true} if the file has a column with the given name
     */
    @Override
    public boolean hasColumn(String column) {
        return index.containsKey(column);
    }

    /**
     * Opens the file and passes the raw stream through
     * {@link FileUtil#decompressStream(InputStream)}. The file stream is closed
     * again if that wrapping step fails.
     */
    private static DataInputStream openTxtOrGzipStream(String filename) throws IOException {
        FileInputStream inputStream = new FileInputStream(filename);
        boolean wrapped = false;
        try {
            InputStream in2 = FileUtil.decompressStream(inputStream);
            DataInputStream result = new DataInputStream(in2);
            wrapped = true;
            return result;
        } finally {
            if (!wrapped) {
                inputStream.close();
            }
        }
    }

}
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package genepi.riskscore.io.scores;

import genepi.io.table.reader.ITableReader;
import genepi.riskscore.io.Chunk;
import genepi.riskscore.io.RiskScoreFile;
import genepi.riskscore.io.VariantFile;
import genepi.riskscore.io.csv.CsvWithHeaderTableReader;
import genepi.riskscore.io.csv.TabixTableReader;
import genepi.riskscore.io.formats.RiskScoreFormatFactory.RiskScoreFormat;
import genepi.riskscore.model.ReferenceVariant;
import genepi.riskscore.model.RiskScoreSummary;
Expand Down Expand Up @@ -34,6 +36,8 @@ public class MergedRiskScoreCollection implements IRiskScoreCollection {

public static String META_EXTENSION = ".info";

public static String INDEX_EXTENSION = ".tbi";

public static String COLUMN_CHROMOSOME = "chr_name";

public static String COLUMN_POSITION = "chr_position";
Expand Down Expand Up @@ -88,8 +92,10 @@ public void buildIndex(String chromosome, Chunk chunk, String dbsnp, String prox
}
readerMeta.close();


CsvWithHeaderTableReader reader = new CsvWithHeaderTableReader(filename, '\t');
if (chunk == null){
chunk = new Chunk();
}
TabixTableReader reader = new TabixTableReader(filename, chromosome, chunk.getStart(), chunk.getEnd());
String[] columns = reader.getColumns();

numberRiskScores = columns.length - COLUMNS.size();
Expand All @@ -114,19 +120,6 @@ public void buildIndex(String chromosome, Chunk chunk, String dbsnp, String prox

String _chromosome = reader.getString(COLUMN_CHROMOSOME);
int position = reader.getInteger(COLUMN_POSITION);
if (!_chromosome.equals(chromosome)){
continue;
}
if (chunk != null) {
if (position < chunk.getStart()) {
continue;
}

if (position > chunk.getEnd()) {
break;
}
}

String otherAllele = reader.getString(COLUMN_OTHER_ALLELE);
String effectAllele = reader.getString(COLUMN_EFFECT_ALLELE);

Expand Down

0 comments on commit 3253ef5

Please sign in to comment.