From 1a66a2ea9acfc8b0c01305a42fdfa8531c75f024 Mon Sep 17 00:00:00 2001 From: Muhammad S Shamim Date: Wed, 6 Apr 2022 19:33:55 -0500 Subject: [PATCH 1/7] update stream handling --- src/javastraw/StrawGlobals.java | 2 +- src/javastraw/reader/DatasetReaderV2.java | 22 +++----- src/javastraw/reader/ReaderTools.java | 51 ++++++++++++------- .../reader/block/DynamicBlockIndex.java | 2 +- .../reader/norm/NormFactorMapReader.java | 8 ++- 5 files changed, 50 insertions(+), 35 deletions(-) diff --git a/src/javastraw/StrawGlobals.java b/src/javastraw/StrawGlobals.java index 1a9ab75..f629f7b 100644 --- a/src/javastraw/StrawGlobals.java +++ b/src/javastraw/StrawGlobals.java @@ -25,7 +25,7 @@ package javastraw; public class StrawGlobals { - public static final String versionNum = "2.11.04"; + public static final String versionNum = "2.12.01"; // min hic file version supported public static final int minVersion = 6; public static final int bufferSize = 2097152; diff --git a/src/javastraw/reader/DatasetReaderV2.java b/src/javastraw/reader/DatasetReaderV2.java index 015353f..29bf1c0 100644 --- a/src/javastraw/reader/DatasetReaderV2.java +++ b/src/javastraw/reader/DatasetReaderV2.java @@ -345,8 +345,7 @@ private void readFooter(long position) throws IOException { // Normalized expected values (v6 and greater only) if (version >= 6) { currentPosition = normVectorFilePosition; - SeekableStream stream = ReaderTools.getValidStream(path); - stream.seek(currentPosition); + SeekableStream stream = ReaderTools.getValidStream(path, currentPosition); LittleEndianInputStream dis = new LittleEndianInputStream(new BufferedInputStream(stream, StrawGlobals.bufferSize)); @@ -363,7 +362,7 @@ private void readFooter(long position) throws IOException { for (int i = 0; i < nNormExpectedValueVectors; i++) { stream.seek(currentPosition); - dis = new LittleEndianInputStream(new BufferedInputStream(stream, StrawGlobals.bufferSize)); + dis = new LittleEndianInputStream(new BufferedInputStream(stream, 30)); String typeString = dis.readString(); NormalizationType norm = dataset.getNormalizationHandler().getNormTypeFromString(typeString); @@ -403,10 +402,8 @@ private void readFooter(long position) throws IOException { private long readExpectedValuesMapForNone(long currentPosition) throws IOException { Map expectedValuesMap = new LinkedHashMap<>(); - SeekableStream stream = ReaderTools.getValidStream(path); - stream.seek(currentPosition); - LittleEndianInputStream dis = new LittleEndianInputStream(new BufferedInputStream(stream, StrawGlobals.bufferSize)); - int nExpectedValues = dis.readInt(); + SeekableStream stream = ReaderTools.getValidStream(path, currentPosition); + int nExpectedValues = ReaderTools.readIntFromBytes(stream); currentPosition += 4; for (int i = 0; i < nExpectedValues; i++) { NormalizationType norm = NormalizationHandler.NONE; @@ -419,12 +416,10 @@ private long readExpectedValuesMapForNone(long currentPosition) throws IOExcepti } private long populateMasterIndex(long currentPosition) throws IOException { - SeekableStream stream = ReaderTools.getValidStream(path); - stream.seek(currentPosition); - LittleEndianInputStream dis = new LittleEndianInputStream(new BufferedInputStream(stream, StrawGlobals.bufferSize)); - - int nEntries = dis.readInt(); + SeekableStream stream = ReaderTools.getValidStream(path, currentPosition); + int nEntries = ReaderTools.readIntFromBytes(stream); currentPosition += 4; + LittleEndianInputStream dis = new LittleEndianInputStream(new BufferedInputStream(stream, 50 * nEntries)); for (int i = 0; i < nEntries; i++) { String key = dis.readString(); @@ -439,8 +434,7 @@ private long populateMasterIndex(long currentPosition) throws IOException { } private long determineNormVectorFilePosition(int numBytesInVar, long position) throws IOException { - SeekableStream stream = ReaderTools.getValidStream(path); - stream.seek(position); + SeekableStream stream = ReaderTools.getValidStream(path, position); byte[] buffer = new byte[numBytesInVar]; int actualBytes = stream.read(buffer); if (numBytesInVar == actualBytes) { diff --git a/src/javastraw/reader/ReaderTools.java b/src/javastraw/reader/ReaderTools.java index 2463e65..ce4fc0c 100644 --- a/src/javastraw/reader/ReaderTools.java +++ b/src/javastraw/reader/ReaderTools.java @@ -32,7 +32,7 @@ public class ReaderTools { private static final int maxLengthEntryName = 100; private static final int MAX_BYTE_READ_SIZE = Integer.MAX_VALUE - 10; - static SeekableStream getValidStream(String path) throws IOException { + public static SeekableStream getValidStream(String path) throws IOException { SeekableStream stream; do { stream = streamFactory.getStreamFor(path); @@ -40,6 +40,12 @@ static SeekableStream getValidStream(String path) throws IOException { return stream; } + public static SeekableStream getValidStream(String path, long position) throws IOException { + SeekableStream stream = getValidStream(path); + stream.seek(position); + return stream; + } + static LittleEndianInputStream createStreamFromSeveralBuffers(LargeIndexEntry idx, String path) throws IOException { List buffer = seekAndFullyReadLargeCompressedBytes(idx, path); @@ -52,8 +58,7 @@ static LittleEndianInputStream createStreamFromSeveralBuffers(LargeIndexEntry id static byte[] seekAndFullyReadCompressedBytes(IndexEntry idx, String path) throws IOException { byte[] compressedBytes = new byte[idx.size]; - SeekableStream stream = ReaderTools.getValidStream(path); - stream.seek(idx.position); + SeekableStream stream = ReaderTools.getValidStream(path, idx.position); stream.readFully(compressedBytes); stream.close(); return compressedBytes; @@ -68,8 +73,7 @@ static List seekAndFullyReadLargeCompressedBytes(LargeIndexEntry idx, St } compressedBytes.add(new byte[(int) counter]); - SeekableStream stream = ReaderTools.getValidStream(path); - stream.seek(idx.position); + SeekableStream stream = ReaderTools.getValidStream(path, idx.position); for (int i = 0; i < compressedBytes.size(); i++) { stream.readFully(compressedBytes.get(i)); } @@ -81,8 +85,7 @@ static Pair readMatrixZoomData(Chromosome chr1, Chromosome long filePointer, String path, boolean useCache, Map blockIndexMap, DatasetReader reader) throws IOException { - SeekableStream stream = ReaderTools.getValidStream(path); - stream.seek(filePointer); + SeekableStream stream = ReaderTools.getValidStream(path, filePointer); LittleEndianInputStream dis = new LittleEndianInputStream(new BufferedInputStream(stream, StrawGlobals.bufferSize)); String hicUnitStr = dis.readString(); @@ -135,9 +138,8 @@ static Pair readMatrixZoomData(Chromosome chr1, Chromosome static long readExpectedVectorInFooter(long currentPosition, Map expectedValuesMap, NormalizationType norm, int version, String path, DatasetReader reader) throws IOException { - SeekableStream stream = ReaderTools.getValidStream(path); - stream.seek(currentPosition); - LittleEndianInputStream dis = new LittleEndianInputStream(new BufferedInputStream(stream, StrawGlobals.bufferSize)); + SeekableStream stream = ReaderTools.getValidStream(path, currentPosition); + LittleEndianInputStream dis = new LittleEndianInputStream(new BufferedInputStream(stream, 50)); String unitString = dis.readString(); currentPosition += (unitString.length() + 1); HiCZoom.HiCUnit unit = HiCZoom.valueOfUnit(unitString); @@ -146,7 +148,7 @@ static long readExpectedVectorInFooter(long currentPosition, long[] nValues = new long[1]; currentPosition += readVectorLength(dis, nValues, version); - + /* todo time if (binSize >= 500) { currentPosition = ReaderTools.readWholeNormalizationVector(currentPosition, dis, expectedValuesMap, unit, binSize, nValues[0], norm, version); @@ -154,6 +156,9 @@ static long readExpectedVectorInFooter(long currentPosition, currentPosition = ReaderTools.setUpPartialVectorStreaming(currentPosition, expectedValuesMap, unit, binSize, nValues[0], norm, version, path, reader); } + */ + currentPosition = ReaderTools.setUpPartialVectorStreaming(currentPosition, expectedValuesMap, unit, binSize, + nValues[0], norm, version, path, reader); stream.close(); return currentPosition; } @@ -179,14 +184,11 @@ static long setUpPartialVectorStreaming(long currentPosition, Map normFactors = new LinkedHashMap<>(); - public NormFactorMapReader(int nFactors, int version, LittleEndianInputStream dis) + public NormFactorMapReader(int nFactors, int version, long position, String path) throws IOException { this.version = version; this.nFactors = nFactors; + SeekableStream stream = ReaderTools.getValidStream(path, position); + LittleEndianInputStream dis = new LittleEndianInputStream(new BufferedInputStream(stream, getOffset())); + for (int j = 0; j < nFactors; j++) { int chrIdx = dis.readInt(); if (version > 8) { From ff1d3d826dc977226005df1a54f4cea2e4b93967 Mon Sep 17 00:00:00 2001 From: Muhammad S Shamim Date: Fri, 8 Apr 2022 00:54:15 -0500 Subject: [PATCH 2/7] address null iterator --- src/javastraw/StrawGlobals.java | 2 +- src/javastraw/reader/iterators/ContactRecordIterator.java | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/javastraw/StrawGlobals.java b/src/javastraw/StrawGlobals.java index f629f7b..04cf21c 100644 --- a/src/javastraw/StrawGlobals.java +++ b/src/javastraw/StrawGlobals.java @@ -25,7 +25,7 @@ package javastraw; public class StrawGlobals { - public static final String versionNum = "2.12.01"; + public static final String versionNum = "2.12.02"; // min hic file version supported public static final int minVersion = 6; public static final int bufferSize = 2097152; diff --git a/src/javastraw/reader/iterators/ContactRecordIterator.java b/src/javastraw/reader/iterators/ContactRecordIterator.java index 226b965..bc64839 100644 --- a/src/javastraw/reader/iterators/ContactRecordIterator.java +++ b/src/javastraw/reader/iterators/ContactRecordIterator.java @@ -75,6 +75,9 @@ public ContactRecordIterator(DatasetReader reader, String zdKey, BlockCache bloc */ @Override public boolean hasNext() { + if (blockNumbers == null || blockNumbers.size() == 0) { + return false; + } if (currentBlockIterator != null && currentBlockIterator.hasNext()) { return true; From 55c51162c8f7043de2b07121671ca58f63bd9a7f Mon Sep 17 00:00:00 2001 From: Muhammad S Shamim Date: Fri, 8 Apr 2022 19:22:28 -0500 Subject: [PATCH 3/7] allow non-dynamic block loading for hires --- src/javastraw/StrawGlobals.java | 6 ++--- src/javastraw/reader/Dataset.java | 6 ++++- src/javastraw/reader/DatasetReader.java | 2 +- src/javastraw/reader/DatasetReaderV2.java | 4 +-- src/javastraw/reader/ReaderTools.java | 28 ++++++++++++++------ src/javastraw/reader/mzd/MatrixZoomData.java | 11 ++++++++ 6 files changed, 42 insertions(+), 15 deletions(-) diff --git a/src/javastraw/StrawGlobals.java b/src/javastraw/StrawGlobals.java index 04cf21c..54f979e 100644 --- a/src/javastraw/StrawGlobals.java +++ b/src/javastraw/StrawGlobals.java @@ -25,12 +25,12 @@ package javastraw; public class StrawGlobals { - public static final String versionNum = "2.12.02"; - // min hic file version supported + public static final String versionNum = "2.13.01"; public static final int minVersion = 6; public static final int bufferSize = 2097152; - // implement Map scaling with this global variable public static boolean allowDynamicBlockIndex = true; + public static int dynamicResolutionLimit = 50; + public static boolean printVerboseComments = false; } diff --git a/src/javastraw/reader/Dataset.java b/src/javastraw/reader/Dataset.java index 878cc50..d0fa25d 100644 --- a/src/javastraw/reader/Dataset.java +++ b/src/javastraw/reader/Dataset.java @@ -87,6 +87,10 @@ public void clearCache(boolean onlyClearInter) { } public Matrix getMatrix(Chromosome chr1, Chromosome chr2) { + return getMatrix(chr1, chr2, -1); + } + + public Matrix getMatrix(Chromosome chr1, Chromosome chr2, int specificResolution) { // order is arbitrary, convention is lower # chr first if (chr1 == null || chr2 == null) return null; @@ -97,7 +101,7 @@ public Matrix getMatrix(Chromosome chr1, Chromosome chr2) { if (m == null && reader != null) { try { - m = reader.readMatrix(key); + m = reader.readMatrix(key, specificResolution); matrices.put(key, m); } catch (Exception e) { System.err.println("Error fetching matrix for: " + chr1.getName() + "-" + chr2.getName()); diff --git a/src/javastraw/reader/DatasetReader.java b/src/javastraw/reader/DatasetReader.java index c9c6e44..48938e9 100644 --- a/src/javastraw/reader/DatasetReader.java +++ b/src/javastraw/reader/DatasetReader.java @@ -43,7 +43,7 @@ public interface DatasetReader { Dataset read() throws IOException; - Matrix readMatrix(String key) throws IOException; + Matrix readMatrix(String key, int resolution) throws IOException; Block readNormalizedBlock(int blockNumber, String zdKey, NormalizationType no, int chr1Index, int chr2Index, HiCZoom zoom) throws IOException; diff --git a/src/javastraw/reader/DatasetReaderV2.java b/src/javastraw/reader/DatasetReaderV2.java index 29bf1c0..9f4bb33 100644 --- a/src/javastraw/reader/DatasetReaderV2.java +++ b/src/javastraw/reader/DatasetReaderV2.java @@ -455,7 +455,7 @@ private long determineNormVectorFilePosition(int numBytesInVar, long position) t } @Override - public Matrix readMatrix(String key) throws IOException { + public Matrix readMatrix(String key, int specificResolution) throws IOException { IndexEntry idx = masterIndex.get(key); if (idx == null) { return null; @@ -493,7 +493,7 @@ public Matrix readMatrix(String key) throws IOException { for (int i = 0; i < nResolutions; i++) { try { Pair result = ReaderTools.readMatrixZoomData(chr1, chr2, chr1Sites, chr2Sites, - currentFilePosition, path, useCache, blockIndexMap, this); + currentFilePosition, path, useCache, blockIndexMap, this, specificResolution); zdList.add(result.getFirst()); currentFilePosition = result.getSecond(); } catch (Exception ee) { diff --git a/src/javastraw/reader/ReaderTools.java b/src/javastraw/reader/ReaderTools.java index ce4fc0c..a0e5095 100644 --- a/src/javastraw/reader/ReaderTools.java +++ b/src/javastraw/reader/ReaderTools.java @@ -84,7 +84,7 @@ static List seekAndFullyReadLargeCompressedBytes(LargeIndexEntry idx, St static Pair readMatrixZoomData(Chromosome chr1, Chromosome chr2, int[] chr1Sites, int[] chr2Sites, long filePointer, String path, boolean useCache, Map blockIndexMap, - DatasetReader reader) throws IOException { + DatasetReader reader, int specificResolution) throws IOException { SeekableStream stream = ReaderTools.getValidStream(path, filePointer); LittleEndianInputStream dis = new LittleEndianInputStream(new BufferedInputStream(stream, StrawGlobals.bufferSize)); @@ -115,14 +115,26 @@ static Pair readMatrixZoomData(Chromosome chr1, Chromosome long currentFilePointer = filePointer + (9 * 4) + hicUnitStr.getBytes().length + 1; // i think 1 byte for 0 terminated string? - if (binSize < 50 && StrawGlobals.allowDynamicBlockIndex) { - int maxPossibleBlockNumber = blockColumnCount * blockColumnCount - 1; - DynamicBlockIndex blockIndex = new DynamicBlockIndex(ReaderTools.getValidStream(path), nBlocks, maxPossibleBlockNumber, currentFilePointer); - blockIndexMap.put(zd.getKey(), blockIndex); + if (specificResolution > 0) { + if (binSize != specificResolution) { + int maxPossibleBlockNumber = blockColumnCount * blockColumnCount - 1; + DynamicBlockIndex blockIndex = new DynamicBlockIndex(ReaderTools.getValidStream(path), nBlocks, maxPossibleBlockNumber, currentFilePointer); + blockIndexMap.put(zd.getKey(), blockIndex); + } else { + BlockIndex blockIndex = new BlockIndex(nBlocks); + blockIndex.populateBlocks(dis); + blockIndexMap.put(zd.getKey(), blockIndex); + } } else { - BlockIndex blockIndex = new BlockIndex(nBlocks); - blockIndex.populateBlocks(dis); - blockIndexMap.put(zd.getKey(), blockIndex); + if (binSize < StrawGlobals.dynamicResolutionLimit && StrawGlobals.allowDynamicBlockIndex) { + int maxPossibleBlockNumber = blockColumnCount * blockColumnCount - 1; + DynamicBlockIndex blockIndex = new DynamicBlockIndex(ReaderTools.getValidStream(path), nBlocks, maxPossibleBlockNumber, currentFilePointer); + blockIndexMap.put(zd.getKey(), blockIndex); + } else { + BlockIndex blockIndex = new BlockIndex(nBlocks); + blockIndex.populateBlocks(dis); + blockIndexMap.put(zd.getKey(), blockIndex); + } } currentFilePointer += (nBlocks * 16L); diff --git a/src/javastraw/reader/mzd/MatrixZoomData.java b/src/javastraw/reader/mzd/MatrixZoomData.java index b6f65aa..6ce90b0 100644 --- a/src/javastraw/reader/mzd/MatrixZoomData.java +++ b/src/javastraw/reader/mzd/MatrixZoomData.java @@ -25,6 +25,7 @@ package javastraw.reader.mzd; +import javastraw.StrawGlobals; import javastraw.matrices.BasicMatrix; import javastraw.reader.DatasetReader; import javastraw.reader.basics.Chromosome; @@ -265,11 +266,21 @@ public void clearCache() { } public Iterator getDirectIterator() { + if (zoom.getBinSize() < StrawGlobals.dynamicResolutionLimit && StrawGlobals.allowDynamicBlockIndex) { + System.err.println("This resolution has currently been loaded with dynamic blocks; reload it using direct Matrix loading"); + System.exit(9); + return null; + } return new ContactRecordIterator(reader, getKey(), blockCache, getChr1Idx(), getChr2Idx(), getZoom(), NormalizationHandler.NONE); } public Iterator getNormalizedIterator(NormalizationType normType) { + if (zoom.getBinSize() < StrawGlobals.dynamicResolutionLimit && StrawGlobals.allowDynamicBlockIndex) { + System.err.println("This resolution has currently been loaded with dynamic blocks; reload it using direct Matrix loading"); + System.exit(19); + return null; + } return new ContactRecordIterator(reader, getKey(), blockCache, getChr1Idx(), getChr2Idx(), getZoom(), normType); } From 454cf6d12e4a2b6ede200ee893b4ba5bdf682b25 Mon Sep 17 00:00:00 2001 From: Muhammad S Shamim Date: Fri, 8 Apr 2022 19:27:52 -0500 Subject: [PATCH 4/7] add matrix types --- src/javastraw/StrawGlobals.java | 2 +- .../reader/iterators/ContactRecordIterator.java | 3 +++ src/javastraw/reader/mzd/MatrixZoomData.java | 11 ----------- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/src/javastraw/StrawGlobals.java b/src/javastraw/StrawGlobals.java index 54f979e..a309823 100644 --- a/src/javastraw/StrawGlobals.java +++ b/src/javastraw/StrawGlobals.java @@ -25,7 +25,7 @@ package javastraw; public class StrawGlobals { - public static final String versionNum = "2.13.01"; + public static final String versionNum = "2.13.02"; public static final int minVersion = 6; public static final int bufferSize = 2097152; diff --git a/src/javastraw/reader/iterators/ContactRecordIterator.java b/src/javastraw/reader/iterators/ContactRecordIterator.java index bc64839..a5fb0c5 100644 --- a/src/javastraw/reader/iterators/ContactRecordIterator.java +++ b/src/javastraw/reader/iterators/ContactRecordIterator.java @@ -76,6 +76,9 @@ public ContactRecordIterator(DatasetReader reader, String zdKey, BlockCache bloc @Override public boolean hasNext() { if (blockNumbers == null || blockNumbers.size() == 0) { + System.err.println("No blocks available. If working with hires, the resolution may have " + + "been loaded with dynamic blocks. Reload the matrix class directly specifying " + + "the resolution to resolve this issue."); return false; } diff --git a/src/javastraw/reader/mzd/MatrixZoomData.java b/src/javastraw/reader/mzd/MatrixZoomData.java index 6ce90b0..b6f65aa 100644 --- a/src/javastraw/reader/mzd/MatrixZoomData.java +++ b/src/javastraw/reader/mzd/MatrixZoomData.java @@ -25,7 +25,6 @@ package javastraw.reader.mzd; -import javastraw.StrawGlobals; import javastraw.matrices.BasicMatrix; import javastraw.reader.DatasetReader; import javastraw.reader.basics.Chromosome; @@ -266,21 +265,11 @@ public void clearCache() { } public Iterator getDirectIterator() { - if (zoom.getBinSize() < StrawGlobals.dynamicResolutionLimit && StrawGlobals.allowDynamicBlockIndex) { - System.err.println("This resolution has currently been loaded with dynamic blocks; reload it using direct Matrix loading"); - System.exit(9); - return null; - } return new ContactRecordIterator(reader, getKey(), blockCache, getChr1Idx(), getChr2Idx(), getZoom(), NormalizationHandler.NONE); } public Iterator getNormalizedIterator(NormalizationType normType) { - if (zoom.getBinSize() < StrawGlobals.dynamicResolutionLimit && StrawGlobals.allowDynamicBlockIndex) { - System.err.println("This resolution has currently been loaded with dynamic blocks; reload it using direct Matrix loading"); - System.exit(19); - return null; - } return new ContactRecordIterator(reader, getKey(), blockCache, getChr1Idx(), getChr2Idx(), getZoom(), normType); } From 5420b16862e7653750e2d9f8bf6516ceb336906e Mon Sep 17 00:00:00 2001 From: Muhammad S Shamim Date: Tue, 12 Apr 2022 08:21:20 -0500 Subject: [PATCH 5/7] restructure block index handling; store as part of mzd object; allow for clearing --- src/javastraw/StrawGlobals.java | 2 +- src/javastraw/reader/DatasetReader.java | 7 +- src/javastraw/reader/DatasetReaderV2.java | 215 +++++++++--------- src/javastraw/reader/ReaderTools.java | 29 +-- .../{BlockIndex.java => BlockIndices.java} | 18 +- ...ockIndex.java => DynamicBlockIndices.java} | 18 +- .../iterators/ContactRecordIterator.java | 23 +- src/javastraw/reader/mzd/BlockLoader.java | 11 +- .../reader/mzd/DynamicMatrixZoomData.java | 4 +- .../reader/mzd/LegacyVersionBlockReader.java | 5 +- src/javastraw/reader/mzd/MatrixZoomData.java | 49 ++-- .../reader/mzd/V9IntraBlockReader.java | 5 +- 12 files changed, 186 insertions(+), 200 deletions(-) rename src/javastraw/reader/block/{BlockIndex.java => BlockIndices.java} (81%) rename src/javastraw/reader/block/{DynamicBlockIndex.java => DynamicBlockIndices.java} (89%) diff --git a/src/javastraw/StrawGlobals.java b/src/javastraw/StrawGlobals.java index a309823..c343efe 100644 --- a/src/javastraw/StrawGlobals.java +++ b/src/javastraw/StrawGlobals.java @@ -25,7 +25,7 @@ package javastraw; public class StrawGlobals { - public static final String versionNum = "2.13.02"; + public static final String versionNum = "2.14.01"; public static final int minVersion = 6; public static final int bufferSize = 2097152; diff --git a/src/javastraw/reader/DatasetReader.java b/src/javastraw/reader/DatasetReader.java index 48938e9..02e522c 100644 --- a/src/javastraw/reader/DatasetReader.java +++ b/src/javastraw/reader/DatasetReader.java @@ -25,13 +25,13 @@ package javastraw.reader; import javastraw.reader.block.Block; +import javastraw.reader.block.IndexEntry; import javastraw.reader.datastructures.ListOfDoubleArrays; import javastraw.reader.norm.NormalizationVector; import javastraw.reader.type.HiCZoom; import javastraw.reader.type.NormalizationType; import java.io.IOException; -import java.util.List; public interface DatasetReader { @@ -46,9 +46,8 @@ public interface DatasetReader { Matrix readMatrix(String key, int resolution) throws IOException; Block readNormalizedBlock(int blockNumber, String zdKey, NormalizationType no, - int chr1Index, int chr2Index, HiCZoom zoom) throws IOException; - - List getBlockNumbers(String zdKey); + int chr1Index, int chr2Index, HiCZoom zoom, + IndexEntry idx) throws IOException; NormalizationVector readNormalizationVector(NormalizationType type, int chrIdx, HiCZoom.HiCUnit unit, int binSize) throws IOException; diff --git a/src/javastraw/reader/DatasetReaderV2.java b/src/javastraw/reader/DatasetReaderV2.java index 9f4bb33..e2b3468 100644 --- a/src/javastraw/reader/DatasetReaderV2.java +++ b/src/javastraw/reader/DatasetReaderV2.java @@ -30,7 +30,10 @@ import javastraw.StrawGlobals; import javastraw.reader.basics.Chromosome; import javastraw.reader.basics.ChromosomeHandler; -import javastraw.reader.block.*; +import javastraw.reader.block.Block; +import javastraw.reader.block.ContactRecord; +import javastraw.reader.block.IndexEntry; +import javastraw.reader.block.LargeIndexEntry; import javastraw.reader.datastructures.ListOfDoubleArrays; import javastraw.reader.expected.ExpectedValueFunction; import javastraw.reader.mzd.BlockLoader; @@ -59,7 +62,7 @@ public class DatasetReaderV2 extends AbstractDatasetReader { private final Dataset dataset; private int version = -1; private Map fragmentSitesIndex; - private final Map blockIndexMap = Collections.synchronizedMap(new HashMap<>()); + //private final Map blockIndexMap = Collections.synchronizedMap(new HashMap<>()); private long masterIndexPos; private long normVectorFilePosition; private boolean activeStatus = true; @@ -493,7 +496,7 @@ public Matrix readMatrix(String key, int specificResolution) throws IOException for (int i = 0; i < nResolutions; i++) { try { Pair result = ReaderTools.readMatrixZoomData(chr1, chr2, chr1Sites, chr2Sites, - currentFilePosition, path, useCache, blockIndexMap, this, specificResolution); + currentFilePosition, path, useCache, this, specificResolution); zdList.add(result.getFirst()); currentFilePosition = result.getSecond(); } catch (Exception ee) { @@ -527,12 +530,6 @@ private synchronized int[] retrieveFragmentSitesFromCache(Chromosome chromosome) return chrSites; } - @Override - public List getBlockNumbers(String zdKey) { - BlockIndex blockIndex = blockIndexMap.get(zdKey); - return blockIndex == null ? null : blockIndex.getBlockNumbers(); - } - public Map getNormVectorIndex() { return normVectorIndex; } @@ -612,12 +609,13 @@ public ListOfDoubleArrays readExpectedVectorPart(long position, long nVals) thro @Override public Block readNormalizedBlock(int blockNumber, String zdKey, NormalizationType no, - int chr1Index, int chr2Index, HiCZoom zoom) throws IOException { + int chr1Index, int chr2Index, HiCZoom zoom, + IndexEntry idx) throws IOException { if (no == null) { throw new IOException("Norm " + no + " is null"); } else if (no.equals(NormalizationHandler.NONE)) { - return readBlock(blockNumber, zdKey); + return readBlock(blockNumber, zdKey, idx); } else { long[] timeDiffThings = new long[4]; timeDiffThings[0] = System.currentTimeMillis(); @@ -635,7 +633,7 @@ public Block readNormalizedBlock(int blockNumber, String zdKey, NormalizationTyp ListOfDoubleArrays nv1Data = nv1.getData(); ListOfDoubleArrays nv2Data = nv2.getData(); timeDiffThings[1] = System.currentTimeMillis(); - Block rawBlock = readBlock(blockNumber, zdKey); + Block rawBlock = readBlock(blockNumber, zdKey, idx); timeDiffThings[2] = System.currentTimeMillis(); if (rawBlock == null) return null; @@ -656,126 +654,121 @@ public Block readNormalizedBlock(int blockNumber, String zdKey, NormalizationTyp } } - private Block readBlock(int blockNumber, String zdKey) throws IOException { + private Block readBlock(int blockNumber, String zdKey, IndexEntry idx) throws IOException { long[] timeDiffThings = new long[6]; timeDiffThings[0] = System.currentTimeMillis(); Block b = null; - BlockIndex blockIndex = blockIndexMap.get(zdKey); - if (blockIndex != null) { + if (idx != null) { - IndexEntry idx = blockIndex.getBlock(blockNumber); - if (idx != null) { + //System.out.println(" blockIndexPosition:" + idx.position); + timeDiffThings[1] = System.currentTimeMillis(); + byte[] compressedBytes = ReaderTools.seekAndFullyReadCompressedBytes(idx, path); + timeDiffThings[2] = System.currentTimeMillis(); + byte[] buffer; - //System.out.println(" blockIndexPosition:" + idx.position); - timeDiffThings[1] = System.currentTimeMillis(); - byte[] compressedBytes = ReaderTools.seekAndFullyReadCompressedBytes(idx, path); - timeDiffThings[2] = System.currentTimeMillis(); - byte[] buffer; + try { + buffer = ReaderTools.decompress(compressedBytes); + timeDiffThings[3] = System.currentTimeMillis(); - try { - buffer = ReaderTools.decompress(compressedBytes); - timeDiffThings[3] = System.currentTimeMillis(); + } catch (Exception e) { + throw new RuntimeException("Block read error: " + e.getMessage()); + } - } catch (Exception e) { - throw new RuntimeException("Block read error: " + e.getMessage()); + LittleEndianInputStream dis = new LittleEndianInputStream(new ByteArrayInputStream(buffer)); + int nRecords = dis.readInt(); + List records = new ArrayList<>(nRecords); + timeDiffThings[4] = System.currentTimeMillis(); + + if (version < 7) { + for (int i = 0; i < nRecords; i++) { + int binX = dis.readInt(); + int binY = dis.readInt(); + float counts = dis.readFloat(); + records.add(new ContactRecord(binX, binY, counts)); } + } else { - LittleEndianInputStream dis = new LittleEndianInputStream(new ByteArrayInputStream(buffer)); - int nRecords = dis.readInt(); - List records = new ArrayList<>(nRecords); - timeDiffThings[4] = System.currentTimeMillis(); - - if (version < 7) { - for (int i = 0; i < nRecords; i++) { - int binX = dis.readInt(); - int binY = dis.readInt(); - float counts = dis.readFloat(); - records.add(new ContactRecord(binX, binY, counts)); - } - } else { - - int binXOffset = dis.readInt(); - int binYOffset = dis.readInt(); + int binXOffset = dis.readInt(); + int binYOffset = dis.readInt(); - boolean useShort = dis.readByte() == 0; - boolean useShortBinX = true, useShortBinY = true; - if (version > 8) { - useShortBinX = dis.readByte() == 0; - useShortBinY = dis.readByte() == 0; - } + boolean useShort = dis.readByte() == 0; + boolean useShortBinX = true, useShortBinY = true; + if (version > 8) { + useShortBinX = dis.readByte() == 0; + useShortBinY = dis.readByte() == 0; + } - byte type = dis.readByte(); + byte type = dis.readByte(); - switch (type) { - case 1: - if (useShortBinX && useShortBinY) { - // List-of-rows representation - int rowCount = dis.readShort(); - for (int i = 0; i < rowCount; i++) { - int binY = binYOffset + dis.readShort(); - ReaderTools.populateContactRecordsColShort(dis, records, binXOffset, useShort, binY); - } - } else if (useShortBinX) { // && !useShortBinY - // List-of-rows representation - int rowCount = dis.readInt(); - for (int i = 0; i < rowCount; i++) { - int binY = binYOffset + dis.readInt(); - ReaderTools.populateContactRecordsColShort(dis, records, binXOffset, useShort, binY); - } - } else if (useShortBinY) { // && !useShortBinX - // List-of-rows representation - int rowCount = dis.readShort(); - for (int i = 0; i < rowCount; i++) { - int binY = binYOffset + dis.readShort(); - ReaderTools.populateContactRecordsColInt(dis, records, binXOffset, useShort, binY); + switch (type) { + case 1: + if (useShortBinX && useShortBinY) { + // List-of-rows representation + int rowCount = dis.readShort(); + for (int i = 0; i < rowCount; i++) { + int binY = binYOffset + dis.readShort(); + ReaderTools.populateContactRecordsColShort(dis, records, binXOffset, useShort, binY); + } + } else if (useShortBinX) { // && !useShortBinY + // List-of-rows representation + int rowCount = dis.readInt(); + for (int i = 0; i < rowCount; i++) { + int binY = binYOffset + dis.readInt(); + ReaderTools.populateContactRecordsColShort(dis, records, binXOffset, useShort, binY); + } + } else if (useShortBinY) { // && !useShortBinX + // List-of-rows representation + int rowCount = dis.readShort(); + for (int i = 0; i < rowCount; i++) { + int binY = binYOffset + dis.readShort(); + ReaderTools.populateContactRecordsColInt(dis, records, binXOffset, useShort, binY); + } + } else { + // List-of-rows representation + int rowCount = dis.readInt(); + for (int i = 0; i < rowCount; i++) { + int binY = binYOffset + dis.readInt(); + ReaderTools.populateContactRecordsColInt(dis, records, binXOffset, useShort, binY); + } + } + break; + case 2: + + int nPts = dis.readInt(); + int w = dis.readShort(); + + for (int i = 0; i < nPts; i++) { + //int idx = (p.y - binOffset2) * w + (p.x - binOffset1); + int row = i / w; + int col = i - row * w; + int bin1 = binXOffset + col; + int bin2 = binYOffset + row; + + if (useShort) { + short counts = dis.readShort(); + if (counts != Short.MIN_VALUE) { + records.add(new ContactRecord(bin1, bin2, counts)); } } else { - // List-of-rows representation - int rowCount = dis.readInt(); - for (int i = 0; i < rowCount; i++) { - int binY = binYOffset + dis.readInt(); - ReaderTools.populateContactRecordsColInt(dis, records, binXOffset, useShort, binY); - } - } - break; - case 2: - - int nPts = dis.readInt(); - int w = dis.readShort(); - - for (int i = 0; i < nPts; i++) { - //int idx = (p.y - binOffset2) * w + (p.x - binOffset1); - int row = i / w; - int col = i - row * w; - int bin1 = binXOffset + col; - int bin2 = binYOffset + row; - - if (useShort) { - short counts = dis.readShort(); - if (counts != Short.MIN_VALUE) { - records.add(new ContactRecord(bin1, bin2, counts)); - } - } else { - float counts = dis.readFloat(); - if (!Float.isNaN(counts)) { - records.add(new ContactRecord(bin1, bin2, counts)); - } + float counts = dis.readFloat(); + if (!Float.isNaN(counts)) { + records.add(new ContactRecord(bin1, bin2, counts)); } } + } - break; - default: - throw new RuntimeException("Unknown block type: " + type); - } - } - b = new Block(blockNumber, records, BlockLoader.getBlockKey(zdKey, blockNumber, NormalizationHandler.NONE)); - timeDiffThings[5] = System.currentTimeMillis(); - for (int ii = 0; ii < timeDiffThings.length - 1; ii++) { - globalTimeDiffThings[ii] += (timeDiffThings[ii + 1] - timeDiffThings[ii]) / 1000.0; + break; + default: + throw new RuntimeException("Unknown block type: " + type); } } + b = new Block(blockNumber, records, BlockLoader.getBlockKey(zdKey, blockNumber, NormalizationHandler.NONE)); + timeDiffThings[5] = System.currentTimeMillis(); + for (int ii = 0; ii < timeDiffThings.length - 1; ii++) { + globalTimeDiffThings[ii] += (timeDiffThings[ii + 1] - timeDiffThings[ii]) / 1000.0; + } } // If no block exists, mark with an "empty block" to prevent further attempts diff --git a/src/javastraw/reader/ReaderTools.java b/src/javastraw/reader/ReaderTools.java index a0e5095..c082c62 100644 --- a/src/javastraw/reader/ReaderTools.java +++ b/src/javastraw/reader/ReaderTools.java @@ -83,7 +83,6 @@ static List seekAndFullyReadLargeCompressedBytes(LargeIndexEntry idx, St static Pair readMatrixZoomData(Chromosome chr1, Chromosome chr2, int[] chr1Sites, int[] chr2Sites, long filePointer, String path, boolean useCache, - Map blockIndexMap, DatasetReader reader, int specificResolution) throws IOException { SeekableStream stream = ReaderTools.getValidStream(path, filePointer); LittleEndianInputStream dis = new LittleEndianInputStream(new BufferedInputStream(stream, StrawGlobals.bufferSize)); @@ -107,41 +106,31 @@ static Pair readMatrixZoomData(Chromosome chr1, Chromosome int blockBinCount = dis.readInt(); int blockColumnCount = dis.readInt(); - MatrixZoomData zd = new MatrixZoomData(chr1, chr2, zoom, blockBinCount, blockColumnCount, chr1Sites, chr2Sites, - reader); - zd.setUseCache(useCache); - int nBlocks = dis.readInt(); + BlockIndices blockIndices; long currentFilePointer = filePointer + (9 * 4) + hicUnitStr.getBytes().length + 1; // i think 1 byte for 0 terminated string? - if (specificResolution > 0) { if (binSize != specificResolution) { int maxPossibleBlockNumber = blockColumnCount * blockColumnCount - 1; - DynamicBlockIndex blockIndex = new DynamicBlockIndex(ReaderTools.getValidStream(path), nBlocks, maxPossibleBlockNumber, currentFilePointer); - blockIndexMap.put(zd.getKey(), blockIndex); + blockIndices = new DynamicBlockIndices(ReaderTools.getValidStream(path), nBlocks, maxPossibleBlockNumber, currentFilePointer); } else { - BlockIndex blockIndex = new BlockIndex(nBlocks); - blockIndex.populateBlocks(dis); - blockIndexMap.put(zd.getKey(), blockIndex); + blockIndices = new BlockIndices(nBlocks); + blockIndices.populateBlocks(dis); } } else { if (binSize < StrawGlobals.dynamicResolutionLimit && StrawGlobals.allowDynamicBlockIndex) { int maxPossibleBlockNumber = blockColumnCount * blockColumnCount - 1; - DynamicBlockIndex blockIndex = new DynamicBlockIndex(ReaderTools.getValidStream(path), nBlocks, maxPossibleBlockNumber, currentFilePointer); - blockIndexMap.put(zd.getKey(), blockIndex); + blockIndices = new DynamicBlockIndices(ReaderTools.getValidStream(path), nBlocks, maxPossibleBlockNumber, currentFilePointer); } else { - BlockIndex blockIndex = new BlockIndex(nBlocks); - blockIndex.populateBlocks(dis); - blockIndexMap.put(zd.getKey(), blockIndex); + blockIndices = new BlockIndices(nBlocks); + blockIndices.populateBlocks(dis); } } currentFilePointer += (nBlocks * 16L); - long nBins1 = chr1.getLength() / binSize; - long nBins2 = chr2.getLength() / binSize; - double avgCount = (sumCounts / nBins1) / nBins2; // <= trying to avoid overflows - zd.setAverageCount(avgCount); + MatrixZoomData zd = new MatrixZoomData(chr1, chr2, zoom, blockBinCount, blockColumnCount, chr1Sites, chr2Sites, + reader, blockIndices, useCache, sumCounts); stream.close(); return new Pair<>(zd, currentFilePointer); diff --git a/src/javastraw/reader/block/BlockIndex.java b/src/javastraw/reader/block/BlockIndices.java similarity index 81% rename from src/javastraw/reader/block/BlockIndex.java rename to src/javastraw/reader/block/BlockIndices.java index c3cb448..2e25d7d 100644 --- a/src/javastraw/reader/block/BlockIndex.java +++ b/src/javastraw/reader/block/BlockIndices.java @@ -32,13 +32,13 @@ import java.util.List; import java.util.Map; -public class BlockIndex { - protected final Map blockIndex; +public class BlockIndices { + protected final Map blockIndices; protected final int numBlocks; - public BlockIndex(int nBlocks) { + public BlockIndices(int nBlocks) { numBlocks = nBlocks; - blockIndex = new HashMap<>(nBlocks); + blockIndices = new HashMap<>(nBlocks); } public void populateBlocks(LittleEndianInputStream dis) throws IOException { @@ -46,15 +46,19 @@ public void populateBlocks(LittleEndianInputStream dis) throws IOException { int blockNumber = dis.readInt(); long filePosition = dis.readLong(); int blockSizeInBytes = dis.readInt(); - blockIndex.put(blockNumber, new IndexEntry(filePosition, blockSizeInBytes)); + blockIndices.put(blockNumber, new IndexEntry(filePosition, blockSizeInBytes)); } } public List getBlockNumbers() { - return new ArrayList<>(blockIndex.keySet()); + return new ArrayList<>(blockIndices.keySet()); } public IndexEntry getBlock(int blockNumber) { - return blockIndex.get(blockNumber); + return blockIndices.get(blockNumber); + } + + public void clearCache() { + blockIndices.clear(); } } diff --git a/src/javastraw/reader/block/DynamicBlockIndex.java b/src/javastraw/reader/block/DynamicBlockIndices.java similarity index 89% rename from src/javastraw/reader/block/DynamicBlockIndex.java rename to src/javastraw/reader/block/DynamicBlockIndices.java index e607fde..6ddae4d 100644 --- a/src/javastraw/reader/block/DynamicBlockIndex.java +++ b/src/javastraw/reader/block/DynamicBlockIndices.java @@ -32,7 +32,7 @@ import java.io.IOException; import java.util.List; -public class DynamicBlockIndex extends BlockIndex { +public class DynamicBlockIndices extends BlockIndices { private final int maxBlocks; private final long minPosition, maxPosition; @@ -40,8 +40,8 @@ public class DynamicBlockIndex extends BlockIndex { private Long mapFileBoundsMin = null, mapFileBoundsMax = null; private final SeekableStream stream; - public DynamicBlockIndex(SeekableStream stream, int numBlocks, int maxBlocks, long minPosition) { - super(numBlocks); + public DynamicBlockIndices(SeekableStream stream, int numBlocks, int maxBlocks, long minPosition) { + super(numBlocks / 2); // when using dynamic blocks, the idea is to not load every single block? this.stream = stream; this.maxBlocks = maxBlocks; this.minPosition = minPosition; @@ -59,8 +59,8 @@ public List getBlockNumbers() { public IndexEntry getBlock(int blockNumber) { if (blockNumber > maxBlocks) { return null; - } else if (blockIndex.containsKey(blockNumber)) { - return blockIndex.get(blockNumber); + } else if (blockIndices.containsKey(blockNumber)) { + return blockIndices.get(blockNumber); } else if (blockNumber == 0) { try { return searchForBlockIndexEntry(blockNumber, this.minPosition, this.minPosition + 16); @@ -109,7 +109,7 @@ private IndexEntry searchForBlockIndexEntry(int blockNumber, long boundsMin, lon int blockNumberFound = dis.readInt(); long filePosition = dis.readLong(); int blockSizeInBytes = dis.readInt(); - blockIndex.put(blockNumberFound, new IndexEntry(filePosition, blockSizeInBytes)); + blockIndices.put(blockNumberFound, new IndexEntry(filePosition, blockSizeInBytes)); if (firstBlockNumber == null) firstBlockNumber = blockNumberFound; lastBlockNumber = blockNumberFound; pointer += 16; @@ -122,7 +122,7 @@ private IndexEntry searchForBlockIndexEntry(int blockNumber, long boundsMin, lon blockNumberRangeMax = lastBlockNumber; } - return blockIndex.get(blockNumber); + return blockIndices.get(blockNumber); } // Midpoint in units of 16 byte chunks int nEntries = (int) ((boundsMax - boundsMin) / 16); @@ -141,8 +141,8 @@ private IndexEntry searchForBlockIndexEntry(int blockNumber, long boundsMin, lon blockSizeInBytes = dis.readInt(); } if (blockNumberFound == blockNumber) { - blockIndex.put(blockNumberFound, new IndexEntry(filePosition, blockSizeInBytes)); - return blockIndex.get(blockNumber); + blockIndices.put(blockNumberFound, new IndexEntry(filePosition, blockSizeInBytes)); + return blockIndices.get(blockNumber); } else if (blockNumber > blockNumberFound) { return searchForBlockIndexEntry(blockNumber, positionToSeek + 16, boundsMax); } else { diff --git a/src/javastraw/reader/iterators/ContactRecordIterator.java b/src/javastraw/reader/iterators/ContactRecordIterator.java index a5fb0c5..8d11bb1 100644 --- a/src/javastraw/reader/iterators/ContactRecordIterator.java +++ b/src/javastraw/reader/iterators/ContactRecordIterator.java @@ -26,6 +26,7 @@ import javastraw.reader.DatasetReader; import javastraw.reader.block.Block; +import javastraw.reader.block.BlockIndices; import javastraw.reader.block.ContactRecord; import javastraw.reader.mzd.BlockCache; import javastraw.reader.mzd.BlockLoader; @@ -42,7 +43,7 @@ public class ContactRecordIterator implements Iterator { private final List blockNumbers; - private int blockIdx; + private final BlockIndices blockIndices; private Iterator currentBlockIterator; private final DatasetReader reader; private final String zdKey; @@ -50,21 +51,23 @@ public class ContactRecordIterator implements Iterator { private final int chr1Idx, chr2Idx; private final HiCZoom zoom; private final NormalizationType normType; + private int currentBlockIdx; /** * Initializes the iterator */ - public ContactRecordIterator(DatasetReader reader, String zdKey, BlockCache blockCache, + public ContactRecordIterator(DatasetReader reader, BlockIndices blockIndices, String zdKey, BlockCache blockCache, int chr1Idx, int chr2Idx, HiCZoom zoom, NormalizationType normType) { this.reader = reader; + this.blockIndices = blockIndices; this.zdKey = zdKey; this.chr1Idx = chr1Idx; this.chr2Idx = chr2Idx; this.zoom = zoom; this.blockCache = blockCache; - this.blockIdx = -1; + this.currentBlockIdx = -1; this.normType = normType; - this.blockNumbers = reader.getBlockNumbers(zdKey); + this.blockNumbers = blockIndices.getBlockNumbers(); } /** @@ -85,10 +88,10 @@ public boolean hasNext() { if (currentBlockIterator != null && currentBlockIterator.hasNext()) { return true; } else { - blockIdx++; - while (blockIdx < blockNumbers.size()) { + currentBlockIdx++; + while (currentBlockIdx < blockNumbers.size()) { try { - int blockNumber = blockNumbers.get(blockIdx); + int blockNumber = blockNumbers.get(currentBlockIdx); // Optionally check the cache String key = BlockLoader.getBlockKey(zdKey, blockNumber, normType); @@ -97,7 +100,7 @@ public boolean hasNext() { nextBlock = blockCache.get(key); } else { nextBlock = reader.readNormalizedBlock(blockNumber, zdKey, normType, - chr1Idx, chr2Idx, zoom); + chr1Idx, chr2Idx, zoom, blockIndices.getBlock(blockNumber)); } List contactRecords = nextBlock.getContactRecords(); if (contactRecords != null && contactRecords.size() > 0) { @@ -108,10 +111,12 @@ public boolean hasNext() { System.err.println("Error fetching block " + e.getMessage()); return false; } - blockIdx++; + currentBlockIdx++; } } + blockNumbers.clear(); // done with iterator + return false; } diff --git a/src/javastraw/reader/mzd/BlockLoader.java b/src/javastraw/reader/mzd/BlockLoader.java index 1a0acfc..e6051e3 100644 --- a/src/javastraw/reader/mzd/BlockLoader.java +++ b/src/javastraw/reader/mzd/BlockLoader.java @@ -3,7 +3,9 @@ import javastraw.reader.DatasetReader; import javastraw.reader.basics.Chromosome; import javastraw.reader.block.Block; +import javastraw.reader.block.BlockIndices; import javastraw.reader.block.BlockModifier; +import javastraw.reader.block.IndexEntry; import javastraw.reader.type.HiCZoom; import javastraw.reader.type.NormalizationType; import javastraw.tools.ParallelizationTools; @@ -19,7 +21,8 @@ public class BlockLoader { public static void actuallyLoadGivenBlocks(final List globalBlockList, List blockIds, final NormalizationType no, BlockModifier modifier, final String zdKey, Chromosome chrom1, Chromosome chrom2, HiCZoom zoom, - BlockCache globalBlockCache, DatasetReader reader) { + BlockCache globalBlockCache, DatasetReader reader, + BlockIndices blockIndex) { final AtomicInteger errorCounter = new AtomicInteger(); final Object listLock = new Object(); final Object cacheLock = new Object(); @@ -38,7 +41,7 @@ public static void actuallyLoadGivenBlocks(final List globalBlockList, Li String key = getBlockKey(zdKey, blockNumber, no); try { getBlockFromReader(blockList, no, modifier, zdKey, chrom1, chrom2, zoom, blockCache, - reader, blockNumber, key); + reader, blockNumber, key, blockIndex.getBlock(blockNumber)); } catch (IOException e) { errorCounter.incrementAndGet(); } @@ -61,9 +64,9 @@ public static void actuallyLoadGivenBlocks(final List globalBlockList, Li private static void getBlockFromReader(List blockList, NormalizationType no, BlockModifier modifier, String zdKey, Chromosome chrom1, Chromosome chrom2, HiCZoom zoom, BlockCache blockCache, DatasetReader reader, - int blockNumber, String key) throws IOException { + int blockNumber, String key, IndexEntry idx) throws IOException { Block b = reader.readNormalizedBlock(blockNumber, zdKey, no, - chrom1.getIndex(), chrom2.getIndex(), zoom); + chrom1.getIndex(), chrom2.getIndex(), zoom, idx); if (b == null) { b = new Block(blockNumber, key); } diff --git a/src/javastraw/reader/mzd/DynamicMatrixZoomData.java b/src/javastraw/reader/mzd/DynamicMatrixZoomData.java index 5a14576..2a4ce1e 100644 --- a/src/javastraw/reader/mzd/DynamicMatrixZoomData.java +++ b/src/javastraw/reader/mzd/DynamicMatrixZoomData.java @@ -43,7 +43,9 @@ public class DynamicMatrixZoomData extends MatrixZoomData { * @param zoom */ public DynamicMatrixZoomData(HiCZoom zoom, MatrixZoomData higherResZD) { - super(higherResZD.chr1, higherResZD.chr2, zoom, higherResZD.blockBinCount, higherResZD.blockColumnCount, new int[0], new int[0], null); + super(higherResZD.chr1, higherResZD.chr2, zoom, higherResZD.blockBinCount, + higherResZD.blockColumnCount, new int[0], new int[0], null, null, + true, higherResZD.sumCounts); this.higherResZD = higherResZD; scaleFactor = zoom.getBinSize() / higherResZD.getBinSize(); } diff --git a/src/javastraw/reader/mzd/LegacyVersionBlockReader.java b/src/javastraw/reader/mzd/LegacyVersionBlockReader.java index a416ef4..2b20186 100644 --- a/src/javastraw/reader/mzd/LegacyVersionBlockReader.java +++ b/src/javastraw/reader/mzd/LegacyVersionBlockReader.java @@ -3,6 +3,7 @@ import javastraw.reader.DatasetReader; import javastraw.reader.basics.Chromosome; import javastraw.reader.block.Block; +import javastraw.reader.block.BlockIndices; import javastraw.reader.block.BlockModifier; import javastraw.reader.type.HiCZoom; import javastraw.reader.type.NormalizationType; @@ -26,7 +27,7 @@ public static List addNormalizedBlocksToList(final List blockList, boolean getBelowDiagonal, BlockModifier modifier, int blockBinCount, int blockColumnCount, BlockCache blockCache, String zdKey, Chromosome chrom1, Chromosome chrom2, HiCZoom zoom, - DatasetReader reader) { + DatasetReader reader, BlockIndices blockIndices) { Set blocksToLoad = new HashSet<>(); @@ -53,7 +54,7 @@ public static List addNormalizedBlocksToList(final List blockList, } BlockLoader.actuallyLoadGivenBlocks(blockList, new ArrayList<>(blocksToLoad), norm, modifier, zdKey, - chrom1, chrom2, zoom, blockCache, reader); + chrom1, chrom2, zoom, blockCache, reader, blockIndices); return new ArrayList<>(new HashSet<>(blockList)); } diff --git a/src/javastraw/reader/mzd/MatrixZoomData.java b/src/javastraw/reader/mzd/MatrixZoomData.java index b6f65aa..a36f0f4 100644 --- a/src/javastraw/reader/mzd/MatrixZoomData.java +++ b/src/javastraw/reader/mzd/MatrixZoomData.java @@ -28,10 +28,7 @@ import javastraw.matrices.BasicMatrix; import javastraw.reader.DatasetReader; import javastraw.reader.basics.Chromosome; -import javastraw.reader.block.Block; -import javastraw.reader.block.BlockModifier; -import javastraw.reader.block.ContactRecord; -import javastraw.reader.block.IdentityModifier; +import javastraw.reader.block.*; import javastraw.reader.depth.LogDepth; import javastraw.reader.depth.V9Depth; import javastraw.reader.expected.ExpectedValueFunction; @@ -60,18 +57,23 @@ public class MatrixZoomData { protected DatasetReader reader; protected final Map eigenvectorMap; protected final BlockModifier identity = new IdentityModifier(); - protected double averageCount = -1; + protected final double sumCounts; public static boolean useIteratorDontPutAllInRAM = false; public static boolean shouldCheckRAMUsage = false; + private final BlockIndices blockIndices; public MatrixZoomData(Chromosome chr1, Chromosome chr2, HiCZoom zoom, int blockBinCount, int blockColumnCount, - int[] chr1Sites, int[] chr2Sites, DatasetReader reader) { + int[] chr1Sites, int[] chr2Sites, DatasetReader reader, BlockIndices blockIndices, + boolean useCache, double sumCounts) { this.chr1 = chr1; this.chr2 = chr2; this.zoom = zoom; this.isIntra = chr1.getIndex() == chr2.getIndex(); this.reader = reader; this.blockBinCount = blockBinCount; + this.blockIndices = blockIndices; + this.sumCounts = sumCounts; + blockCache = new BlockCache(); if (reader.getVersion() > 8) { v9Depth = V9Depth.setDepthMethod(reader.getDepthBase(), blockBinCount); @@ -98,6 +100,7 @@ public MatrixZoomData(Chromosome chr1, Chromosome chr2, HiCZoom zoom, int blockB } pearsonsMap = new HashMap<>(); eigenvectorMap = new HashMap<>(); + blockCache.setUseCache(useCache); } protected MatrixZoomData(MatrixZoomData zd0) { @@ -110,14 +113,11 @@ protected MatrixZoomData(MatrixZoomData zd0) { this.correctedBinCount = zd0.correctedBinCount; this.blockCache = zd0.blockCache; this.v9Depth = zd0.v9Depth; - this.averageCount = zd0.averageCount; + this.sumCounts = zd0.sumCounts; this.reader = zd0.reader; this.pearsonsMap = zd0.pearsonsMap; this.eigenvectorMap = zd0.eigenvectorMap; - } - - public void setUseCache(boolean useCache) { - blockCache.setUseCache(useCache); + this.blockIndices = zd0.blockIndices; } public Chromosome getChr1() { @@ -193,11 +193,11 @@ public List getNormalizedBlocksOverlapping(long binX1, long binY1, long b if (reader.getVersion() > 8 && isIntra) { return V9IntraBlockReader.addNormalizedBlocksToListV9(blockList, (int) binX1, (int) binY1, (int) binX2, (int) binY2, no, modifier, blockBinCount, v9Depth, - blockColumnCount, blockCache, getKey(), chr1, chr2, zoom, reader); + blockColumnCount, blockCache, getKey(), chr1, chr2, zoom, reader, blockIndices); } else { return LegacyVersionBlockReader.addNormalizedBlocksToList(blockList, (int) binX1, (int) binY1, (int) binX2, (int) binY2, no, fillUnderDiagonal, modifier, blockBinCount, blockColumnCount, blockCache, - getKey(), chr1, chr2, zoom, reader); + getKey(), chr1, chr2, zoom, reader, blockIndices); } } @@ -240,37 +240,26 @@ private List getBlockNumbersForRegionFromBinPosition(long[] regionBinIn } } - /** - * Returns the average count - * - * @return Average count - */ public double getAverageCount() { - return averageCount; - } - - /** - * Sets the average count - * - * @param averageCount Average count to set - */ - public void setAverageCount(double averageCount) { - this.averageCount = averageCount; + long nBins1 = chr1.getLength() / zoom.getBinSize(); + long nBins2 = chr2.getLength() / zoom.getBinSize(); + return (sumCounts / nBins1) / nBins2; // <= trying to avoid overflows } public void clearCache() { blockCache.clear(); pearsonsMap.clear(); eigenvectorMap.clear(); + if (blockIndices != null) blockIndices.clearCache(); } public Iterator getDirectIterator() { - return new ContactRecordIterator(reader, getKey(), blockCache, + return new ContactRecordIterator(reader, blockIndices, getKey(), blockCache, getChr1Idx(), getChr2Idx(), getZoom(), NormalizationHandler.NONE); } public Iterator getNormalizedIterator(NormalizationType normType) { - return new ContactRecordIterator(reader, getKey(), blockCache, + return new ContactRecordIterator(reader, blockIndices, getKey(), blockCache, getChr1Idx(), getChr2Idx(), getZoom(), normType); } diff --git a/src/javastraw/reader/mzd/V9IntraBlockReader.java b/src/javastraw/reader/mzd/V9IntraBlockReader.java index 21966d3..b7bc09e 100644 --- a/src/javastraw/reader/mzd/V9IntraBlockReader.java +++ b/src/javastraw/reader/mzd/V9IntraBlockReader.java @@ -3,6 +3,7 @@ import javastraw.reader.DatasetReader; import javastraw.reader.basics.Chromosome; import javastraw.reader.block.Block; +import javastraw.reader.block.BlockIndices; import javastraw.reader.block.BlockModifier; import javastraw.reader.depth.V9Depth; import javastraw.reader.type.HiCZoom; @@ -19,7 +20,7 @@ public static List addNormalizedBlocksToListV9(final List blockLis int blockBinCount, V9Depth v9Depth, int blockColumnCount, BlockCache blockCache, String zdKey, Chromosome chrom1, Chromosome chrom2, HiCZoom zoom, - DatasetReader reader) { + DatasetReader reader, BlockIndices blockIndex) { List blockNumbersToLoad = getBlockNumbersForRegionFromBinPosition(binX1, binX2, binY1, binY2, blockBinCount, blockColumnCount, v9Depth); @@ -29,7 +30,7 @@ public static List addNormalizedBlocksToListV9(final List blockLis blockCache, zdKey); BlockLoader.actuallyLoadGivenBlocks(blockList, new ArrayList<>(blocksToLoad), norm, modifier, zdKey, - chrom1, chrom2, zoom, blockCache, reader); + chrom1, chrom2, zoom, blockCache, reader, blockIndex); return blockList; } From 3cf2c3ae7b413dfecdec77d40e1ad3da2fdc3cd8 Mon Sep 17 00:00:00 2001 From: Muhammad S Shamim Date: Tue, 12 Apr 2022 16:47:04 -0500 Subject: [PATCH 6/7] clear matrices --- src/javastraw/StrawGlobals.java | 2 +- src/javastraw/reader/Dataset.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/javastraw/StrawGlobals.java b/src/javastraw/StrawGlobals.java index c343efe..c9fc10b 100644 --- a/src/javastraw/StrawGlobals.java +++ b/src/javastraw/StrawGlobals.java @@ -25,7 +25,7 @@ package javastraw; public class StrawGlobals { - public static final String versionNum = "2.14.01"; + public static final String versionNum = "2.14.02"; public static final int minVersion = 6; public static final int bufferSize = 2097152; diff --git a/src/javastraw/reader/Dataset.java b/src/javastraw/reader/Dataset.java index d0fa25d..8d72455 100644 --- a/src/javastraw/reader/Dataset.java +++ b/src/javastraw/reader/Dataset.java @@ -83,7 +83,7 @@ public void clearCache(boolean onlyClearInter) { } eigenvectorCache.clear(); normalizationVectorCache.clear(); - normalizationTypes.clear(); + matrices.clear(); } public Matrix getMatrix(Chromosome chr1, Chromosome chr2) { From d3837269a8896a1497baaf3fcf376f803d7019d2 Mon Sep 17 00:00:00 2001 From: Muhammad S Shamim Date: Wed, 13 Apr 2022 18:39:34 -0500 Subject: [PATCH 7/7] update visibility of variables --- src/javastraw/StrawGlobals.java | 2 +- src/javastraw/reader/mzd/MatrixZoomData.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/javastraw/StrawGlobals.java b/src/javastraw/StrawGlobals.java index c9fc10b..21c7f07 100644 --- a/src/javastraw/StrawGlobals.java +++ b/src/javastraw/StrawGlobals.java @@ -25,7 +25,7 @@ package javastraw; public class StrawGlobals { - public static final String versionNum = "2.14.02"; + public static final String versionNum = "2.14.03"; public static final int minVersion = 6; public static final int bufferSize = 2097152; diff --git a/src/javastraw/reader/mzd/MatrixZoomData.java b/src/javastraw/reader/mzd/MatrixZoomData.java index a36f0f4..b869844 100644 --- a/src/javastraw/reader/mzd/MatrixZoomData.java +++ b/src/javastraw/reader/mzd/MatrixZoomData.java @@ -58,9 +58,9 @@ public class MatrixZoomData { protected final Map eigenvectorMap; protected final BlockModifier identity = new IdentityModifier(); protected final double sumCounts; - public static boolean useIteratorDontPutAllInRAM = false; - public static boolean shouldCheckRAMUsage = false; - private final BlockIndices blockIndices; + protected static boolean useIteratorDontPutAllInRAM = false; + protected static boolean shouldCheckRAMUsage = false; + protected final BlockIndices blockIndices; public MatrixZoomData(Chromosome chr1, Chromosome chr2, HiCZoom zoom, int blockBinCount, int blockColumnCount, int[] chr1Sites, int[] chr2Sites, DatasetReader reader, BlockIndices blockIndices,