diff --git a/src/javastraw/StrawGlobals.java b/src/javastraw/StrawGlobals.java index 81d2867..bdc5771 100644 --- a/src/javastraw/StrawGlobals.java +++ b/src/javastraw/StrawGlobals.java @@ -25,7 +25,7 @@ package javastraw; public class StrawGlobals { - public static final String versionNum = "2.21.00"; + public static final String versionNum = "2.22.00"; public static final int minVersion = 6; public static final int bufferSize = 2097152; public static final String CHR_ALL = "All"; diff --git a/src/javastraw/expected/LogExpectedZscoreSpline.java b/src/javastraw/expected/LogExpectedZscoreSpline.java new file mode 100644 index 0000000..b402f34 --- /dev/null +++ b/src/javastraw/expected/LogExpectedZscoreSpline.java @@ -0,0 +1,80 @@ +package javastraw.expected; + +import javastraw.reader.basics.Chromosome; +import javastraw.reader.block.ContactRecord; +import javastraw.reader.mzd.MatrixZoomData; +import javastraw.reader.type.NormalizationType; +import org.apache.commons.math3.analysis.interpolation.SplineInterpolator; +import org.apache.commons.math3.analysis.polynomials.PolynomialSplineFunction; + +import java.util.Iterator; + +public class LogExpectedZscoreSpline extends ExpectedModel { + + private final PolynomialSplineFunction mean, std; + private final double nearDiagonalSignal; + private final float maxX; + + public LogExpectedZscoreSpline(MatrixZoomData zd, NormalizationType norm, Chromosome chrom, int res) { + int maxBin = (int) (chrom.getLength() / res + 1); + int[] maxDist = new int[1]; + PolynomialSplineFunction[] functions = fitDataToFunction(zd, norm, maxBin, maxDist); + mean = functions[0]; + std = functions[1]; + maxX = logp1i(Math.min(maxBin, maxDist[0])); + nearDiagonalSignal = Math.expm1(mean.value(0.5)); + } + + private PolynomialSplineFunction[] fitDataToFunction(MatrixZoomData zd, NormalizationType norm, int maxBin, int[] maxDist) { + + WelfordArray values = new WelfordArray(logp1i(maxBin) + 2); + WelfordArray distances = new WelfordArray(logp1i(maxBin) + 2); + populateWithCounts(zd, norm, values, distances, maxBin, maxDist); + + double[] avgExpected = values.getMean(); + double[] avgExpectedStds = values.getStdDev(); + double[] avgDistances = distances.getMean(); + maxDist[0] = (int) Math.expm1(avgDistances[avgDistances.length - 1]); + + return new PolynomialSplineFunction[]{new SplineInterpolator().interpolate(avgDistances, avgExpected), + new SplineInterpolator().interpolate(avgDistances, avgExpectedStds)}; + } + + private void populateWithCounts(MatrixZoomData zd, NormalizationType norm, WelfordArray values, + WelfordArray distances, int maxBin, int[] maxDist) { + Iterator records = getIterator(zd, norm); + while (records.hasNext()) { + ContactRecord record = records.next(); + int dist = getDist(record); + if (dist == 0) { + values.addValue(0, logp1(record.getCounts())); + distances.addValue(0, logp1(dist)); + } else if (dist < maxBin) { + maxDist[0] = Math.max(maxDist[0], dist); + values.addValue(logp1i(dist) + 1, logp1(record.getCounts())); + distances.addValue(logp1i(dist) + 1, logp1(dist)); + } + } + } + + @Override + public double getExpectedFromUncompressedBin(int dist0) { + double dist = Math.min(Math.max(0, logp1(dist0)), maxX); + return Math.expm1(mean.value(dist)); + } + + public double getExpectedValForZFromUncompressedBin(int dist0, float z) { + double dist = Math.min(Math.max(0, logp1(dist0)), maxX); + return Math.expm1(mean.value(dist) + z * std.value(dist)); + } + + public double getZscoreForObservedUncompressedBin(int dist0, float observed) { + double dist = Math.min(Math.max(0, logp1(dist0)), maxX); + return (logp1(observed) - mean.value(dist)) / std.value(dist); + } + + @Override + public double getNearDiagonalSignal() { + return nearDiagonalSignal; + } +} diff --git a/src/javastraw/expected/TestExpected.java b/src/javastraw/expected/TestExpected.java new file mode 100644 index 0000000..98c2840 --- /dev/null +++ b/src/javastraw/expected/TestExpected.java @@ -0,0 +1,50 @@ +package javastraw.expected; + +import javastraw.reader.Dataset; +import javastraw.reader.basics.Chromosome; +import javastraw.reader.mzd.MatrixZoomData; +import javastraw.reader.type.HiCZoom; +import javastraw.reader.type.NormalizationHandler; +import javastraw.reader.type.NormalizationType; +import javastraw.tools.HiCFileTools; +import javastraw.tools.MatrixTools; + +public class TestExpected { + + public static void main(String[] args) { + + Dataset ds = HiCFileTools.extractDatasetForCLT( + "/Users/muhammad/Desktop/hicfiles/subsample/chr10_subsample0.25.hic", + //"", + false, false, false); + Chromosome chrom = ds.getChromosomeHandler().getChromosomeFromName("chr10"); + + NormalizationType[] norms = new NormalizationType[]{NormalizationHandler.SCALE, + NormalizationHandler.VC + }; + + for (int res : new int[]{50000, 10000, 5000, 2000, 1000}) { + MatrixZoomData zd = ds.getMatrix(chrom, chrom).getZoomData(new HiCZoom(res)); + + for (NormalizationType norm : norms) { + + int maxBin = (int) (chrom.getLength() / res); + LogExpectedSpline polynomial2 = new LogExpectedSpline(zd, norm, chrom, res); + LogExpectedZscoreSpline polyZ = new LogExpectedZscoreSpline(zd, norm, chrom, res); + + System.out.println("Got all points"); + + double[][] data = new double[5][maxBin]; + double[] evec = ds.getExpectedValues(new HiCZoom(res), norm, false).getExpectedValuesWithNormalization(chrom.getIndex()).getValues().get(0); + for (int x = 0; x < maxBin; x++) { + data[0][x] = x; + data[1][x] = polynomial2.getExpectedFromUncompressedBin(x); + data[2][x] = evec[x]; + data[3][x] = polyZ.getExpectedFromUncompressedBin(x); + data[4][x] = polyZ.getExpectedValForZFromUncompressedBin(x, 1); + } + MatrixTools.saveMatrixTextNumpy("multi_interp_" + norm.getLabel() + "_" + res + ".npy", data); + } + } + } +}