Skip to content

Commit

Permalink
add additional expected model and testing code
Browse files Browse the repository at this point in the history
  • Loading branch information
sa501428 committed Oct 13, 2022
1 parent 4db9011 commit eecfd4c
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/javastraw/StrawGlobals.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
package javastraw;

public class StrawGlobals {
public static final String versionNum = "2.21.00";
public static final String versionNum = "2.22.00";
public static final int minVersion = 6;
public static final int bufferSize = 2097152;
public static final String CHR_ALL = "All";
Expand Down
80 changes: 80 additions & 0 deletions src/javastraw/expected/LogExpectedZscoreSpline.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package javastraw.expected;

import javastraw.reader.basics.Chromosome;
import javastraw.reader.block.ContactRecord;
import javastraw.reader.mzd.MatrixZoomData;
import javastraw.reader.type.NormalizationType;
import org.apache.commons.math3.analysis.interpolation.SplineInterpolator;
import org.apache.commons.math3.analysis.polynomials.PolynomialSplineFunction;

import java.util.Iterator;

public class LogExpectedZscoreSpline extends ExpectedModel {

private final PolynomialSplineFunction mean, std;
private final double nearDiagonalSignal;
private final float maxX;

public LogExpectedZscoreSpline(MatrixZoomData zd, NormalizationType norm, Chromosome chrom, int res) {
int maxBin = (int) (chrom.getLength() / res + 1);
int[] maxDist = new int[1];
PolynomialSplineFunction[] functions = fitDataToFunction(zd, norm, maxBin, maxDist);
mean = functions[0];
std = functions[1];
maxX = logp1i(Math.min(maxBin, maxDist[0]));
nearDiagonalSignal = Math.expm1(mean.value(0.5));
}

private PolynomialSplineFunction[] fitDataToFunction(MatrixZoomData zd, NormalizationType norm, int maxBin, int[] maxDist) {

WelfordArray values = new WelfordArray(logp1i(maxBin) + 2);
WelfordArray distances = new WelfordArray(logp1i(maxBin) + 2);
populateWithCounts(zd, norm, values, distances, maxBin, maxDist);

double[] avgExpected = values.getMean();
double[] avgExpectedStds = values.getStdDev();
double[] avgDistances = distances.getMean();
maxDist[0] = (int) Math.expm1(avgDistances[avgDistances.length - 1]);

return new PolynomialSplineFunction[]{new SplineInterpolator().interpolate(avgDistances, avgExpected),
new SplineInterpolator().interpolate(avgDistances, avgExpectedStds)};
}

private void populateWithCounts(MatrixZoomData zd, NormalizationType norm, WelfordArray values,
WelfordArray distances, int maxBin, int[] maxDist) {
Iterator<ContactRecord> records = getIterator(zd, norm);
while (records.hasNext()) {
ContactRecord record = records.next();
int dist = getDist(record);
if (dist == 0) {
values.addValue(0, logp1(record.getCounts()));
distances.addValue(0, logp1(dist));
} else if (dist < maxBin) {
maxDist[0] = Math.max(maxDist[0], dist);
values.addValue(logp1i(dist) + 1, logp1(record.getCounts()));
distances.addValue(logp1i(dist) + 1, logp1(dist));
}
}
}

@Override
public double getExpectedFromUncompressedBin(int dist0) {
double dist = Math.min(Math.max(0, logp1(dist0)), maxX);
return Math.expm1(mean.value(dist));
}

public double getExpectedValForZFromUncompressedBin(int dist0, float z) {
double dist = Math.min(Math.max(0, logp1(dist0)), maxX);
return Math.expm1(mean.value(dist) + z * std.value(dist));
}

public double getZscoreForObservedUncompressedBin(int dist0, float observed) {
double dist = Math.min(Math.max(0, logp1(dist0)), maxX);
return (logp1(observed) - mean.value(dist)) / std.value(dist);
}

@Override
public double getNearDiagonalSignal() {
return nearDiagonalSignal;
}
}
50 changes: 50 additions & 0 deletions src/javastraw/expected/TestExpected.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package javastraw.expected;

import javastraw.reader.Dataset;
import javastraw.reader.basics.Chromosome;
import javastraw.reader.mzd.MatrixZoomData;
import javastraw.reader.type.HiCZoom;
import javastraw.reader.type.NormalizationHandler;
import javastraw.reader.type.NormalizationType;
import javastraw.tools.HiCFileTools;
import javastraw.tools.MatrixTools;

public class TestExpected {

public static void main(String[] args) {

Dataset ds = HiCFileTools.extractDatasetForCLT(
"/Users/muhammad/Desktop/hicfiles/subsample/chr10_subsample0.25.hic",
//"",
false, false, false);
Chromosome chrom = ds.getChromosomeHandler().getChromosomeFromName("chr10");

NormalizationType[] norms = new NormalizationType[]{NormalizationHandler.SCALE,
NormalizationHandler.VC
};

for (int res : new int[]{50000, 10000, 5000, 2000, 1000}) {
MatrixZoomData zd = ds.getMatrix(chrom, chrom).getZoomData(new HiCZoom(res));

for (NormalizationType norm : norms) {

int maxBin = (int) (chrom.getLength() / res);
LogExpectedSpline polynomial2 = new LogExpectedSpline(zd, norm, chrom, res);
LogExpectedZscoreSpline polyZ = new LogExpectedZscoreSpline(zd, norm, chrom, res);

System.out.println("Got all points");

double[][] data = new double[5][maxBin];
double[] evec = ds.getExpectedValues(new HiCZoom(res), norm, false).getExpectedValuesWithNormalization(chrom.getIndex()).getValues().get(0);
for (int x = 0; x < maxBin; x++) {
data[0][x] = x;
data[1][x] = polynomial2.getExpectedFromUncompressedBin(x);
data[2][x] = evec[x];
data[3][x] = polyZ.getExpectedFromUncompressedBin(x);
data[4][x] = polyZ.getExpectedValForZFromUncompressedBin(x, 1);
}
MatrixTools.saveMatrixTextNumpy("multi_interp_" + norm.getLabel() + "_" + res + ".npy", data);
}
}
}
}

0 comments on commit eecfd4c

Please sign in to comment.