-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add additional expected model and testing code
- Loading branch information
Showing
3 changed files
with
131 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
package javastraw.expected; | ||
|
||
import javastraw.reader.basics.Chromosome; | ||
import javastraw.reader.block.ContactRecord; | ||
import javastraw.reader.mzd.MatrixZoomData; | ||
import javastraw.reader.type.NormalizationType; | ||
import org.apache.commons.math3.analysis.interpolation.SplineInterpolator; | ||
import org.apache.commons.math3.analysis.polynomials.PolynomialSplineFunction; | ||
|
||
import java.util.Iterator; | ||
|
||
public class LogExpectedZscoreSpline extends ExpectedModel { | ||
|
||
private final PolynomialSplineFunction mean, std; | ||
private final double nearDiagonalSignal; | ||
private final float maxX; | ||
|
||
public LogExpectedZscoreSpline(MatrixZoomData zd, NormalizationType norm, Chromosome chrom, int res) { | ||
int maxBin = (int) (chrom.getLength() / res + 1); | ||
int[] maxDist = new int[1]; | ||
PolynomialSplineFunction[] functions = fitDataToFunction(zd, norm, maxBin, maxDist); | ||
mean = functions[0]; | ||
std = functions[1]; | ||
maxX = logp1i(Math.min(maxBin, maxDist[0])); | ||
nearDiagonalSignal = Math.expm1(mean.value(0.5)); | ||
} | ||
|
||
private PolynomialSplineFunction[] fitDataToFunction(MatrixZoomData zd, NormalizationType norm, int maxBin, int[] maxDist) { | ||
|
||
WelfordArray values = new WelfordArray(logp1i(maxBin) + 2); | ||
WelfordArray distances = new WelfordArray(logp1i(maxBin) + 2); | ||
populateWithCounts(zd, norm, values, distances, maxBin, maxDist); | ||
|
||
double[] avgExpected = values.getMean(); | ||
double[] avgExpectedStds = values.getStdDev(); | ||
double[] avgDistances = distances.getMean(); | ||
maxDist[0] = (int) Math.expm1(avgDistances[avgDistances.length - 1]); | ||
|
||
return new PolynomialSplineFunction[]{new SplineInterpolator().interpolate(avgDistances, avgExpected), | ||
new SplineInterpolator().interpolate(avgDistances, avgExpectedStds)}; | ||
} | ||
|
||
private void populateWithCounts(MatrixZoomData zd, NormalizationType norm, WelfordArray values, | ||
WelfordArray distances, int maxBin, int[] maxDist) { | ||
Iterator<ContactRecord> records = getIterator(zd, norm); | ||
while (records.hasNext()) { | ||
ContactRecord record = records.next(); | ||
int dist = getDist(record); | ||
if (dist == 0) { | ||
values.addValue(0, logp1(record.getCounts())); | ||
distances.addValue(0, logp1(dist)); | ||
} else if (dist < maxBin) { | ||
maxDist[0] = Math.max(maxDist[0], dist); | ||
values.addValue(logp1i(dist) + 1, logp1(record.getCounts())); | ||
distances.addValue(logp1i(dist) + 1, logp1(dist)); | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
public double getExpectedFromUncompressedBin(int dist0) { | ||
double dist = Math.min(Math.max(0, logp1(dist0)), maxX); | ||
return Math.expm1(mean.value(dist)); | ||
} | ||
|
||
public double getExpectedValForZFromUncompressedBin(int dist0, float z) { | ||
double dist = Math.min(Math.max(0, logp1(dist0)), maxX); | ||
return Math.expm1(mean.value(dist) + z * std.value(dist)); | ||
} | ||
|
||
public double getZscoreForObservedUncompressedBin(int dist0, float observed) { | ||
double dist = Math.min(Math.max(0, logp1(dist0)), maxX); | ||
return (logp1(observed) - mean.value(dist)) / std.value(dist); | ||
} | ||
|
||
@Override | ||
public double getNearDiagonalSignal() { | ||
return nearDiagonalSignal; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package javastraw.expected; | ||
|
||
import javastraw.reader.Dataset; | ||
import javastraw.reader.basics.Chromosome; | ||
import javastraw.reader.mzd.MatrixZoomData; | ||
import javastraw.reader.type.HiCZoom; | ||
import javastraw.reader.type.NormalizationHandler; | ||
import javastraw.reader.type.NormalizationType; | ||
import javastraw.tools.HiCFileTools; | ||
import javastraw.tools.MatrixTools; | ||
|
||
public class TestExpected { | ||
|
||
public static void main(String[] args) { | ||
|
||
Dataset ds = HiCFileTools.extractDatasetForCLT( | ||
"/Users/muhammad/Desktop/hicfiles/subsample/chr10_subsample0.25.hic", | ||
//"", | ||
false, false, false); | ||
Chromosome chrom = ds.getChromosomeHandler().getChromosomeFromName("chr10"); | ||
|
||
NormalizationType[] norms = new NormalizationType[]{NormalizationHandler.SCALE, | ||
NormalizationHandler.VC | ||
}; | ||
|
||
for (int res : new int[]{50000, 10000, 5000, 2000, 1000}) { | ||
MatrixZoomData zd = ds.getMatrix(chrom, chrom).getZoomData(new HiCZoom(res)); | ||
|
||
for (NormalizationType norm : norms) { | ||
|
||
int maxBin = (int) (chrom.getLength() / res); | ||
LogExpectedSpline polynomial2 = new LogExpectedSpline(zd, norm, chrom, res); | ||
LogExpectedZscoreSpline polyZ = new LogExpectedZscoreSpline(zd, norm, chrom, res); | ||
|
||
System.out.println("Got all points"); | ||
|
||
double[][] data = new double[5][maxBin]; | ||
double[] evec = ds.getExpectedValues(new HiCZoom(res), norm, false).getExpectedValuesWithNormalization(chrom.getIndex()).getValues().get(0); | ||
for (int x = 0; x < maxBin; x++) { | ||
data[0][x] = x; | ||
data[1][x] = polynomial2.getExpectedFromUncompressedBin(x); | ||
data[2][x] = evec[x]; | ||
data[3][x] = polyZ.getExpectedFromUncompressedBin(x); | ||
data[4][x] = polyZ.getExpectedValForZFromUncompressedBin(x, 1); | ||
} | ||
MatrixTools.saveMatrixTextNumpy("multi_interp_" + norm.getLabel() + "_" + res + ".npy", data); | ||
} | ||
} | ||
} | ||
} |