From d7fb48cb66a1c539174a5a59b1b963a0abd30d3d Mon Sep 17 00:00:00 2001 From: gwlucastrig Date: Mon, 18 Mar 2024 14:35:48 -0400 Subject: [PATCH] Issue 108: API extensions to support interpolation --- .../regression/OlsNeighborInterpolator.java | 131 +++++++-- .../org/tinfour/regression/OlsSurface.java | 49 ++-- .../NaturalNeighborElements.java | 2 +- .../org/tinfour/utils/BasicTabulator.java | 266 ++++++++++++++++++ .../tinfour/demo/utils/TabulatorDelta.java | 47 +++- 5 files changed, 449 insertions(+), 46 deletions(-) create mode 100644 core/src/main/java/org/tinfour/utils/BasicTabulator.java diff --git a/analysis/src/main/java/org/tinfour/regression/OlsNeighborInterpolator.java b/analysis/src/main/java/org/tinfour/regression/OlsNeighborInterpolator.java index 818235f2..ca095685 100644 --- a/analysis/src/main/java/org/tinfour/regression/OlsNeighborInterpolator.java +++ b/analysis/src/main/java/org/tinfour/regression/OlsNeighborInterpolator.java @@ -37,6 +37,7 @@ import org.tinfour.common.IIncrementalTin; import org.tinfour.common.IIncrementalTinNavigator; import org.tinfour.common.IQuadEdge; +import org.tinfour.common.PolygonConstraint; import org.tinfour.common.Thresholds; import org.tinfour.common.Vertex; import org.tinfour.interpolation.IInterpolatorOverTin; @@ -101,7 +102,7 @@ public enum SelectionMethod { private PrintStream reportSamples; private final SurfaceModel surfaceModel; - private final ListsamplesFromRegression = new ArrayList<>(); + private final List samplesFromRegression = new ArrayList<>(); private final VertexValuatorDefault defaultValuator = new VertexValuatorDefault(); /** @@ -199,11 +200,11 @@ public double interpolate(final double x, final double y, IVertexValuator valuat return computeRegression(x, y, valuator, edgeQuery, false, computeExtendedStatistics); } - - private double computeRegression(final double x, final double y, - IVertexValuator valuator, IQuadEdge edgeQuery, - boolean crossValidate, boolean computeExtendedStatistics) { + final IVertexValuator valuator, + final IQuadEdge edgeQuery, + final boolean crossValidate, + final boolean computeExtendedStatistics) { // in the logic below, we access the Vertex x and y coordinates directly // but we use the getZ() method to get the z value. Some vertices // may actually be VertexMergerGroup instances and so the Z value must @@ -267,6 +268,7 @@ private double computeRegression(final double x, final double y, innerList.add(e.getB()); } + workTin.clear(); workTin.add(innerList, null); if (!workTin.isBootstrapped()) { workTin.clear(); @@ -274,7 +276,6 @@ private double computeRegression(final double x, final double y, } workNni.resetForChangeToTin(); NaturalNeighborElements innerElements = workNni.getNaturalNeighborElements(x, y); - workTin.clear(); if (innerElements == null) { return Double.NaN; } @@ -302,7 +303,7 @@ private double computeRegression(final double x, final double y, samplesFromRegression.add(C); } for (int i = 0; i < vArray.length; i++) { - samplesFromRegression.add(vArray[i]); + samplesFromRegression.add(vArray[i]); samples[k][0] = vArray[i].getX() - x; samples[k][1] = vArray[i].getY() - y; samples[k][2] = vq.value(vArray[i]); @@ -310,7 +311,7 @@ private double computeRegression(final double x, final double y, } vArray = outerElements.getNaturalNeighbors(); for (int i = 0; i < vArray.length; i++) { - samplesFromRegression.add(vArray[i]); + samplesFromRegression.add(vArray[i]); samples[k][0] = vArray[i].getX() - x; samples[k][1] = vArray[i].getY() - y; samples[k][2] = vq.value(vArray[i]); @@ -370,6 +371,35 @@ public void resetForChangeToTin() { nni.resetForChangeToTin(); } + /** + * Compute the parameters that would be estimated at the position of the + * vertex if it were not incorporated into the Delaunay triangulation. + * + * @param v a valid reference to a vertex instance currently incorporated + * into the Delaunay triangulation. + * @param computeExtendedStatistics compute the extended statistics such + * as the "hat" matrix and R-Student values. + * @return a valid floating-point value if the cross validation is successful; + * otherwise, NaN. + */ + public double crossValidate(Vertex v, boolean computeExtendedStatistics) { + clearResults(); + xQuery = v.getX(); + yQuery = v.getY(); + double x = v.getX(); + double y = v.getY(); + IQuadEdge edgeQuery = this.getRelatedEdge(x, y); + if (edgeQuery == null) { + return Double.NaN; + } + Vertex A = edgeQuery.getA(); + if (!A.equals(v)) { + // did not match a vertex in the TIN with the input + return Double.NaN; + } + return computeRegression(x, y, null, edgeQuery, true, computeExtendedStatistics); + } + /** * Compute the parameters that would be estimated at the position of the * vertex if it were not incorporated into the Delaunay triangulation. @@ -394,7 +424,7 @@ public double crossValidate(Vertex v) { // did not match a vertex in the TIN with the input return Double.NaN; } - return this.computeRegression(x, y, null, edgeQuery, true, true); + return computeRegression(x, y, null, edgeQuery, true, false); } /** @@ -502,22 +532,40 @@ private NaturalNeighborElements getAdjacentElements(List envelope) { } NaturalNeighborElements nne = null; - workTin.clear(); // not needed once everything else is correctly implemented + workTin.clear(); workTin.add(list, null); if (workTin.isBootstrapped()) { workNni.resetForChangeToTin(); nne = workNni.getNaturalNeighborElements(xQuery, yQuery); } - workTin.clear(); if (nne == null && list.size() >= 3) { + // The NNI class couldn't find an envelope, so we conclude that + // the query point must be outside the convex hull of the working TIN. + // It was inside the original inner envelope (and main TIN), + // but is not inside the outer envelope that we are trying to build here. + // As a fall-back, we attempt to use the points we do have, assigning + // them equal weights. There might be a better way to do this based + // on distances or some other criteria. + List perimeter = workTin.getPerimeter(); + PolygonConstraint pc = new PolygonConstraint(); + for (IQuadEdge iEdge : perimeter) { + pc.add(iEdge.getA()); + } + pc.complete(); + + double areaOfConvexHull = pc.getArea(); Vertex[] neighbors = list.toArray(new Vertex[0]); - return new NaturalNeighborElements(xQuery, yQuery, new double[list.size()], neighbors, 1.0); + double[] lambda = new double[neighbors.length]; + Arrays.fill(lambda, 1.0 / lambda.length); + return new NaturalNeighborElements( + xQuery, yQuery, lambda, neighbors, areaOfConvexHull); } + return nne; } - /** + /** * Gets a safe copy of the computed polynomial coefficients from the * regression (the "beta" parameters). These coefficients can be used * for interpolation or surface modeling purposes. Developers @@ -531,7 +579,7 @@ private NaturalNeighborElements getAdjacentElements(List envelope) { * array. */ public double[] getBeta() { - if (beta == null) { + if (beta == null) { return new double[0]; } return Arrays.copyOf(beta, beta.length); @@ -606,7 +654,7 @@ public double getConfidenceIntervalHalfSpan(double populationFraction) { * @param populationFraction a value in the range 0 < factor < 1. * @return a positive floating-point value */ - public double getPredictonIntervalHalfSpan(double populationFraction) { + public double getPredictionIntervalHalfSpan(double populationFraction) { if (!(0 < populationFraction && populationFraction < 1)) { throw new IllegalArgumentException("Population fraction is not in the range (0,1): " + populationFraction); } @@ -656,12 +704,13 @@ public RealMatrix getHatMatrix() { /** * Get the array of R-Student values corresponding to the input - * sample data. The R-Student values is often a useful tool in detecting - * anomalous points in the input same set. + * sample data. The R-Student values set is often a useful tool in detecting + * anomalous points in the input samples. * The computation of the R-Student values is optional. * If the extended-statistics option was not enabled, * then this method will return a null. - * @return if enabled, a valid array; otherwise, a nukll. + * + * @return if enabled, a valid array; otherwise, a null. */ public double[] getRStudentValues() { return olsSurface.rStudentValues; @@ -678,16 +727,56 @@ public int getDegreesOfFreedom() { return olsSurface.nDegOfFreedom; } + /** + * Gets a safe copy of the computed standard error for the polynomial + * coefficients from the regression (the error of the "beta" parameters). + *

+ * + * @return a if the last regression computation was successful, a valid array + * of standard-error values for the parameters corresponding + * to the selected surface model; otherwise, an array of length zero. + */ + public double[] getStandardErrorOfParameters() { + return olsSurface.getStandardErrorOfParameters(); + } + /** * Gets a list containing the vertices used in the most recent regression - * operation. The index of the vertices within the list will correspond + * operation. The index of the vertices within the list will correspond * to the corresponding positions in the hat matrix and the R-Student Values * array (if they were computed). + * * @return a valid, potentially empty list. */ - public ListgetVertices(){ - ArrayListlist = new ArrayList(); + public List getVertices() { + ArrayList list = new ArrayList(); list.addAll(samplesFromRegression); return list; } + + /** + * Get the r-squared value, also known as the coefficient of multiple + * determination. + * + * @return a value in the range 0 to 1. + */ + public double getRSquared() { + return olsSurface.getRSquared(); + } + + /** + * Get the estimated value at the specified coordinates using the + * parameters from the most recently completed regression operation. + * + * @param xEstimate the x coordinate of interest + * @param yEstimate the y coordinate of interest + * @return if successful, a valid floating-point value; otherwise, + * Double.NaN. + */ + public double computeEstimatedValue(double xEstimate, double yEstimate) { + if (Double.isFinite(this.zResult)) { + return olsSurface.computeEstimatedValue(xEstimate - xQuery, yEstimate - yQuery); + } + return Double.NaN; + } } diff --git a/analysis/src/main/java/org/tinfour/regression/OlsSurface.java b/analysis/src/main/java/org/tinfour/regression/OlsSurface.java index 01c340aa..411475fe 100644 --- a/analysis/src/main/java/org/tinfour/regression/OlsSurface.java +++ b/analysis/src/main/java/org/tinfour/regression/OlsSurface.java @@ -762,26 +762,26 @@ public double computeRegression( // Temporary code to compare results of this class // (quadratic model only) with the Apache Math implementation // - //double[] data = new double[nSamples * (5)]; - //int k = 0; - //for (int i = 0; i < nSamples; i++) { - // data[k++] = samples[i][2]; - // double x = samples[i][0]; - // double y = samples[i][1]; - // data[k++] = x; - // data[k++] = y; - // data[k++] = x * x; - // data[k++] = y * y; - //} - //OLSMultipleLinearRegression mls = new OLSMultipleLinearRegression(); - //mls.newSampleData(data, nSamples, 4); - //double[] q = mls.estimateRegressionParameters(); - //double[][] v = mls.estimateRegressionParametersVariance(); - //double []erp = mls.estimateRegressionParametersStandardErrors(); - //double erv = mls.estimateRegressandVariance(); - //double eev = mls.estimateErrorVariance(); - //double rss = mls.calculateResidualSumOfSquares(); - //double dummy = rss; // to provide a break point for debugging + // int nVar = model.nCoefficients-1; + // double[] data = new double[nSamples * (nVar+1)]; + // double[][] qm = computeDesignMatrix(model, nSamples, samples); + // int k = 0; + // for (int i = 0; i < nSamples; i++) { + // data[k++] = samples[i][2]; + // for(int j=0; j maxV) { + maxV = value; + } + if (dAbs < minAbsV) { + minAbsV = dAbs; + } + if (dAbs > maxAbsV) { + maxAbsV = dAbs; + } + } else { + nNaN++; + } + } + + + /** + * Get the mean of the absolute values of the input sample values. + * + * @return a valid floating point number, zero if no input has occurred. + */ + public double getMeanAbsValue() { + if (nD == 0) { + return 0; + } + return sumAbsV.getMean(); + } + + /** + * Get the mean of the signed values of the input sample values. + * + * @return a valid floating point number, zero if no input has occurred. + */ + public double getMean() { + if (nD == 0) { + return 0; + } + return sumV.getMean(); + } + + /** + * Get an unbiased estimate of the standard deviation of the population + * based on the tabulated samples. + * + * @return the standard deviation of the absolute values of the inputs, + * or zero if insufficient data is available. + */ + public double getStdDevAbsValue() { + if (nD < 2) { + return 0; + } + + // to reduce errors due to loss of precision, + // rather than using the conventional form for std dev + // nE*sumE2-sumE*sumE)/((nE*(nE-1)) + // this method uses the form below + double sD2 = sumV2.getSum(); + double sD = sumAbsV.getSum(); + return Math.sqrt((sD2 - (sD / nD) * sD) / (nD - 1)); + } + + /** + * Get an unbiased estimate of the standard deviation of the population + * based on the tabulated values (signed values). + * + * @return the standard deviation of the signed values of the inputs, + * or zero if insufficient data is available. + */ + public double getStdDev() { + if (nD < 2) { + return 0; + } + + // to reduce errors due to loss of precision, + // rather than using the conventional form for std dev + // nE*sumE2-sumE*sumE)/((nE*(nE-1)) + // this method uses the form below + double sD2 = sumV2.getSum(); + double sD = sumV.getSum(); + return Math.sqrt((sD2 - (sD / nD) * sD) / (nD - 1)); + } + + /** + * Get the minimum of the signed input values + * + * @return a valid floating point number + */ + public double getMinValue() { + return minV; + } + + /** + * Get the maximum value of the signed input samples + * + * @return a valid floating point number + */ + public double getMaxValue() { + return maxV; + } + + /** + * Get the minimum of the signed input values + * + * @return a valid floating point number + */ + public double getMinAbsValue() { + return minAbsV; + } + + /** + * Get the maximum value of the signed input samples + * + * @return a valid floating point number + */ + public double getMaxAbsValue() { + return maxAbsV; + } + + /** + * Gets the number of sample values passed into this instance of the + * tabulator. + * + * @return a positive, potentially zero value. + */ + public int getNumberSamples() { + return nD; + } + + /** + * Gets the number of non-finite sample values passed into this instance of the + * tabulator. + * + * @return a positive, potentially zero value. + */ + public int getNumberNonFiniteSamples() { + return nNaN; + } + + public static void printCaption(PrintStream ps){ + ps.println(" mean sigma min max" + + " mean (abs) sigma (abs) min max"); + } + + /** + * Prints a summary of the statistics computed from signed inputs. + * @param ps a valid print stream + * @param label a label of 25 characters or fewer to be printed in the summary + */ + public void summarize(PrintStream ps, String label ){ + String s; + if(label==null || label.isEmpty()){ + s = ""; + }else if(label.length()>25){ + s = label.substring(0, 25); + }else{ + s = label; + } + + double mean = getMean(); + double sigma = getStdDev(); + double absMean = getMeanAbsValue(); + double absSigma = getStdDevAbsValue(); + double absMin = getMinAbsValue(); + double absMax = getMaxAbsValue(); + ps.format("%-25.25s %13.6f %13.6f %9.3f %9.3f %13.6f %13.6f %9.3f %9.3f%n", + s, + mean, sigma, getMinValue(), getMaxValue(), + absMean, absSigma, absMin, absMax + ); + } + /** + * Prints a summary of the statistics computed from absolute values of the inputs. + * @param ps a valid print stream + * @param label a label of 25 characters or fewer to be printed in the summary + */ + public void summarizeAbsValues(PrintStream ps, String label ){ + String s; + if(label==null || label.isEmpty()){ + s = ""; + }else if(label.length()>25){ + s = label.substring(0, 25); + }else{ + s = label; + } + + double mean = this.getMeanAbsValue(); + double sigma = this.getStdDevAbsValue(); + ps.format("%-25.25s %13.6f %13.6f %9.3f %9.3f%n", + s, mean, sigma, getMinAbsValue(), getMaxAbsValue()); + } + +} diff --git a/demo/src/main/java/org/tinfour/demo/utils/TabulatorDelta.java b/demo/src/main/java/org/tinfour/demo/utils/TabulatorDelta.java index 32c843f9..4f5f8193 100644 --- a/demo/src/main/java/org/tinfour/demo/utils/TabulatorDelta.java +++ b/demo/src/main/java/org/tinfour/demo/utils/TabulatorDelta.java @@ -46,7 +46,7 @@ public class TabulatorDelta { double sumD2; // sum of delta^2 double sumSignedD; double maxD = Double.NEGATIVE_INFINITY; // max signed delta - double minD = Double.POSITIVE_INFINITY; // nim signed delta + double minD = Double.POSITIVE_INFINITY; // min signed delta double cD; // compensator for Kahan Summation, sum delta double cD2;// compensator, sum delta squared @@ -101,6 +101,8 @@ public void summarize(PrintStream ps, String label) { double meanE = 0; double sigma = 0; double rmse = 0; + double sigmaSigned=0; + double meanSigned = 0; if (nD > 1) { meanE = sumD / nD; // to reduce errors due to loss of precision, @@ -109,9 +111,12 @@ public void summarize(PrintStream ps, String label) { // use the form below sigma = Math.sqrt((sumD2 - (sumD / nD) * sumD) / (nD - 1)); rmse = getRMSE(); + sigmaSigned=this.getStdDevSignedValue(); + meanSigned = sumSignedD/nD; } - ps.format("%-25.25s %13.6f %13.6f %13.6f %10.3f %8.3f %9.3f%n", - label, rmse, meanE, sigma, minD, maxD, sumSignedD); + + ps.format("%-25.25s %13.6f %13.6f %13.6f %13.6f %13.6f %9.3f %9.3f%n", + label, rmse, meanE, sigma, meanSigned, sigmaSigned, minD, maxD); } /** @@ -126,6 +131,20 @@ public double getMeanAbsValue() { return sumD / nD; } + /** + * Get the mean of the signed values of the input sample values. + * + * @return a valid floating point number, zero if no input has occurred. + */ + public double getMeanSignedValue() { + if (nD == 0) { + return 0; + } + return sumD / nD; + } + + + /** * Get an unbiased estimate of the standard deviation of the population * based on the tabulated samples. @@ -145,6 +164,28 @@ public double getStdDevAbsValue() { return Math.sqrt((sumD2 - (sumD / nD) * sumD) / (nD - 1)); } + + /** + * Get an unbiased estimate of the standard deviation of the population + * based on the tabulated samples. + * + * @return the standard deviation of the signed values of the inputs, + * or zero if insufficient data is available. + */ + public double getStdDevSignedValue() { + if (nD < 2) { + return 0; + } + + // to reduce errors due to loss of precision, + // rather than using the conventional form for std dev + // nE*sumE2-sumE*sumE)/((nE*(nE-1)) + // use the form below + return Math.sqrt((sumD2 - (sumSignedD / nD) * sumSignedD) / (nD - 1)); + } + + + /** * Get the signed minimum value of the input samples *