From 318c9d2c3b8b9567ae1db6d621d078a73e25128f Mon Sep 17 00:00:00 2001 From: Alex Peters <80283343+alexpeters1208@users.noreply.github.com> Date: Mon, 13 Nov 2023 22:18:31 -0600 Subject: [PATCH] Clarify standard deviation / variance documentation with Bessel's correction (#4786) * First attempt at fixing std/var docs * Fix Java file formatting * Chip review suggestions * Spotless apply * Spotless apply (again) --- .../include/public/deephaven/client/client.h | 24 +++- .../public/deephaven/client/update_by.h | 25 +++-- engine/function/src/templates/Numeric.ftl | 104 ++++++++++++------ .../engine/util/TotalsTableBuilder.java | 4 +- go/pkg/client/query.go | 20 +++- go/pkg/client/tablehandle.go | 10 +- py/client/pydeephaven/_table_interface.py | 12 +- py/client/pydeephaven/agg.py | 10 +- py/client/pydeephaven/table.py | 12 +- py/client/pydeephaven/updateby.py | 26 +++-- py/server/deephaven/agg.py | 12 +- py/server/deephaven/table.py | 10 +- py/server/deephaven/updateby.py | 28 +++-- .../io/deephaven/api/TableOperations.java | 52 +++++++-- .../io/deephaven/api/agg/Aggregation.java | 14 ++- .../io/deephaven/api/agg/spec/AggSpecStd.java | 8 +- .../io/deephaven/api/agg/spec/AggSpecVar.java | 7 +- .../api/updateby/UpdateByOperation.java | 80 +++++++++----- web/WebDevelopersGuide.md | 4 +- .../web/client/api/JsColumnStatistics.java | 6 +- .../tree/enums/JsAggregationOperation.java | 14 ++- 21 files changed, 340 insertions(+), 142 deletions(-) diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h index 7cb72ca94e1..a99ca9c46e9 100644 --- a/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h @@ -568,8 +568,11 @@ class Aggregate { } /** - * Returns an aggregator that computes the standard deviation of values, within an aggregation - * group, for each input column. 
+ * Returns an aggregator that computes the sample standard deviation of values, within an + * aggregation group, for each input column. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ [[nodiscard]] static Aggregate Std(std::vector column_specs); @@ -608,8 +611,11 @@ class Aggregate { } /** - * Returns an aggregator that computes the variance of values, within an aggregation group, + * Returns an aggregator that computes the sample variance of values, within an aggregation group, * for each input column. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ [[nodiscard]] static Aggregate Var(std::vector column_specs); @@ -801,8 +807,11 @@ Aggregate AggPct(double percentile, Args &&... args) { } /** - * Returns an aggregator that computes the standard deviation of values, within an aggregation - * group, for each input column. + * Returns an aggregator that computes the sample standard deviation of values, within an aggregation group, + * for each input column. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ template [[nodiscard]] @@ -821,8 +830,11 @@ Aggregate aggSum(Args &&... args) { } /** - * Returns an aggregator that computes the variance of values, within an aggregation group, + * Returns an aggregator that computes the sample variance of values, within an aggregation group, * for each input column. 
+ * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ template [[nodiscard]] diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h index 5b9d41025a0..f2d722718e1 100644 --- a/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h @@ -528,11 +528,13 @@ UpdateByOperation rollingCountTime(std::string timestamp_col, std::vector cols, int rev_ticks, int fwd_ticks = 0); /** - * Creates a rolling standard deviation UpdateByOperation for the supplied column names, using time as the - * windowing unit. This function accepts nanoseconds or time strings as the reverse and forward - * window parameters. Negative values are allowed and can be used to generate completely forward or - * completely reverse windows. A row containing a null in the timestamp column belongs to no window - * and will not be considered in the windows of other rows; its output will be null. + * Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using time as the + * windowing unit. This function accepts nanoseconds or time strings as the reverse and forward window parameters. + * Negative values are allowed and can be used to generate completely forward or completely reverse windows. + * A row containing a null in the timestamp column belongs to no window and will not be considered in the windows + * of other rows; its output will be null. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. 
* * See the documentation of rollingSumTime() for examples of window values. * diff --git a/engine/function/src/templates/Numeric.ftl b/engine/function/src/templates/Numeric.ftl index 49df0a3f4af..6270b603bea 100644 --- a/engine/function/src/templates/Numeric.ftl +++ b/engine/function/src/templates/Numeric.ftl @@ -427,20 +427,26 @@ public class Numeric { } /** - * Returns the variance. Null values are excluded. + * Returns the sample variance. Null values are excluded. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. - * @return variance of non-null values. + * @return sample variance of non-null values. */ public static double var(${pt.boxed}[] values) { return var(unbox(values)); } /** - * Returns the variance. Null values are excluded. + * Returns the sample variance. Null values are excluded. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. - * @return variance of non-null values. + * @return sample variance of non-null values. */ public static double var(${pt.primitive}... values) { if (values == null) { @@ -451,10 +457,13 @@ public class Numeric { } /** - * Returns the variance. Null values are excluded. + * Returns the sample variance. Null values are excluded. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. - * @return variance of non-null values. + * @return sample variance of non-null values. 
*/ public static double var(${pt.vector} values) { if (values == null) { @@ -476,7 +485,7 @@ public class Numeric { } } - // Return NaN if poisoned or too few values to compute variance. + // Return NaN if poisoned or too few values to compute sample variance. if (count <= 1 || Double.isNaN(sum) || Double.isNaN(sum2)) { return Double.NaN; } @@ -487,7 +496,7 @@ public class Numeric { final double delta = sum2 - vs2bar; final double rel_eps = delta / eps; - // Return zero when the variance is leq the floating point error. + // Return zero when the sample variance is leq the floating point error. return Math.abs(rel_eps) > 1.0 ? delta / (count - 1) : 0.0; } @@ -495,11 +504,14 @@ public class Numeric { <#if pt2.valueType.isNumber > /** - * Returns the weighted variance. Null values are excluded. + * Returns the weighted sample variance. Null values are excluded. + * + * Weighted sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights - * @return weighted variance of non-null values. + * @return weighted sample variance of non-null values. */ public static double wvar(${pt.primitive}[] values, ${pt2.primitive}[] weights) { if (values == null || weights == null) { @@ -510,11 +522,14 @@ public class Numeric { } /** - * Returns the weighted variance. Null values are excluded. + * Returns the weighted sample variance. Null values are excluded. + * + * Weighted sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights - * @return weighted variance of non-null values. + * @return weighted sample variance of non-null values. 
*/ public static double wvar(${pt.primitive}[] values, ${pt2.vector} weights) { if (values == null || weights == null) { @@ -525,11 +540,14 @@ public class Numeric { } /** - * Returns the weighted variance. Null values are excluded. + * Returns the weighted sample variance. Null values are excluded. + * + * Weighted sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights - * @return weighted variance of non-null values. + * @return weighted sample variance of non-null values. */ public static double wvar(${pt.vector} values, ${pt2.primitive}[] weights) { if (values == null || weights == null) { @@ -540,11 +558,14 @@ public class Numeric { } /** - * Returns the weighted variance. Null values are excluded. + * Returns the weighted sample variance. Null values are excluded. + * + * Weighted sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights - * @return weighted variance of non-null values. + * @return weighted sample variance of non-null values. */ public static double wvar(${pt.vector} values, ${pt2.vector} weights) { if (values == null || weights == null) { @@ -579,7 +600,7 @@ public class Numeric { } } - // Return NaN if poisoned or too few values to compute variance. + // Return NaN if poisoned or too few values to compute sample variance. if (count <= 1 || Double.isNaN(sum) || Double.isNaN(sum2) || Double.isNaN(count) || Double.isNaN(count2)) { return Double.NaN; } @@ -597,20 +618,26 @@ public class Numeric { /** - * Returns the standard deviation. Null values are excluded. + * Returns the sample standard deviation. Null values are excluded. 
+ * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. - * @return standard deviation of non-null values. + * @return sample standard deviation of non-null values. */ public static double std(${pt.boxed}[] values) { return std(unbox(values)); } /** - * Returns the standard deviation. Null values are excluded. + * Returns the sample standard deviation. Null values are excluded. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. - * @return standard deviation of non-null values. + * @return sample standard deviation of non-null values. */ public static double std(${pt.primitive}... values) { if (values == null) { @@ -621,10 +648,13 @@ public class Numeric { } /** - * Returns the standard deviation. Null values are excluded. + * Returns the sample standard deviation. Null values are excluded. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. - * @return standard deviation of non-null values. + * @return sample standard deviation of non-null values. */ public static double std(${pt.vector} values) { if (values == null) { @@ -639,11 +669,14 @@ public class Numeric { <#if pt2.valueType.isNumber > /** - * Returns the weighted standard deviation. Null values are excluded. + * Returns the weighted sample standard deviation. Null values are excluded. 
+ * + * Weighted sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights - * @return weighted standard deviation of non-null values. + * @return weighted sample standard deviation of non-null values. */ public static double wstd(${pt.primitive}[] values, ${pt2.primitive}[] weights) { if (values == null || weights == null) { @@ -654,11 +687,14 @@ public class Numeric { } /** - * Returns the weighted standard deviation. Null values are excluded. + * Returns the weighted sample standard deviation. Null values are excluded. + * + * Weighted sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights - * @return weighted standard deviation of non-null values. + * @return weighted sample standard deviation of non-null values. */ public static double wstd(${pt.primitive}[] values, ${pt2.vector} weights) { if (values == null || weights == null) { @@ -669,11 +705,14 @@ public class Numeric { } /** - * Returns the weighted standard deviation. Null values are excluded. + * Returns the weighted sample standard deviation. Null values are excluded. + * + * Weighted sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights - * @return weighted standard deviation of non-null values. + * @return weighted sample standard deviation of non-null values. 
*/ public static double wstd(${pt.vector} values, ${pt2.primitive}[] weights) { if (values == null || weights == null) { @@ -684,11 +723,14 @@ public class Numeric { } /** - * Returns the weighted standard deviation. Null values are excluded. + * Returns the weighted sample standard deviation. Null values are excluded. + * + * Weighted sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights - * @return weighted standard deviation of non-null values. + * @return weighted sample standard deviation of non-null values. */ public static double wstd(${pt.vector} values, ${pt2.vector} weights) { if (values == null || weights == null) { diff --git a/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java b/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java index 77e35040f2e..d3399ec18ad 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java @@ -46,11 +46,11 @@ public enum AggType { Sum, /** Return the sum of absolute values in each group. */ AbsSum, - /** Return the variance of values in each group. */ + /** Return the sample variance of values in each group. */ Var, /** Return the average of values in each group. */ Avg, - /** Return the standard deviation of each group. */ + /** Return the sample standard deviation of each group. */ Std, /** Return the first value of each group. 
*/ First, diff --git a/go/pkg/client/query.go b/go/pkg/client/query.go index ddb25303a38..cfaac62a844 100644 --- a/go/pkg/client/query.go +++ b/go/pkg/client/query.go @@ -1013,14 +1013,20 @@ func (qb QueryNode) AvgBy(by ...string) QueryNode { return qb.addOp(dedicatedAggOp{child: qb, colNames: by, kind: tablepb2.ComboAggregateRequest_AVG}) } -// StdBy returns the standard deviation for each group. Null values are ignored. +// StdBy returns the sample standard deviation for each group. Null values are ignored. // Columns not used in the grouping must be numeric. +// +// Sample standard deviation is calculated using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), +// which ensures that the sample variance will be an unbiased estimator of population variance. func (qb QueryNode) StdBy(by ...string) QueryNode { return qb.addOp(dedicatedAggOp{child: qb, colNames: by, kind: tablepb2.ComboAggregateRequest_STD}) } -// VarBy returns the variance for each group. Null values are ignored. +// VarBy returns the sample variance for each group. Null values are ignored. // Columns not used in the grouping must be numeric. +// +// Sample variance is calculated using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), +// which ensures that the sample variance will be an unbiased estimator of population variance. func (qb QueryNode) VarBy(by ...string) QueryNode { return qb.addOp(dedicatedAggOp{child: qb, colNames: by, kind: tablepb2.ComboAggregateRequest_VAR}) } @@ -1156,15 +1162,21 @@ func (b *AggBuilder) Percentile(percentile float64, cols ...string) *AggBuilder return b } -// Std returns an aggregator that computes the standard deviation of values, within an aggregation group, for each input column. +// StdDev returns an aggregator that computes the sample standard deviation of values, within an aggregation group, for each input column. // The source columns are specified by cols. 
+// +// Sample standard deviation is calculated using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), +// which ensures that the sample variance will be an unbiased estimator of population variance. func (b *AggBuilder) StdDev(cols ...string) *AggBuilder { b.addAgg(aggPart{matchPairs: cols, kind: tablepb2.ComboAggregateRequest_STD}) return b } -// Var returns an aggregator that computes the variance of values, within an aggregation group, for each input column. +// Variance returns an aggregator that computes the sample variance of values, within an aggregation group, for each input column. // The source columns are specified by cols. +// +// Sample variance is calculated using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), +// which ensures that the sample variance will be an unbiased estimator of population variance. func (b *AggBuilder) Variance(cols ...string) *AggBuilder { b.addAgg(aggPart{matchPairs: cols, kind: tablepb2.ComboAggregateRequest_VAR}) return b diff --git a/go/pkg/client/tablehandle.go b/go/pkg/client/tablehandle.go index 94769a6f8a9..5b1f5f38a32 100644 --- a/go/pkg/client/tablehandle.go +++ b/go/pkg/client/tablehandle.go @@ -543,8 +543,11 @@ func (th *TableHandle) AvgBy(ctx context.Context, cols ...string) (*TableHandle, return th.client.dedicatedAggOp(ctx, th, cols, "", tablepb2.ComboAggregateRequest_AVG) } -// StdBy returns the standard deviation for each group. Null values are ignored. +// StdBy returns the sample standard deviation for each group. Null values are ignored. // Columns not used in the grouping must be numeric. +// +// Sample standard deviation is calculated using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), +// which ensures that the sample variance will be an unbiased estimator of population variance. 
func (th *TableHandle) StdBy(ctx context.Context, cols ...string) (*TableHandle, error) { if !th.rLockIfValid() { return nil, ErrInvalidTableHandle @@ -553,8 +556,11 @@ func (th *TableHandle) StdBy(ctx context.Context, cols ...string) (*TableHandle, return th.client.dedicatedAggOp(ctx, th, cols, "", tablepb2.ComboAggregateRequest_STD) } -// VarBy returns the variance for each group. Null values are ignored. +// VarBy returns the sample variance for each group. Null values are ignored. // Columns not used in the grouping must be numeric. +// +// Sample variance is calculated using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), +// which ensures that the sample variance will be an unbiased estimator of population variance. func (th *TableHandle) VarBy(ctx context.Context, cols ...string) (*TableHandle, error) { if !th.rLockIfValid() { return nil, ErrInvalidTableHandle diff --git a/py/client/pydeephaven/_table_interface.py b/py/client/pydeephaven/_table_interface.py index 60751d9af3b..fc3e0b3a5c3 100644 --- a/py/client/pydeephaven/_table_interface.py +++ b/py/client/pydeephaven/_table_interface.py @@ -466,8 +466,11 @@ def avg_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: return self.table_op_handler(table_op) def std_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: - """The std_by method creates a new table containing the standard deviation for each group. Columns not used - in the grouping must be of numeric types. + """The std_by method creates a new table containing the sample standard deviation for each group. Columns not + used in the grouping must be of numeric types. + + Sample standard deviation is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. 
Args: by (Union[str, List[str]]): the group-by column names(s), default is None, meaning grouping @@ -483,9 +486,12 @@ def std_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: return self.table_op_handler(table_op) def var_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: - """The var_by method creates a new table containing the variance for each group. Columns not used in the + """The var_by method creates a new table containing the sample variance for each group. Columns not used in the grouping must be of numeric types. + Sample variance is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. + Args: by (Union[str, List[str]], optional): the group-by column name(s), default is None, meaning grouping all the rows into one group diff --git a/py/client/pydeephaven/agg.py b/py/client/pydeephaven/agg.py index f8a3eaf7fda..b50df148f1f 100644 --- a/py/client/pydeephaven/agg.py +++ b/py/client/pydeephaven/agg.py @@ -300,7 +300,10 @@ def sorted_last(order_by: str, cols: Union[str, List[str]] = None) -> Aggregatio def std(cols: Union[str, List[str]] = None) -> Aggregation: - """Creates a Std (standard deviation) aggregation. + """Creates a Std (sample standard deviation) aggregation. + + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; @@ -366,7 +369,10 @@ def unique(cols: Union[str, List[str]] = None, include_nulls: bool = False, def var(cols: Union[str, List[str]] = None) -> Aggregation: - """Creates a Variance aggregation. + """Creates a sample Variance aggregation. + + Sample variance is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. 
Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; diff --git a/py/client/pydeephaven/table.py b/py/client/pydeephaven/table.py index 478616eece8..4251639731f 100644 --- a/py/client/pydeephaven/table.py +++ b/py/client/pydeephaven/table.py @@ -529,8 +529,11 @@ def avg_by(self, by: Union[str, List[str]] = None) -> Table: return super(Table, self).avg_by(by) def std_by(self, by: Union[str, List[str]] = None) -> Table: - """The std_by method creates a new table containing the standard deviation for each group. Columns not used - in the grouping must be of numeric types. + """The std_by method creates a new table containing the sample standard deviation for each group. Columns not + used in the grouping must be of numeric types. + + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. Args: by (Union[str, List[str]]): the group-by column names(s), default is None, meaning grouping @@ -545,9 +548,12 @@ def std_by(self, by: Union[str, List[str]] = None) -> Table: return super(Table, self).std_by(by) def var_by(self, by: Union[str, List[str]] = None) -> Table: - """The var_by method creates a new table containing the variance for each group. Columns not used in the + """The var_by method creates a new table containing the sample variance for each group. Columns not used in the grouping must be of numeric types. + Sample variance is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. 
+ Args: by (Union[str, List[str]], optional): the group-by column name(s), default is None, meaning grouping all the rows into one group diff --git a/py/client/pydeephaven/updateby.py b/py/client/pydeephaven/updateby.py index 9d096d1fe57..af1f1a71edd 100644 --- a/py/client/pydeephaven/updateby.py +++ b/py/client/pydeephaven/updateby.py @@ -1279,10 +1279,13 @@ def rolling_count_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int = 0) -> UpdateByOperation: - """Creates a rolling standard deviation UpdateByOperation for the supplied column names, using ticks as the windowing unit. Ticks - are row counts, and you may specify the reverse and forward window in number of rows to include. The current row - is considered to belong to the reverse window but not the forward window. Also, negative values are allowed and - can be used to generate completely forward or completely reverse windows. + """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using ticks as the + windowing unit. Ticks are row counts, and you may specify the reverse and forward window in number of rows to + include. The current row is considered to belong to the reverse window but not the forward window. Also, negative + values are allowed and can be used to generate completely forward or completely reverse windows. + + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. Here are some examples of window values: rev_ticks = 1, fwd_ticks = 0 - contains only the current row @@ -1298,7 +1301,7 @@ def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int Args: cols (Union[str, List[str]]): the column(s) to be operated on, can include expressions to rename the output, - i.e. 
"new_col = col"; when empty, update_by perform the rolling standard deviation operation on all columns. + i.e. "new_col = col"; when empty, update_by perform the rolling sample standard deviation operation on all columns. rev_ticks (int): the look-behind window size (in rows/ticks) fwd_ticks (int): the look-forward window size (int rows/ticks), default is 0 @@ -1322,11 +1325,14 @@ def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int def rolling_std_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union[int, str], fwd_time: Union[int, str] = 0) -> UpdateByOperation: - """Creates a rolling standard deviation UpdateByOperation for the supplied column names, using time as the windowing unit. This - function accepts nanoseconds or time strings as the reverse and forward window parameters. Negative values are - allowed and can be used to generate completely forward or completely reverse windows. A row containing a null in - the timestamp column belongs to no window and will not be considered in the windows of other rows; its output will - be null. + """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using time as the + windowing unit. This function accepts nanoseconds or time strings as the reverse and forward window parameters. + Negative values are allowed and can be used to generate completely forward or completely reverse windows. A row + containing a null in the timestamp column belongs to no window and will not be considered in the windows of other + rows; its output will be null. + + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. 
Here are some examples of window values: rev_time = 0, fwd_time = 0 - contains rows that exactly match the current row timestamp diff --git a/py/server/deephaven/agg.py b/py/server/deephaven/agg.py index 622e0320507..7e4c4293f73 100644 --- a/py/server/deephaven/agg.py +++ b/py/server/deephaven/agg.py @@ -272,7 +272,11 @@ def sorted_last(order_by: str, cols: Union[str, List[str]] = None) -> Aggregatio def std(cols: Union[str, List[str]] = None) -> Aggregation: - """Creates a Std aggregation. + """Creates a Std (sample standard deviation) aggregation. + + Sample standard deviation is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. + Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; @@ -312,7 +316,11 @@ def unique(cols: Union[str, List[str]] = None, include_nulls: bool = False, non_ def var(cols: Union[str, List[str]] = None) -> Aggregation: - """Creates a Var aggregation. + """Creates a sample Var aggregation. + + Sample variance is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. + Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; diff --git a/py/server/deephaven/table.py b/py/server/deephaven/table.py index 8a60dae1008..b541480b4a2 100644 --- a/py/server/deephaven/table.py +++ b/py/server/deephaven/table.py @@ -1782,7 +1782,10 @@ def weighted_avg_by(self, wcol: str, by: Union[str, Sequence[str]] = None) -> Ta raise DHError(e, "table avg_by operation failed.") from e def std_by(self, by: Union[str, Sequence[str]] = None) -> Table: - """The std_by method creates a new table containing the standard deviation for each group. + """The std_by method creates a new table containing the sample standard deviation for each group. 
+ + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. Args: by (Union[str, Sequence[str]], optional): the group-by column name(s), default is None @@ -1803,7 +1806,10 @@ def std_by(self, by: Union[str, Sequence[str]] = None) -> Table: raise DHError(e, "table std_by operation failed.") from e def var_by(self, by: Union[str, Sequence[str]] = None) -> Table: - """The var_by method creates a new table containing the variance for each group. + """The var_by method creates a new table containing the sample variance for each group. + + Sample variance is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. Args: by (Union[str, Sequence[str]], optional): the group-by column name(s), default is None diff --git a/py/server/deephaven/updateby.py b/py/server/deephaven/updateby.py index efff037fff0..56f60a23836 100644 --- a/py/server/deephaven/updateby.py +++ b/py/server/deephaven/updateby.py @@ -1190,10 +1190,13 @@ def rolling_count_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int = 0) -> UpdateByOperation: - """Creates a rolling standard deviation UpdateByOperation for the supplied column names, using ticks as the windowing unit. Ticks - are row counts, and you may specify the reverse and forward window in number of rows to include. The current row - is considered to belong to the reverse window but not the forward window. Also, negative values are allowed and - can be used to generate completely forward or completely reverse windows. + """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using ticks as the + windowing unit. Ticks are row counts, and you may specify the reverse and forward window in number of rows to + include. 
The current row is considered to belong to the reverse window but not the forward window. Also, negative + values are allowed and can be used to generate completely forward or completely reverse windows. + + Sample standard deviation is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. Here are some examples of window values: | `rev_ticks = 1, fwd_ticks = 0` - contains only the current row @@ -1209,7 +1212,7 @@ def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int Args: cols (Union[str, List[str]]): the column(s) to be operated on, can include expressions to rename the output, - i.e. "new_col = col"; when empty, update_by perform the rolling standard deviation operation on all columns. + i.e. "new_col = col"; when empty, update_by performs the rolling sample standard deviation operation on all columns. rev_ticks (int): the look-behind window size (in rows/ticks) fwd_ticks (int): the look-forward window size (int rows/ticks), default is 0 @@ -1228,11 +1231,14 @@ def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int def rolling_std_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union[int, str], fwd_time: Union[int, str] = 0) -> UpdateByOperation: - """Creates a rolling standard deviation UpdateByOperation for the supplied column names, using time as the windowing unit. This - function accepts nanoseconds or time strings as the reverse and forward window parameters. Negative values are - allowed and can be used to generate completely forward or completely reverse windows. A row containing a null in - the timestamp column belongs to no window and will not be considered in the windows of other rows; its output will - be null. + """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using time as the + windowing unit. 
This function accepts nanoseconds or time strings as the reverse and forward window parameters. + Negative values are allowed and can be used to generate completely forward or completely reverse windows. A row + containing a null in the timestamp column belongs to no window and will not be considered in the windows of other + rows; its output will be null. + + Sample standard deviation is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. Here are some examples of window values: | `rev_time = 0, fwd_time = 0` - contains rows that exactly match the current row timestamp @@ -1250,7 +1256,7 @@ def rolling_std_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union[i Args: ts_col (str): the timestamp column for determining the window cols (Union[str, List[str]]): the column(s) to be operated on, can include expressions to rename the output, - i.e. "new_col = col"; when empty, update_by perform the rolling standard deviation operation on all columns. + i.e. "new_col = col"; when empty, update_by performs the rolling sample standard deviation operation on all columns. rev_time (int): the look-behind window size, can be expressed as an integer in nanoseconds or a time interval string, e.g. "PT00:00:00.001" or "PT5M" fwd_time (int): the look-ahead window size, can be expressed as an integer in nanoseconds or a time diff --git a/table-api/src/main/java/io/deephaven/api/TableOperations.java b/table-api/src/main/java/io/deephaven/api/TableOperations.java index 6b6b6cfc92f..c8899973338 100644 --- a/table-api/src/main/java/io/deephaven/api/TableOperations.java +++ b/table-api/src/main/java/io/deephaven/api/TableOperations.java @@ -1165,16 +1165,24 @@ TOPS updateBy(UpdateByControl control, Collection o // ------------------------------------------------------------------------------------------- /** - * Produces a single row table with the standard deviation of each column. 
+ * Produces a single row table with the sample standard deviation of each column. *

* When the input table is empty, zero output rows are produced. + *

+ * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. */ @ConcurrentMethod TOPS stdBy(); /** - * Groups the data column according to groupByColumns and computes the standard deviation for the rest - * of the fields + * Groups the data column according to groupByColumns and computes the sample standard deviation for + * the rest of the fields + *

+ * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1182,8 +1190,12 @@ TOPS updateBy(UpdateByControl control, Collection o TOPS stdBy(String... groupByColumns); /** - * Groups the data column according to groupByColumns and computes the standard deviation for the rest - * of the fields + * Groups the data column according to groupByColumns and computes the sample standard deviation for + * the rest of the fields + *

+ * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1191,8 +1203,12 @@ TOPS updateBy(UpdateByControl control, Collection o TOPS stdBy(ColumnName... groupByColumns); /** - * Groups the data column according to groupByColumns and computes the standard deviation for the rest - * of the fields + * Groups the data column according to groupByColumns and computes the sample standard deviation for + * the rest of the fields + *

+ * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1202,16 +1218,22 @@ TOPS updateBy(UpdateByControl control, Collection o // ------------------------------------------------------------------------------------------- /** - * Produces a single row table with the variance of each column. + * Produces a single row table with the sample variance of each column. *

* When the input table is empty, zero output rows are produced. + *

+ * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ @ConcurrentMethod TOPS varBy(); /** - * Groups the data column according to groupByColumns and computes the variance for the rest of the - * fields + * Groups the data column according to groupByColumns and computes the sample variance for the rest of + * the fields + *

+ * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1219,8 +1241,11 @@ TOPS updateBy(UpdateByControl control, Collection o TOPS varBy(String... groupByColumns); /** - * Groups the data column according to groupByColumns and computes the variance for the rest of the - * fields + * Groups the data column according to groupByColumns and computes the sample variance for the rest of + * the fields + *

+ * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1230,6 +1255,9 @@ TOPS updateBy(UpdateByControl control, Collection o /** * Groups the data column according to groupByColumns and computes the variance for the rest of the * fields + *

+ * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ diff --git a/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java b/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java index f228f3e324f..23d249e05dc 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java +++ b/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java @@ -522,8 +522,12 @@ static Aggregation AggSortedLast(Collection sortColumns, Strin } /** - * Create a {@link io.deephaven.api.agg.spec.AggSpecStd standard deviation} aggregation for the supplied column name - * pairs. + * Create a {@link io.deephaven.api.agg.spec.AggSpecStd sample standard deviation} aggregation for the supplied + * column name pairs. + * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param pairs The input/output column name pairs * @return The aggregation @@ -608,7 +612,11 @@ static Aggregation AggUnique(boolean includeNulls, UnionObject nonUniqueSentinel } /** - * Create a {@link io.deephaven.api.agg.spec.AggSpecVar variance} aggregation for the supplied column name pairs. + * Create a {@link io.deephaven.api.agg.spec.AggSpecVar sample variance} aggregation for the supplied column name + * pairs. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. 
* * @param pairs The input/output column name pairs * @return The aggregation diff --git a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java index 5b18e83bee6..2e7513bb666 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java +++ b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java @@ -8,8 +8,12 @@ import org.immutables.value.Value.Immutable; /** - * Specifies an aggregation that outputs the standard deviation of the input column values for each group. Only works - * for numeric input types. + * Specifies an aggregation that outputs the sample standard deviation of the input column values for each group. Only + * works for numeric input types. + * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @see TableOperations#stdBy */ diff --git a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java index dd5e5b4c364..b86e5fa3eeb 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java +++ b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java @@ -8,8 +8,11 @@ import org.immutables.value.Value.Immutable; /** - * Specifies an aggregation that outputs the variance of the input column values for each group. Only works for numeric - * input types. + * Specifies an aggregation that outputs the sample variance of the input column values for each group. Only works for + * numeric input types. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), which + * ensures that the sample variance will be an unbiased estimator of population variance. 
* * @see TableOperations#varBy */ diff --git a/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java b/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java index f59e448cbd2..745b85734cb 100644 --- a/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java +++ b/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java @@ -1620,11 +1620,15 @@ static UpdateByOperation RollingCount(String timestampCol, long revTime, long fw /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using ticks as the - * windowing unit. Ticks are row counts and you may specify the previous window in number of rows to include. The - * current row is considered to belong to the reverse window, so calling this with {@code revTicks = 1} will simply - * return the current row. Specifying {@code revTicks = 10} will include the previous 9 rows to this one and this - * row for a total of 10 rows. + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using ticks + * as the windowing unit. Ticks are row counts and you may specify the previous window in number of rows to include. + * The current row is considered to belong to the reverse window, so calling this with {@code revTicks = 1} will + * simply return the current row. Specifying {@code revTicks = 10} will include the previous 9 rows to this one and + * this row for a total of 10 rows. + * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param revTicks the look-behind window size (in rows/ticks) * @param pairs The input/output column name pairs @@ -1635,11 +1639,11 @@ static UpdateByOperation RollingStd(long revTicks, String... 
pairs) { } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using ticks as the - * windowing unit. Ticks are row counts and you may specify the reverse and forward window in number of rows to - * include. The current row is considered to belong to the reverse window but not the forward window. Also, negative - * values are allowed and can be used to generate completely forward or completely reverse windows. Here are some - * examples of window values: + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using ticks + * as the windowing unit. Ticks are row counts and you may specify the reverse and forward window in number of rows + * to include. The current row is considered to belong to the reverse window but not the forward window. Also, + * negative values are allowed and can be used to generate completely forward or completely reverse windows. Here + * are some examples of window values: *

    *
  • {@code revTicks = 1, fwdTicks = 0} - contains only the current row
  • *
  • {@code revTicks = 10, fwdTicks = 0} - contains 9 previous rows and the current row
  • @@ -1654,6 +1658,10 @@ static UpdateByOperation RollingStd(long revTicks, String... pairs) { * following the current row (inclusive) *
* + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. + * * @param revTicks the look-behind window size (in rows/ticks) * @param fwdTicks the look-ahead window size (in rows/ticks) * @param pairs The input/output column name pairs @@ -1664,10 +1672,10 @@ static UpdateByOperation RollingStd(long revTicks, long fwdTicks, String... pair } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using time as the - * windowing unit. This function accepts {@link Duration duration} as the reverse window parameter. A row containing - * a {@code null} in the timestamp column belongs to no window and will not have a value computed or be considered - * in the windows of other rows. + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time + * as the windowing unit. This function accepts {@link Duration duration} as the reverse window parameter. A row + * containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or be + * considered in the windows of other rows. * * Here are some examples of window values: *
    @@ -1675,6 +1683,10 @@ static UpdateByOperation RollingStd(long revTicks, long fwdTicks, String... pair *
  • {@code revDuration = 10m} - contains rows from 10m earlier through the current row timestamp (inclusive)
  • *
* + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. + * * @param timestampCol the name of the timestamp column * @param revDuration the look-behind window size (in Duration) * @param pairs The input/output column name pairs @@ -1685,11 +1697,11 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, S } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using time as the - * windowing unit. This function accepts {@link Duration durations} as the reverse and forward window parameters. - * Negative values are allowed and can be used to generate completely forward or completely reverse windows. A row - * containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or be - * considered in the windows of other rows. + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time + * as the windowing unit. This function accepts {@link Duration durations} as the reverse and forward window + * parameters. Negative values are allowed and can be used to generate completely forward or completely reverse + * windows. A row containing a {@code null} in the timestamp column belongs to no window and will not have a value + * computed or be considered in the windows of other rows. * * Here are some examples of window values: *
    @@ -1706,6 +1718,10 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, S * current row timestamp (inclusive), this is a purely forwards looking window *
* + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. + * * @param timestampCol the name of the timestamp column * @param revDuration the look-behind window size (in Duration) * @param fwdDuration the look-ahead window size (in Duration) @@ -1718,10 +1734,14 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, D } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using time as the - * windowing unit. This function accepts {@code nanoseconds} as the reverse window parameters. A row containing a - * {@code null} in the timestamp column belongs to no window and will not have a value computed or be considered in - * the windows of other rows. + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time + * as the windowing unit. This function accepts {@code nanoseconds} as the reverse window parameters. A row + * containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or be + * considered in the windows of other rows. + * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param timestampCol the name of the timestamp column * @param revTime the look-behind window size (in nanoseconds) @@ -1733,11 +1753,15 @@ static UpdateByOperation RollingStd(String timestampCol, long revTime, String... } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using time as the - * windowing unit. This function accepts {@code nanoseconds} as the reverse and forward window parameters. 
Negative - * values are allowed and can be used to generate completely forward or completely reverse windows. A row containing - * a {@code null} in the timestamp column belongs to no window and will not have a value computed or be considered - * in the windows of other rows. + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time + * as the windowing unit. This function accepts {@code nanoseconds} as the reverse and forward window parameters. + * Negative values are allowed and can be used to generate completely forward or completely reverse windows. A row + * containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or be + * considered in the windows of other rows. + * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param timestampCol the name of the timestamp column * @param revTime the look-behind window size (in nanoseconds) diff --git a/web/WebDevelopersGuide.md b/web/WebDevelopersGuide.md index 87bc0ab50b7..33a202e8dd3 100644 --- a/web/WebDevelopersGuide.md +++ b/web/WebDevelopersGuide.md @@ -992,9 +992,9 @@ This enum describes the name of each supported operation/aggregation type when c value is "Max". * `SUM` - The sum of all values in the specified column. Can only apply to numeric types. String value is "Sum". * `ABS_SUM` - The sum of all values, as their distance from zero, in the specified column. Can only apply to numeric types. String value is “AbsSum”. - * `VAR` - The variance of all values in the specified column. Can only apply to numeric types. String value is "Var". + * `VAR` - The sample variance of all values in the specified column. Can only apply to numeric types. String value is "Var". * `AVG` - The average of all values in the specified column. 
Can only apply to numeric types. String value is "Avg". - * `STD` - The standard deviation of all values in the specified column. Can only apply to numeric types. String value is + * `STD` - The sample standard deviation of all values in the specified column. Can only apply to numeric types. String value is "Std". * `FIRST` - The first value in the specified column. Can apply to any type. String value is "First". * `LAST` - The last value in the specified column. Can apply to any type. String value is "Last". diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java b/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java index 552bbc6497a..2b70704618e 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java @@ -72,7 +72,11 @@ public enum StatType { */ MAX_ABS("MAX (ABS)", null), /** - * The standard deviation of the values in the column. + * The sample standard deviation of the values in the column. + * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an + * unbiased estimator of population variance. */ STD_DEV("STD DEV", "double"), /** diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java index fa0c50cce57..ab0824b1a98 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java @@ -45,8 +45,12 @@ public class JsAggregationOperation { */ ABS_SUM = "AbsSum", /** - * The variance of all values in the specified column. Can only apply to numeric types. 
String value is - * "Var". + * The sample variance of all values in the specified column. Can only apply to numeric types. String value + * is "Var". + * + * Sample variance is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an + * unbiased estimator of population variance. */ VAR = "Var", /** @@ -55,8 +59,10 @@ public class JsAggregationOperation { */ AVG = "Avg", /** - * The standard deviation of all values in the specified column. Can only apply to numeric types. String - * value is "Std". + * The sample standard deviation of all values in the specified column. Can only apply to numeric types. + * String value is "Std". Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an + * unbiased estimator of population variance. */ STD = "Std", /**