From 0ef2bac89742301a467f0f4de1002866396281ce Mon Sep 17 00:00:00 2001 From: alexpeters1208 Date: Tue, 7 Nov 2023 11:10:06 -0600 Subject: [PATCH 1/5] First attempt at fixing std/var docs --- .../include/public/deephaven/client/client.h | 20 +++-- .../public/deephaven/client/update_by.h | 17 +++-- engine/function/src/templates/Numeric.ftl | 76 +++++++++++-------- .../engine/util/TotalsTableBuilder.java | 4 +- go/pkg/client/query.go | 12 ++- go/pkg/client/tablehandle.go | 6 +- py/client/pydeephaven/_table_interface.py | 10 ++- py/client/pydeephaven/agg.py | 4 +- py/client/pydeephaven/table.py | 10 ++- py/client/pydeephaven/updateby.py | 16 ++-- py/server/deephaven/agg.py | 4 +- py/server/deephaven/table.py | 8 +- py/server/deephaven/updateby.py | 25 +++--- .../io/deephaven/api/TableOperations.java | 24 +++--- .../io/deephaven/api/agg/Aggregation.java | 6 +- .../io/deephaven/api/agg/spec/AggSpecStd.java | 3 +- .../io/deephaven/api/agg/spec/AggSpecVar.java | 5 +- .../api/updateby/UpdateByOperation.java | 56 ++++++++------ web/WebDevelopersGuide.md | 4 +- .../web/client/api/JsColumnStatistics.java | 2 +- .../tree/enums/JsAggregationOperation.java | 8 +- 21 files changed, 190 insertions(+), 130 deletions(-) diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h index 7cb72ca94e1..3e0c80e8aa9 100644 --- a/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h @@ -568,8 +568,9 @@ class Aggregate { } /** - * Returns an aggregator that computes the standard deviation of values, within an aggregation - * group, for each input column. + * Returns an aggregator that computes the sample standard deviation of values, within an + * aggregation group, for each input column. 
Sample standard deviation is computed using + * Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction */ [[nodiscard]] static Aggregate Std(std::vector column_specs); @@ -608,8 +609,9 @@ class Aggregate { } /** - * Returns an aggregator that computes the variance of values, within an aggregation group, - * for each input column. + * Returns an aggregator that computes the sample variance of values, within an aggregation group, + * for each input column. Sample variance is computed using Bessel's correction: + * https://en.wikipedia.org/wiki/Bessel%27s_correction */ [[nodiscard]] static Aggregate Var(std::vector column_specs); @@ -801,8 +803,9 @@ Aggregate AggPct(double percentile, Args &&... args) { } /** - * Returns an aggregator that computes the standard deviation of values, within an aggregation - * group, for each input column. + * Returns an aggregator that computes the sample standard deviation of values, within an + * aggregation group, for each input column. Sample standard deviation is computed using Bessel's correction: + * https://en.wikipedia.org/wiki/Bessel%27s_correction */ template [[nodiscard]] @@ -821,8 +824,9 @@ Aggregate aggSum(Args &&... args) { } /** - * Returns an aggregator that computes the variance of values, within an aggregation group, - * for each input column. + * Returns an aggregator that computes the sample variance of values, within an aggregation group, + * for each input column. 
Sample variance is computed using + * Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction */ template [[nodiscard]] diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h index 5b9d41025a0..a681d61ea09 100644 --- a/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h @@ -528,11 +528,12 @@ UpdateByOperation rollingCountTime(std::string timestamp_col, std::vector cols, int rev_ticks, int fwd_ticks = 0); /** - * Creates a rolling standard deviation UpdateByOperation for the supplied column names, using time as the - * windowing unit. This function accepts nanoseconds or time strings as the reverse and forward + * Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using time as the + * windowing unit. Sample standard deviation is computed using Bessel's correction, + * discussed here: https://en.wikipedia.org/wiki/Bessel%27s_correction + * This function accepts nanoseconds or time strings as the reverse and forward * window parameters. Negative values are allowed and can be used to generate completely forward or * completely reverse windows. A row containing a null in the timestamp column belongs to no window * and will not be considered in the windows of other rows; its output will be null. diff --git a/engine/function/src/templates/Numeric.ftl b/engine/function/src/templates/Numeric.ftl index 49df0a3f4af..dd0468b782e 100644 --- a/engine/function/src/templates/Numeric.ftl +++ b/engine/function/src/templates/Numeric.ftl @@ -427,20 +427,22 @@ public class Numeric { } /** - * Returns the variance. Null values are excluded. + * Returns the sample variance. Null values are excluded. 
+ * Sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @param values values. - * @return variance of non-null values. + * @return sample variance of non-null values. */ public static double var(${pt.boxed}[] values) { return var(unbox(values)); } /** - * Returns the variance. Null values are excluded. + * Returns the sample variance. Null values are excluded. + * Sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @param values values. - * @return variance of non-null values. + * @return sample variance of non-null values. */ public static double var(${pt.primitive}... values) { if (values == null) { @@ -451,10 +453,11 @@ public class Numeric { } /** - * Returns the variance. Null values are excluded. + * Returns the sample variance. Null values are excluded. + * Sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @param values values. - * @return variance of non-null values. + * @return sample variance of non-null values. */ public static double var(${pt.vector} values) { if (values == null) { @@ -476,7 +479,7 @@ public class Numeric { } } - // Return NaN if poisoned or too few values to compute variance. + // Return NaN if poisoned or too few values to compute sample variance. if (count <= 1 || Double.isNaN(sum) || Double.isNaN(sum2)) { return Double.NaN; } @@ -487,7 +490,7 @@ public class Numeric { final double delta = sum2 - vs2bar; final double rel_eps = delta / eps; - // Return zero when the variance is leq the floating point error. + // Return zero when the sample variance is leq the floating point error. return Math.abs(rel_eps) > 1.0 ? delta / (count - 1) : 0.0; } @@ -495,11 +498,12 @@ public class Numeric { <#if pt2.valueType.isNumber > /** - * Returns the weighted variance. Null values are excluded. + * Returns the weighted sample variance. Null values are excluded. 
+ * Weighted sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @param values values. * @param weights weights - * @return weighted variance of non-null values. + * @return weighted sample variance of non-null values. */ public static double wvar(${pt.primitive}[] values, ${pt2.primitive}[] weights) { if (values == null || weights == null) { @@ -510,11 +514,12 @@ public class Numeric { } /** - * Returns the weighted variance. Null values are excluded. + * Returns the weighted sample variance. Null values are excluded. + * Weighted sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @param values values. * @param weights weights - * @return weighted variance of non-null values. + * @return weighted sample variance of non-null values. */ public static double wvar(${pt.primitive}[] values, ${pt2.vector} weights) { if (values == null || weights == null) { @@ -525,11 +530,12 @@ public class Numeric { } /** - * Returns the weighted variance. Null values are excluded. + * Returns the weighted sample variance. Null values are excluded. + * Weighted sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @param values values. * @param weights weights - * @return weighted variance of non-null values. + * @return weighted sample variance of non-null values. */ public static double wvar(${pt.vector} values, ${pt2.primitive}[] weights) { if (values == null || weights == null) { @@ -540,11 +546,12 @@ public class Numeric { } /** - * Returns the weighted variance. Null values are excluded. + * Returns the weighted sample variance. Null values are excluded. + * Weighted sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @param values values. * @param weights weights - * @return weighted variance of non-null values. 
+ * @return weighted sample variance of non-null values. */ public static double wvar(${pt.vector} values, ${pt2.vector} weights) { if (values == null || weights == null) { @@ -579,7 +586,7 @@ public class Numeric { } } - // Return NaN if poisoned or too few values to compute variance. + // Return NaN if poisoned or too few values to compute sample variance. if (count <= 1 || Double.isNaN(sum) || Double.isNaN(sum2) || Double.isNaN(count) || Double.isNaN(count2)) { return Double.NaN; } @@ -597,20 +604,22 @@ public class Numeric { /** - * Returns the standard deviation. Null values are excluded. + * Returns the sample standard deviation. Null values are excluded. + * Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @param values values. - * @return standard deviation of non-null values. + * @return sample standard deviation of non-null values. */ public static double std(${pt.boxed}[] values) { return std(unbox(values)); } /** - * Returns the standard deviation. Null values are excluded. + * Returns the sample standard deviation. Null values are excluded. + * Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @param values values. - * @return standard deviation of non-null values. + * @return sample standard deviation of non-null values. */ public static double std(${pt.primitive}... values) { if (values == null) { @@ -621,10 +630,11 @@ public class Numeric { } /** - * Returns the standard deviation. Null values are excluded. + * Returns the sample standard deviation. Null values are excluded. + * Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @param values values. - * @return standard deviation of non-null values. + * @return sample standard deviation of non-null values. 
*/ public static double std(${pt.vector} values) { if (values == null) { @@ -639,11 +649,12 @@ public class Numeric { <#if pt2.valueType.isNumber > /** - * Returns the weighted standard deviation. Null values are excluded. + * Returns the weighted sample standard deviation. Null values are excluded. + * Weighted sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @param values values. * @param weights weights - * @return weighted standard deviation of non-null values. + * @return weighted sample standard deviation of non-null values. */ public static double wstd(${pt.primitive}[] values, ${pt2.primitive}[] weights) { if (values == null || weights == null) { @@ -654,11 +665,12 @@ public class Numeric { } /** - * Returns the weighted standard deviation. Null values are excluded. + * Returns the weighted sample standard deviation. Null values are excluded. + * Weighted sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @param values values. * @param weights weights - * @return weighted standard deviation of non-null values. + * @return weighted sample standard deviation of non-null values. */ public static double wstd(${pt.primitive}[] values, ${pt2.vector} weights) { if (values == null || weights == null) { @@ -669,11 +681,12 @@ public class Numeric { } /** - * Returns the weighted standard deviation. Null values are excluded. + * Returns the weighted sample standard deviation. Null values are excluded. + * Weighted sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @param values values. * @param weights weights - * @return weighted standard deviation of non-null values. + * @return weighted sample standard deviation of non-null values. 
*/ public static double wstd(${pt.vector} values, ${pt2.primitive}[] weights) { if (values == null || weights == null) { @@ -684,11 +697,12 @@ public class Numeric { } /** - * Returns the weighted standard deviation. Null values are excluded. + * Returns the weighted sample standard deviation. Null values are excluded. + * Weighted sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @param values values. * @param weights weights - * @return weighted standard deviation of non-null values. + * @return weighted sample standard deviation of non-null values. */ public static double wstd(${pt.vector} values, ${pt2.vector} weights) { if (values == null || weights == null) { diff --git a/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java b/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java index 77e35040f2e..d3399ec18ad 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java @@ -46,11 +46,11 @@ public enum AggType { Sum, /** Return the sum of absolute values in each group. */ AbsSum, - /** Return the variance of values in each group. */ + /** Return the sample variance of values in each group. */ Var, /** Return the average of values in each group. */ Avg, - /** Return the standard deviation of each group. */ + /** Return the sample standard deviation of each group. */ Std, /** Return the first value of each group. */ First, diff --git a/go/pkg/client/query.go b/go/pkg/client/query.go index ddb25303a38..b157d8cdf89 100644 --- a/go/pkg/client/query.go +++ b/go/pkg/client/query.go @@ -1013,13 +1013,15 @@ func (qb QueryNode) AvgBy(by ...string) QueryNode { return qb.addOp(dedicatedAggOp{child: qb, colNames: by, kind: tablepb2.ComboAggregateRequest_AVG}) } -// StdBy returns the standard deviation for each group. Null values are ignored. 
+// StdBy returns the sample standard deviation for each group. Null values are ignored. +// Sample standard deviation is calculated using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction). // Columns not used in the grouping must be numeric. func (qb QueryNode) StdBy(by ...string) QueryNode { return qb.addOp(dedicatedAggOp{child: qb, colNames: by, kind: tablepb2.ComboAggregateRequest_STD}) } -// VarBy returns the variance for each group. Null values are ignored. +// VarBy returns the sample variance for each group. Null values are ignored. +// Sample variance is calculated using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction). // Columns not used in the grouping must be numeric. func (qb QueryNode) VarBy(by ...string) QueryNode { return qb.addOp(dedicatedAggOp{child: qb, colNames: by, kind: tablepb2.ComboAggregateRequest_VAR}) @@ -1156,14 +1158,16 @@ func (b *AggBuilder) Percentile(percentile float64, cols ...string) *AggBuilder return b } -// Std returns an aggregator that computes the standard deviation of values, within an aggregation group, for each input column. +// StdDev returns an aggregator that computes the sample standard deviation of values, within an aggregation group, for each input column. +// Sample standard deviation is calculated using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction). // The source columns are specified by cols. func (b *AggBuilder) StdDev(cols ...string) *AggBuilder { b.addAgg(aggPart{matchPairs: cols, kind: tablepb2.ComboAggregateRequest_STD}) return b } -// Var returns an aggregator that computes the variance of values, within an aggregation group, for each input column. +// Variance returns an aggregator that computes the sample variance of values, within an aggregation group, for each input column. +// Sample variance is calculated using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction). // The source columns are specified by cols. 
func (b *AggBuilder) Variance(cols ...string) *AggBuilder { b.addAgg(aggPart{matchPairs: cols, kind: tablepb2.ComboAggregateRequest_VAR}) diff --git a/go/pkg/client/tablehandle.go b/go/pkg/client/tablehandle.go index 94769a6f8a9..3c7ee1d807e 100644 --- a/go/pkg/client/tablehandle.go +++ b/go/pkg/client/tablehandle.go @@ -543,7 +543,8 @@ func (th *TableHandle) AvgBy(ctx context.Context, cols ...string) (*TableHandle, return th.client.dedicatedAggOp(ctx, th, cols, "", tablepb2.ComboAggregateRequest_AVG) } -// StdBy returns the standard deviation for each group. Null values are ignored. +// StdBy returns the sample standard deviation for each group. Null values are ignored. +// Sample standard deviation is calculated using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction). // Columns not used in the grouping must be numeric. func (th *TableHandle) StdBy(ctx context.Context, cols ...string) (*TableHandle, error) { if !th.rLockIfValid() { @@ -553,7 +554,8 @@ func (th *TableHandle) StdBy(ctx context.Context, cols ...string) (*TableHandle, return th.client.dedicatedAggOp(ctx, th, cols, "", tablepb2.ComboAggregateRequest_STD) } -// VarBy returns the variance for each group. Null values are ignored. +// VarBy returns the sample variance for each group. Null values are ignored. +// Sample variance is calculated using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction). // Columns not used in the grouping must be numeric. 
func (th *TableHandle) VarBy(ctx context.Context, cols ...string) (*TableHandle, error) { if !th.rLockIfValid() { diff --git a/py/client/pydeephaven/_table_interface.py b/py/client/pydeephaven/_table_interface.py index 60751d9af3b..aee77b24b18 100644 --- a/py/client/pydeephaven/_table_interface.py +++ b/py/client/pydeephaven/_table_interface.py @@ -466,8 +466,10 @@ def avg_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: return self.table_op_handler(table_op) def std_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: - """The std_by method creates a new table containing the standard deviation for each group. Columns not used - in the grouping must be of numeric types. + """The std_by method creates a new table containing the sample standard deviation for each group. Columns not + used in the grouping must be of numeric types. + Sample standard deviation is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. Args: by (Union[str, List[str]]): the group-by column names(s), default is None, meaning grouping @@ -483,8 +485,10 @@ def std_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: return self.table_op_handler(table_op) def var_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: - """The var_by method creates a new table containing the variance for each group. Columns not used in the + """The var_by method creates a new table containing the sample variance for each group. Columns not used in the grouping must be of numeric types. + Sample variance is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. 
Args: by (Union[str, List[str]], optional): the group-by column name(s), default is None, meaning grouping diff --git a/py/client/pydeephaven/agg.py b/py/client/pydeephaven/agg.py index f8a3eaf7fda..2f2334b1a26 100644 --- a/py/client/pydeephaven/agg.py +++ b/py/client/pydeephaven/agg.py @@ -300,7 +300,7 @@ def sorted_last(order_by: str, cols: Union[str, List[str]] = None) -> Aggregatio def std(cols: Union[str, List[str]] = None) -> Aggregation: - """Creates a Std (standard deviation) aggregation. + """Creates a Std (sample standard deviation) aggregation. Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; @@ -366,7 +366,7 @@ def unique(cols: Union[str, List[str]] = None, include_nulls: bool = False, def var(cols: Union[str, List[str]] = None) -> Aggregation: - """Creates a Variance aggregation. + """Creates a sample Variance aggregation. Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; diff --git a/py/client/pydeephaven/table.py b/py/client/pydeephaven/table.py index 478616eece8..b7c19adcd89 100644 --- a/py/client/pydeephaven/table.py +++ b/py/client/pydeephaven/table.py @@ -529,8 +529,10 @@ def avg_by(self, by: Union[str, List[str]] = None) -> Table: return super(Table, self).avg_by(by) def std_by(self, by: Union[str, List[str]] = None) -> Table: - """The std_by method creates a new table containing the standard deviation for each group. Columns not used - in the grouping must be of numeric types. + """The std_by method creates a new table containing the sample standard deviation for each group. Columns not + used in the grouping must be of numeric types. + Sample standard deviation is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. 
Args: by (Union[str, List[str]]): the group-by column names(s), default is None, meaning grouping @@ -545,8 +547,10 @@ def std_by(self, by: Union[str, List[str]] = None) -> Table: return super(Table, self).std_by(by) def var_by(self, by: Union[str, List[str]] = None) -> Table: - """The var_by method creates a new table containing the variance for each group. Columns not used in the + """The var_by method creates a new table containing the sample variance for each group. Columns not used in the grouping must be of numeric types. + Sample variance is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. Args: by (Union[str, List[str]], optional): the group-by column name(s), default is None, meaning grouping diff --git a/py/client/pydeephaven/updateby.py b/py/client/pydeephaven/updateby.py index 9d096d1fe57..bebad5edc55 100644 --- a/py/client/pydeephaven/updateby.py +++ b/py/client/pydeephaven/updateby.py @@ -1279,9 +1279,11 @@ def rolling_count_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int = 0) -> UpdateByOperation: - """Creates a rolling standard deviation UpdateByOperation for the supplied column names, using ticks as the windowing unit. Ticks - are row counts, and you may specify the reverse and forward window in number of rows to include. The current row - is considered to belong to the reverse window but not the forward window. Also, negative values are allowed and + """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using ticks as the + windowing unit. Sample standard deviation is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. + Ticks are row counts, and you may specify the reverse and forward window in number of rows to include. 
The current + row is considered to belong to the reverse window but not the forward window. Also, negative values are allowed and can be used to generate completely forward or completely reverse windows. Here are some examples of window values: @@ -1298,7 +1300,7 @@ def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int Args: cols (Union[str, List[str]]): the column(s) to be operated on, can include expressions to rename the output, - i.e. "new_col = col"; when empty, update_by perform the rolling standard deviation operation on all columns. + i.e. "new_col = col"; when empty, update_by perform the rolling sample standard deviation operation on all columns. rev_ticks (int): the look-behind window size (in rows/ticks) fwd_ticks (int): the look-forward window size (int rows/ticks), default is 0 @@ -1322,8 +1324,10 @@ def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int def rolling_std_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union[int, str], fwd_time: Union[int, str] = 0) -> UpdateByOperation: - """Creates a rolling standard deviation UpdateByOperation for the supplied column names, using time as the windowing unit. This - function accepts nanoseconds or time strings as the reverse and forward window parameters. Negative values are + """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using time as the + windowing unit. Sample standard deviation is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. + This function accepts nanoseconds or time strings as the reverse and forward window parameters. Negative values are allowed and can be used to generate completely forward or completely reverse windows. A row containing a null in the timestamp column belongs to no window and will not be considered in the windows of other rows; its output will be null. 
diff --git a/py/server/deephaven/agg.py b/py/server/deephaven/agg.py index 622e0320507..434e03d8a08 100644 --- a/py/server/deephaven/agg.py +++ b/py/server/deephaven/agg.py @@ -272,7 +272,7 @@ def sorted_last(order_by: str, cols: Union[str, List[str]] = None) -> Aggregatio def std(cols: Union[str, List[str]] = None) -> Aggregation: - """Creates a Std aggregation. + """Creates a Std (sample standard deviation) aggregation. Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; @@ -312,7 +312,7 @@ def unique(cols: Union[str, List[str]] = None, include_nulls: bool = False, non_ def var(cols: Union[str, List[str]] = None) -> Aggregation: - """Creates a Var aggregation. + """Creates a sample Var aggregation. Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; diff --git a/py/server/deephaven/table.py b/py/server/deephaven/table.py index 8a60dae1008..e31408bc841 100644 --- a/py/server/deephaven/table.py +++ b/py/server/deephaven/table.py @@ -1782,7 +1782,9 @@ def weighted_avg_by(self, wcol: str, by: Union[str, Sequence[str]] = None) -> Ta raise DHError(e, "table avg_by operation failed.") from e def std_by(self, by: Union[str, Sequence[str]] = None) -> Table: - """The std_by method creates a new table containing the standard deviation for each group. + """The std_by method creates a new table containing the sample standard deviation for each group. + Sample standard deviation is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. 
Args: by (Union[str, Sequence[str]], optional): the group-by column name(s), default is None @@ -1803,7 +1805,9 @@ def std_by(self, by: Union[str, Sequence[str]] = None) -> Table: raise DHError(e, "table std_by operation failed.") from e def var_by(self, by: Union[str, Sequence[str]] = None) -> Table: - """The var_by method creates a new table containing the variance for each group. + """The var_by method creates a new table containing the sample variance for each group. + Sample variance is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. Args: by (Union[str, Sequence[str]], optional): the group-by column name(s), default is None diff --git a/py/server/deephaven/updateby.py b/py/server/deephaven/updateby.py index efff037fff0..1ccf48b9f10 100644 --- a/py/server/deephaven/updateby.py +++ b/py/server/deephaven/updateby.py @@ -1190,10 +1190,12 @@ def rolling_count_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int = 0) -> UpdateByOperation: - """Creates a rolling standard deviation UpdateByOperation for the supplied column names, using ticks as the windowing unit. Ticks - are row counts, and you may specify the reverse and forward window in number of rows to include. The current row - is considered to belong to the reverse window but not the forward window. Also, negative values are allowed and - can be used to generate completely forward or completely reverse windows. + """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using ticks as the + windowing unit. Sample standard deviation is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. + Ticks are row counts, and you may specify the reverse and forward window in number of rows to include. 
+ The current row is considered to belong to the reverse window but not the forward window. Also, negative values are + allowed and can be used to generate completely forward or completely reverse windows. Here are some examples of window values: | `rev_ticks = 1, fwd_ticks = 0` - contains only the current row @@ -1209,7 +1211,7 @@ def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int Args: cols (Union[str, List[str]]): the column(s) to be operated on, can include expressions to rename the output, - i.e. "new_col = col"; when empty, update_by perform the rolling standard deviation operation on all columns. + i.e. "new_col = col"; when empty, update_by perform the rolling sample standard deviation operation on all columns. rev_ticks (int): the look-behind window size (in rows/ticks) fwd_ticks (int): the look-forward window size (int rows/ticks), default is 0 @@ -1228,11 +1230,12 @@ def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int def rolling_std_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union[int, str], fwd_time: Union[int, str] = 0) -> UpdateByOperation: - """Creates a rolling standard deviation UpdateByOperation for the supplied column names, using time as the windowing unit. This - function accepts nanoseconds or time strings as the reverse and forward window parameters. Negative values are - allowed and can be used to generate completely forward or completely reverse windows. A row containing a null in - the timestamp column belongs to no window and will not be considered in the windows of other rows; its output will - be null. + """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using time as the + windowing unit. Sample standard deviation is computed using `Bessel's correction <https://en.wikipedia.org/wiki/Bessel%27s_correction>`_, + which ensures that the sample variance will be an unbiased estimator of population variance. 
+ This function accepts nanoseconds or time strings as the reverse and forward window parameters. Negative values are + allowed and can be used to generate completely forward or completely reverse windows. A row containing a null in the + timestamp column belongs to no window and will not be considered in the windows of other rows; its output will be null. Here are some examples of window values: | `rev_time = 0, fwd_time = 0` - contains rows that exactly match the current row timestamp @@ -1250,7 +1253,7 @@ def rolling_std_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union[i Args: ts_col (str): the timestamp column for determining the window cols (Union[str, List[str]]): the column(s) to be operated on, can include expressions to rename the output, - i.e. "new_col = col"; when empty, update_by perform the rolling standard deviation operation on all columns. + i.e. "new_col = col"; when empty, update_by perform the rolling sample standard deviation operation on all columns. rev_time (int): the look-behind window size, can be expressed as an integer in nanoseconds or a time interval string, e.g. "PT00:00:00.001" or "PT5M" fwd_time (int): the look-ahead window size, can be expressed as an integer in nanoseconds or a time diff --git a/table-api/src/main/java/io/deephaven/api/TableOperations.java b/table-api/src/main/java/io/deephaven/api/TableOperations.java index 6b6b6cfc92f..d3c0b81db26 100644 --- a/table-api/src/main/java/io/deephaven/api/TableOperations.java +++ b/table-api/src/main/java/io/deephaven/api/TableOperations.java @@ -1165,7 +1165,7 @@ TOPS updateBy(UpdateByControl control, Collection o // ------------------------------------------------------------------------------------------- /** - * Produces a single row table with the standard deviation of each column. + * Produces a single row table with the sample standard deviation of each column. *

* When the input table is empty, zero output rows are produced. */ @@ -1173,8 +1173,8 @@ TOPS updateBy(UpdateByControl control, Collection o TOPS stdBy(); /** - * Groups the data column according to groupByColumns and computes the standard deviation for the rest - * of the fields + * Groups the data column according to groupByColumns and computes the sample standard deviation for + * the rest of the fields * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1182,8 +1182,8 @@ TOPS updateBy(UpdateByControl control, Collection o TOPS stdBy(String... groupByColumns); /** - * Groups the data column according to groupByColumns and computes the standard deviation for the rest - * of the fields + * Groups the data column according to groupByColumns and computes the sample standard deviation for + * the rest of the fields * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1191,8 +1191,8 @@ TOPS updateBy(UpdateByControl control, Collection o TOPS stdBy(ColumnName... groupByColumns); /** - * Groups the data column according to groupByColumns and computes the standard deviation for the rest - * of the fields + * Groups the data column according to groupByColumns and computes the sample standard deviation for + * the rest of the fields * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1202,7 +1202,7 @@ TOPS updateBy(UpdateByControl control, Collection o // ------------------------------------------------------------------------------------------- /** - * Produces a single row table with the variance of each column. + * Produces a single row table with the sample variance of each column. *

* When the input table is empty, zero output rows are produced. */ @@ -1210,8 +1210,8 @@ TOPS updateBy(UpdateByControl control, Collection o TOPS varBy(); /** - * Groups the data column according to groupByColumns and computes the variance for the rest of the - * fields + * Groups the data column according to groupByColumns and computes the sample variance for the rest of + * the fields * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1219,8 +1219,8 @@ TOPS updateBy(UpdateByControl control, Collection o TOPS varBy(String... groupByColumns); /** - * Groups the data column according to groupByColumns and computes the variance for the rest of the - * fields + * Groups the data column according to groupByColumns and computes the sample variance for the rest of + * the fields * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ diff --git a/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java b/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java index f228f3e324f..7ebc01488ce 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java +++ b/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java @@ -522,8 +522,8 @@ static Aggregation AggSortedLast(Collection sortColumns, Strin } /** - * Create a {@link io.deephaven.api.agg.spec.AggSpecStd standard deviation} aggregation for the supplied column name - * pairs. + * Create a {@link io.deephaven.api.agg.spec.AggSpecStd sample standard deviation} aggregation for the supplied + * column name pairs. * * @param pairs The input/output column name pairs * @return The aggregation @@ -608,7 +608,7 @@ static Aggregation AggUnique(boolean includeNulls, UnionObject nonUniqueSentinel } /** - * Create a {@link io.deephaven.api.agg.spec.AggSpecVar variance} aggregation for the supplied column name pairs. 
+ * Create a {@link io.deephaven.api.agg.spec.AggSpecVar sample variance} aggregation for the supplied column name pairs. * * @param pairs The input/output column name pairs * @return The aggregation diff --git a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java index 5b18e83bee6..d4577e8909f 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java +++ b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java @@ -8,8 +8,9 @@ import org.immutables.value.Value.Immutable; /** - * Specifies an aggregation that outputs the standard deviation of the input column values for each group. Only works + * Specifies an aggregation that outputs the sample standard deviation of the input column values for each group. Only works * for numeric input types. + * Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @see TableOperations#stdBy */ diff --git a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java index dd5e5b4c364..5959e165a3b 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java +++ b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java @@ -8,8 +8,9 @@ import org.immutables.value.Value.Immutable; /** - * Specifies an aggregation that outputs the variance of the input column values for each group. Only works for numeric - * input types. + * Specifies an aggregation that outputs the sample variance of the input column values for each group. Only works for + * numeric input types. 
+ * Sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction * * @see TableOperations#varBy */ diff --git a/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java b/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java index f59e448cbd2..e17c0e324d0 100644 --- a/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java +++ b/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java @@ -1620,8 +1620,10 @@ static UpdateByOperation RollingCount(String timestampCol, long revTime, long fw /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using ticks as the - * windowing unit. Ticks are row counts and you may specify the previous window in number of rows to include. The + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using ticks + * as the windowing unit. Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * This ensures that the sample variance will be an unbiased estimator of population variance. + *Ticks are row counts and you may specify the previous window in number of rows to include. The * current row is considered to belong to the reverse window, so calling this with {@code revTicks = 1} will simply * return the current row. Specifying {@code revTicks = 10} will include the previous 9 rows to this one and this * row for a total of 10 rows. @@ -1635,11 +1637,13 @@ static UpdateByOperation RollingStd(long revTicks, String... pairs) { } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using ticks as the - * windowing unit. Ticks are row counts and you may specify the reverse and forward window in number of rows to - * include. The current row is considered to belong to the reverse window but not the forward window. 
Also, negative - * values are allowed and can be used to generate completely forward or completely reverse windows. Here are some - * examples of window values: + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using ticks + * as the windowing unit. Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * This ensures that the sample variance will be an unbiased estimator of population variance. + * Ticks are row counts and you may specify the reverse and forward window in number of rows to include. The current + * row is considered to belong to the reverse window but not the forward window. Also, negative values are allowed + * and can be used to generate completely forward or completely reverse windows. Here are some examples of window + * values: *

<ul>
* <li>{@code revTicks = 1, fwdTicks = 0} - contains only the current row</li>
* <li>{@code revTicks = 10, fwdTicks = 0} - contains 9 previous rows and the current row</li>
  • @@ -1664,10 +1668,12 @@ static UpdateByOperation RollingStd(long revTicks, long fwdTicks, String... pair } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using time as the - * windowing unit. This function accepts {@link Duration duration} as the reverse window parameter. A row containing - * a {@code null} in the timestamp column belongs to no window and will not have a value computed or be considered - * in the windows of other rows. + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time + * as the windowing unit. Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * This ensures that the sample variance will be an unbiased estimator of population variance. + * This function accepts {@link Duration duration} as the reverse window parameter. A row containing a {@code null} + * in the timestamp column belongs to no window and will not have a value computed or be considered in the windows + * of other rows. * * Here are some examples of window values: *
      @@ -1685,11 +1691,13 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, S } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using time as the - * windowing unit. This function accepts {@link Duration durations} as the reverse and forward window parameters. - * Negative values are allowed and can be used to generate completely forward or completely reverse windows. A row - * containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or be - * considered in the windows of other rows. + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time + * as the windowing unit. Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * This ensures that the sample variance will be an unbiased estimator of population variance. + * This function accepts {@link Duration durations} as the reverse and forward window parameters. Negative values + * are allowed and can be used to generate completely forward or completely reverse windows. A row containing a + * {@code null} in the timestamp column belongs to no window and will not have a value computed or be considered in + * the windows of other rows. * * Here are some examples of window values: *
        @@ -1718,10 +1726,12 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, D } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using time as the - * windowing unit. This function accepts {@code nanoseconds} as the reverse window parameters. A row containing a - * {@code null} in the timestamp column belongs to no window and will not have a value computed or be considered in - * the windows of other rows. + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time + * as the windowing unit. Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * This ensures that the sample variance will be an unbiased estimator of population variance. + * This function accepts {@code nanoseconds} as the reverse window parameters. A row containing a {@code null} in + * the timestamp column belongs to no window and will not have a value computed or be considered in the windows of + * other rows. * * @param timestampCol the name of the timestamp column * @param revTime the look-behind window size (in nanoseconds) @@ -1733,8 +1743,10 @@ static UpdateByOperation RollingStd(String timestampCol, long revTime, String... } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using time as the - * windowing unit. This function accepts {@code nanoseconds} as the reverse and forward window parameters. Negative + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time + * as the windowing unit. Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * This ensures that the sample variance will be an unbiased estimator of population variance. 
+ * This function accepts {@code nanoseconds} as the reverse and forward window parameters. Negative * values are allowed and can be used to generate completely forward or completely reverse windows. A row containing * a {@code null} in the timestamp column belongs to no window and will not have a value computed or be considered * in the windows of other rows. diff --git a/web/WebDevelopersGuide.md b/web/WebDevelopersGuide.md index 87bc0ab50b7..33a202e8dd3 100644 --- a/web/WebDevelopersGuide.md +++ b/web/WebDevelopersGuide.md @@ -992,9 +992,9 @@ This enum describes the name of each supported operation/aggregation type when c value is "Max". * `SUM` - The sum of all values in the specified column. Can only apply to numeric types. String value is "Sum". * `ABS_SUM` - The sum of all values, as their distance from zero, in the specified column. Can only apply to numeric types. String value is “AbsSum”. - * `VAR` - The variance of all values in the specified column. Can only apply to numeric types. String value is "Var". + * `VAR` - The sample variance of all values in the specified column. Can only apply to numeric types. String value is "Var". * `AVG` - The average of all values in the specified column. Can only apply to numeric types. String value is "Avg". - * `STD` - The standard deviation of all values in the specified column. Can only apply to numeric types. String value is + * `STD` - The sample standard deviation of all values in the specified column. Can only apply to numeric types. String value is "Std". * `FIRST` - The first value in the specified column. Can apply to any type. String value is "First". * `LAST` - The last value in the specified column. Can apply to any type. String value is "Last". 
diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java b/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java index 552bbc6497a..b100bca4ba0 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java @@ -72,7 +72,7 @@ public enum StatType { */ MAX_ABS("MAX (ABS)", null), /** - * The standard deviation of the values in the column. + * The sample standard deviation of the values in the column. */ STD_DEV("STD DEV", "double"), /** diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java index fa0c50cce57..3a19eec6ec1 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java @@ -45,8 +45,8 @@ public class JsAggregationOperation { */ ABS_SUM = "AbsSum", /** - * The variance of all values in the specified column. Can only apply to numeric types. String value is - * "Var". + * The sample variance of all values in the specified column. Can only apply to numeric types. String value + * is "Var". */ VAR = "Var", /** @@ -55,8 +55,8 @@ public class JsAggregationOperation { */ AVG = "Avg", /** - * The standard deviation of all values in the specified column. Can only apply to numeric types. String - * value is "Std". + * The sample standard deviation of all values in the specified column. Can only apply to numeric types. + * String value is "Std". 
*/ STD = "Std", /** From 401577d84ae265c4309fc13144045ebc2054f5c1 Mon Sep 17 00:00:00 2001 From: alexpeters1208 Date: Tue, 7 Nov 2023 11:21:30 -0600 Subject: [PATCH 2/5] Fix Java file formatting --- .../io/deephaven/api/agg/Aggregation.java | 3 +- .../io/deephaven/api/agg/spec/AggSpecStd.java | 6 +- .../io/deephaven/api/agg/spec/AggSpecVar.java | 4 +- .../api/updateby/UpdateByOperation.java | 68 +++++++++---------- 4 files changed, 41 insertions(+), 40 deletions(-) diff --git a/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java b/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java index 7ebc01488ce..9cb0afd93bd 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java +++ b/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java @@ -608,7 +608,8 @@ static Aggregation AggUnique(boolean includeNulls, UnionObject nonUniqueSentinel } /** - * Create a {@link io.deephaven.api.agg.spec.AggSpecVar sample variance} aggregation for the supplied column name pairs. + * Create a {@link io.deephaven.api.agg.spec.AggSpecVar sample variance} aggregation for the supplied column name + * pairs. * * @param pairs The input/output column name pairs * @return The aggregation diff --git a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java index d4577e8909f..e3b87e888f2 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java +++ b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java @@ -8,9 +8,9 @@ import org.immutables.value.Value.Immutable; /** - * Specifies an aggregation that outputs the sample standard deviation of the input column values for each group. Only works - * for numeric input types. 
- * Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * Specifies an aggregation that outputs the sample standard deviation of the input column values for each group. Only + * works for numeric input types. Sample standard deviation is computed using Bessel's correction: + * https://en.wikipedia.org/wiki/Bessel%27s_correction * * @see TableOperations#stdBy */ diff --git a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java index 5959e165a3b..5c1861e4bc5 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java +++ b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java @@ -9,8 +9,8 @@ /** * Specifies an aggregation that outputs the sample variance of the input column values for each group. Only works for - * numeric input types. - * Sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * numeric input types. Sample variance is computed using Bessel's correction: + * https://en.wikipedia.org/wiki/Bessel%27s_correction * * @see TableOperations#varBy */ diff --git a/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java b/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java index e17c0e324d0..eee01d8b1ea 100644 --- a/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java +++ b/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java @@ -1621,12 +1621,12 @@ static UpdateByOperation RollingCount(String timestampCol, long revTime, long fw /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using ticks - * as the windowing unit. 
Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction - * This ensures that the sample variance will be an unbiased estimator of population variance. - *Ticks are row counts and you may specify the previous window in number of rows to include. The - * current row is considered to belong to the reverse window, so calling this with {@code revTicks = 1} will simply - * return the current row. Specifying {@code revTicks = 10} will include the previous 9 rows to this one and this - * row for a total of 10 rows. + * as the windowing unit. Sample standard deviation is computed using Bessel's correction: + * https://en.wikipedia.org/wiki/Bessel%27s_correction This ensures that the sample variance will be an unbiased + * estimator of population variance. Ticks are row counts and you may specify the previous window in number of rows + * to include. The current row is considered to belong to the reverse window, so calling this with + * {@code revTicks = 1} will simply return the current row. Specifying {@code revTicks = 10} will include the + * previous 9 rows to this one and this row for a total of 10 rows. * * @param revTicks the look-behind window size (in rows/ticks) * @param pairs The input/output column name pairs @@ -1638,12 +1638,12 @@ static UpdateByOperation RollingStd(long revTicks, String... pairs) { /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using ticks - * as the windowing unit. Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction - * This ensures that the sample variance will be an unbiased estimator of population variance. - * Ticks are row counts and you may specify the reverse and forward window in number of rows to include. The current - * row is considered to belong to the reverse window but not the forward window. 
Also, negative values are allowed - * and can be used to generate completely forward or completely reverse windows. Here are some examples of window - * values: + * as the windowing unit. Sample standard deviation is computed using Bessel's correction: + * https://en.wikipedia.org/wiki/Bessel%27s_correction This ensures that the sample variance will be an unbiased + * estimator of population variance. Ticks are row counts and you may specify the reverse and forward window in + * number of rows to include. The current row is considered to belong to the reverse window but not the forward + * window. Also, negative values are allowed and can be used to generate completely forward or completely reverse + * windows. Here are some examples of window values: *
<ul>
* <li>{@code revTicks = 1, fwdTicks = 0} - contains only the current row</li>
* <li>{@code revTicks = 10, fwdTicks = 0} - contains 9 previous rows and the current row</li>
        • @@ -1669,11 +1669,11 @@ static UpdateByOperation RollingStd(long revTicks, long fwdTicks, String... pair /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time - * as the windowing unit. Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction - * This ensures that the sample variance will be an unbiased estimator of population variance. - * This function accepts {@link Duration duration} as the reverse window parameter. A row containing a {@code null} - * in the timestamp column belongs to no window and will not have a value computed or be considered in the windows - * of other rows. + * as the windowing unit. Sample standard deviation is computed using Bessel's correction: + * https://en.wikipedia.org/wiki/Bessel%27s_correction This ensures that the sample variance will be an unbiased + * estimator of population variance. This function accepts {@link Duration duration} as the reverse window + * parameter. A row containing a {@code null} in the timestamp column belongs to no window and will not have a value + * computed or be considered in the windows of other rows. * * Here are some examples of window values: *
            @@ -1692,12 +1692,12 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, S /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time - * as the windowing unit. Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction - * This ensures that the sample variance will be an unbiased estimator of population variance. - * This function accepts {@link Duration durations} as the reverse and forward window parameters. Negative values - * are allowed and can be used to generate completely forward or completely reverse windows. A row containing a - * {@code null} in the timestamp column belongs to no window and will not have a value computed or be considered in - * the windows of other rows. + * as the windowing unit. Sample standard deviation is computed using Bessel's correction: + * https://en.wikipedia.org/wiki/Bessel%27s_correction This ensures that the sample variance will be an unbiased + * estimator of population variance. This function accepts {@link Duration durations} as the reverse and forward + * window parameters. Negative values are allowed and can be used to generate completely forward or completely + * reverse windows. A row containing a {@code null} in the timestamp column belongs to no window and will not have a + * value computed or be considered in the windows of other rows. * * Here are some examples of window values: *
              @@ -1727,11 +1727,11 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, D /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time - * as the windowing unit. Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction - * This ensures that the sample variance will be an unbiased estimator of population variance. - * This function accepts {@code nanoseconds} as the reverse window parameters. A row containing a {@code null} in - * the timestamp column belongs to no window and will not have a value computed or be considered in the windows of - * other rows. + * as the windowing unit. Sample standard deviation is computed using Bessel's correction: + * https://en.wikipedia.org/wiki/Bessel%27s_correction This ensures that the sample variance will be an unbiased + * estimator of population variance. This function accepts {@code nanoseconds} as the reverse window parameters. A + * row containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or + * be considered in the windows of other rows. * * @param timestampCol the name of the timestamp column * @param revTime the look-behind window size (in nanoseconds) @@ -1744,12 +1744,12 @@ static UpdateByOperation RollingStd(String timestampCol, long revTime, String... /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time - * as the windowing unit. Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction - * This ensures that the sample variance will be an unbiased estimator of population variance. - * This function accepts {@code nanoseconds} as the reverse and forward window parameters. Negative - * values are allowed and can be used to generate completely forward or completely reverse windows. 
A row containing - * a {@code null} in the timestamp column belongs to no window and will not have a value computed or be considered - * in the windows of other rows. + * as the windowing unit. Sample standard deviation is computed using Bessel's correction: + * https://en.wikipedia.org/wiki/Bessel%27s_correction This ensures that the sample variance will be an unbiased + * estimator of population variance. This function accepts {@code nanoseconds} as the reverse and forward window + * parameters. Negative values are allowed and can be used to generate completely forward or completely reverse + * windows. A row containing a {@code null} in the timestamp column belongs to no window and will not have a value + * computed or be considered in the windows of other rows. * * @param timestampCol the name of the timestamp column * @param revTime the look-behind window size (in nanoseconds) From 0373b517268cd4819c9444e44280cd78ba09de24 Mon Sep 17 00:00:00 2001 From: alexpeters1208 Date: Mon, 13 Nov 2023 10:19:28 -0600 Subject: [PATCH 3/5] Chip review suggestions --- .../include/public/deephaven/client/client.h | 26 ++++++--- .../public/deephaven/client/update_by.h | 26 +++++---- engine/function/src/templates/Numeric.ftl | 56 ++++++++++++++----- go/pkg/client/query.go | 16 ++++-- go/pkg/client/tablehandle.go | 8 ++- py/client/pydeephaven/_table_interface.py | 2 + py/client/pydeephaven/agg.py | 6 ++ py/client/pydeephaven/table.py | 2 + py/client/pydeephaven/updateby.py | 20 ++++--- py/server/deephaven/agg.py | 8 +++ py/server/deephaven/table.py | 2 + py/server/deephaven/updateby.py | 19 ++++--- .../io/deephaven/api/TableOperations.java | 24 ++++++++ .../io/deephaven/api/agg/Aggregation.java | 6 ++ .../io/deephaven/api/agg/spec/AggSpecStd.java | 6 +- .../io/deephaven/api/agg/spec/AggSpecVar.java | 6 +- .../api/updateby/UpdateByOperation.java | 48 ++++++++++------ .../web/client/api/JsColumnStatistics.java | 4 ++ .../tree/enums/JsAggregationOperation.java | 7 +++ 19 files 
changed, 212 insertions(+), 80 deletions(-) diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h index 3e0c80e8aa9..a99ca9c46e9 100644 --- a/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h @@ -569,8 +569,10 @@ class Aggregate { /** * Returns an aggregator that computes the sample standard deviation of values, within an - * aggregation group, for each input column. Sample standard deviation is computed using - * Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * aggregation group, for each input column. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ [[nodiscard]] static Aggregate Std(std::vector column_specs); @@ -610,8 +612,10 @@ class Aggregate { /** * Returns an aggregator that computes the sample variance of values, within an aggregation group, - * for each input column. Sample variance is computed using Bessel's correction: - * https://en.wikipedia.org/wiki/Bessel%27s_correction + * for each input column. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ [[nodiscard]] static Aggregate Var(std::vector column_specs); @@ -803,9 +807,11 @@ Aggregate AggPct(double percentile, Args &&... args) { } /** - * Returns an aggregator that computes the sample standard deviation of values, within an - * aggregation group, for each input column. 
Sample standard deviation is computed using Bessel's correction: - * https://en.wikipedia.org/wiki/Bessel%27s_correction + * Returns an aggregator that computes the sample standard deviation of values, within an aggregation group, + * for each input column. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ template [[nodiscard]] @@ -825,8 +831,10 @@ Aggregate aggSum(Args &&... args) { /** * Returns an aggregator that computes the sample variance of values, within an aggregation group, - * for each input column. Sample variance is computed using - * Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * for each input column. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ template [[nodiscard]] diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h index a681d61ea09..f2d722718e1 100644 --- a/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h @@ -528,12 +528,13 @@ UpdateByOperation rollingCountTime(std::string timestamp_col, std::vector cols, int rev_ticks, int fwd_ticks = 0); /** * Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using time as the - * windowing unit. Sample standard deviation is computed using Bessel's correction, - * discussed here: https://en.wikipedia.org/wiki/Bessel%27s_correction - * This function accepts nanoseconds or time strings as the reverse and forward window parameters. - * window parameters. 
Negative values are allowed and can be used to generate completely forward or - * completely reverse windows. A row containing a null in the timestamp column belongs to no window - * and will not be considered in the windows of other rows; its output will be null. + * windowing unit. This function accepts nanoseconds or time strings as the reverse and forward window parameters. + * Negative values are allowed and can be used to generate completely forward or completely reverse windows. + * A row containing a null in the timestamp column belongs to no window and will not be considered in the windows + * of other rows; its output will be null. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * See the documentation of rollingSumTime() for examples of window values. * diff --git a/engine/function/src/templates/Numeric.ftl b/engine/function/src/templates/Numeric.ftl index dd0468b782e..6270b603bea 100644 --- a/engine/function/src/templates/Numeric.ftl +++ b/engine/function/src/templates/Numeric.ftl @@ -428,7 +428,9 @@ public class Numeric { /** * Returns the sample variance. Null values are excluded. - * Sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. * @return sample variance of non-null values. @@ -439,7 +441,9 @@ public class Numeric { /** * Returns the sample variance. Null values are excluded. 
- * Sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. * @return sample variance of non-null values. @@ -454,7 +458,9 @@ public class Numeric { /** * Returns the sample variance. Null values are excluded. - * Sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. * @return sample variance of non-null values. @@ -499,7 +505,9 @@ public class Numeric { /** * Returns the weighted sample variance. Null values are excluded. - * Weighted sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * + * Weighted sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights @@ -515,7 +523,9 @@ public class Numeric { /** * Returns the weighted sample variance. Null values are excluded. - * Weighted sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * + * Weighted sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. 
* @param weights weights @@ -531,7 +541,9 @@ public class Numeric { /** * Returns the weighted sample variance. Null values are excluded. - * Weighted sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * + * Weighted sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights @@ -547,7 +559,9 @@ public class Numeric { /** * Returns the weighted sample variance. Null values are excluded. - * Weighted sample variance is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * + * Weighted sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights @@ -605,7 +619,9 @@ public class Numeric { /** * Returns the sample standard deviation. Null values are excluded. - * Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. * @return sample standard deviation of non-null values. @@ -616,7 +632,9 @@ public class Numeric { /** * Returns the sample standard deviation. Null values are excluded. 
- * Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. * @return sample standard deviation of non-null values. @@ -631,7 +649,9 @@ public class Numeric { /** * Returns the sample standard deviation. Null values are excluded. - * Sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. * @return sample standard deviation of non-null values. @@ -650,7 +670,9 @@ public class Numeric { /** * Returns the weighted sample standard deviation. Null values are excluded. - * Weighted sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * + * Weighted sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights @@ -666,7 +688,9 @@ public class Numeric { /** * Returns the weighted sample standard deviation. Null values are excluded. 
- * Weighted sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * + * Weighted sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights @@ -682,7 +706,9 @@ public class Numeric { /** * Returns the weighted sample standard deviation. Null values are excluded. - * Weighted sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * + * Weighted sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights @@ -698,7 +724,9 @@ public class Numeric { /** * Returns the weighted sample standard deviation. Null values are excluded. - * Weighted sample standard deviation is computed using Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction + * + * Weighted sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights diff --git a/go/pkg/client/query.go b/go/pkg/client/query.go index b157d8cdf89..cfaac62a844 100644 --- a/go/pkg/client/query.go +++ b/go/pkg/client/query.go @@ -1014,15 +1014,19 @@ func (qb QueryNode) AvgBy(by ...string) QueryNode { } // StdBy returns the sample standard deviation for each group. Null values are ignored. -// Sample standard deviation is calculated using `Bessel's correction `_. // Columns not used in the grouping must be numeric. 
+// +// Sample standard deviation is calculated using `Bessel's correction `_, +// which ensures that the sample variance will be an unbiased estimator of population variance. func (qb QueryNode) StdBy(by ...string) QueryNode { return qb.addOp(dedicatedAggOp{child: qb, colNames: by, kind: tablepb2.ComboAggregateRequest_STD}) } // VarBy returns the sample variance for each group. Null values are ignored. -// Sample variance is calculated using `Bessel's correction `_. // Columns not used in the grouping must be numeric. +// +// Sample variance is calculated using `Bessel's correction `_, +// which ensures that the sample variance will be an unbiased estimator of population variance. func (qb QueryNode) VarBy(by ...string) QueryNode { return qb.addOp(dedicatedAggOp{child: qb, colNames: by, kind: tablepb2.ComboAggregateRequest_VAR}) } @@ -1159,16 +1163,20 @@ func (b *AggBuilder) Percentile(percentile float64, cols ...string) *AggBuilder } // Std returns an aggregator that computes the sample standard deviation of values, within an aggregation group, for each input column. -// Sample standard deviation is calculated using `Bessel's correction `_. // The source columns are specified by cols. +// +// Sample standard deviation is calculated using `Bessel's correction `_, +// which ensures that the sample variance will be an unbiased estimator of population variance. func (b *AggBuilder) StdDev(cols ...string) *AggBuilder { b.addAgg(aggPart{matchPairs: cols, kind: tablepb2.ComboAggregateRequest_STD}) return b } // Var returns an aggregator that computes the sample variance of values, within an aggregation group, for each input column. -// Sample variance is calculated using `Bessel's correction `_. // The source columns are specified by cols. +// +// Sample variance is calculated using `Bessel's correction `_, +// which ensures that the sample variance will be an unbiased estimator of population variance. 
func (b *AggBuilder) Variance(cols ...string) *AggBuilder { b.addAgg(aggPart{matchPairs: cols, kind: tablepb2.ComboAggregateRequest_VAR}) return b diff --git a/go/pkg/client/tablehandle.go b/go/pkg/client/tablehandle.go index 3c7ee1d807e..5b1f5f38a32 100644 --- a/go/pkg/client/tablehandle.go +++ b/go/pkg/client/tablehandle.go @@ -544,8 +544,10 @@ func (th *TableHandle) AvgBy(ctx context.Context, cols ...string) (*TableHandle, } // StdBy returns the sample standard deviation for each group. Null values are ignored. -// Sample standard deviation is calculated using `Bessel's correction `_. // Columns not used in the grouping must be numeric. +// +// Sample standard deviation is calculated using `Bessel's correction `_, +// which ensures that the sample variance will be an unbiased estimator of population variance. func (th *TableHandle) StdBy(ctx context.Context, cols ...string) (*TableHandle, error) { if !th.rLockIfValid() { return nil, ErrInvalidTableHandle @@ -555,8 +557,10 @@ func (th *TableHandle) StdBy(ctx context.Context, cols ...string) (*TableHandle, } // VarBy returns the sample variance for each group. Null values are ignored. -// Sample variance is calculated using `Bessel's correction `_. // Columns not used in the grouping must be numeric. +// +// Sample variance is calculated using `Bessel's correction `_, +// which ensures that the sample variance will be an unbiased estimator of population variance. 
func (th *TableHandle) VarBy(ctx context.Context, cols ...string) (*TableHandle, error) { if !th.rLockIfValid() { return nil, ErrInvalidTableHandle diff --git a/py/client/pydeephaven/_table_interface.py b/py/client/pydeephaven/_table_interface.py index aee77b24b18..fc3e0b3a5c3 100644 --- a/py/client/pydeephaven/_table_interface.py +++ b/py/client/pydeephaven/_table_interface.py @@ -468,6 +468,7 @@ def avg_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: def std_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: """The std_by method creates a new table containing the sample standard deviation for each group. Columns not used in the grouping must be of numeric types. + Sample standard deviation is computed using `Bessel's correction `_, which ensures that the sample variance will be an unbiased estimator of population variance. @@ -487,6 +488,7 @@ def std_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: def var_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: """The var_by method creates a new table containing the sample variance for each group. Columns not used in the grouping must be of numeric types. + Sample variance is computed using `Bessel's correction `_, which ensures that the sample variance will be an unbiased estimator of population variance. diff --git a/py/client/pydeephaven/agg.py b/py/client/pydeephaven/agg.py index 2f2334b1a26..b50df148f1f 100644 --- a/py/client/pydeephaven/agg.py +++ b/py/client/pydeephaven/agg.py @@ -302,6 +302,9 @@ def sorted_last(order_by: str, cols: Union[str, List[str]] = None) -> Aggregatio def std(cols: Union[str, List[str]] = None) -> Aggregation: """Creates a Std (sample standard deviation) aggregation. + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. 
+ Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; default is None, only valid when used in Table agg_all_by operation @@ -368,6 +371,9 @@ def unique(cols: Union[str, List[str]] = None, include_nulls: bool = False, def var(cols: Union[str, List[str]] = None) -> Aggregation: """Creates a sample Variance aggregation. + Sample variance is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. + Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; default is None, only valid when used in Table agg_all_by operation diff --git a/py/client/pydeephaven/table.py b/py/client/pydeephaven/table.py index b7c19adcd89..4251639731f 100644 --- a/py/client/pydeephaven/table.py +++ b/py/client/pydeephaven/table.py @@ -531,6 +531,7 @@ def avg_by(self, by: Union[str, List[str]] = None) -> Table: def std_by(self, by: Union[str, List[str]] = None) -> Table: """The std_by method creates a new table containing the sample standard deviation for each group. Columns not used in the grouping must be of numeric types. + Sample standard deviation is computed using `Bessel's correction `_, which ensures that the sample variance will be an unbiased estimator of population variance. @@ -549,6 +550,7 @@ def std_by(self, by: Union[str, List[str]] = None) -> Table: def var_by(self, by: Union[str, List[str]] = None) -> Table: """The var_by method creates a new table containing the sample variance for each group. Columns not used in the grouping must be of numeric types. + Sample variance is computed using `Bessel's correction `_, which ensures that the sample variance will be an unbiased estimator of population variance. 
diff --git a/py/client/pydeephaven/updateby.py b/py/client/pydeephaven/updateby.py index bebad5edc55..af1f1a71edd 100644 --- a/py/client/pydeephaven/updateby.py +++ b/py/client/pydeephaven/updateby.py @@ -1280,11 +1280,12 @@ def rolling_count_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int = 0) -> UpdateByOperation: """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using ticks as the - windowing unit. Sample standard deviation is computed using `Bessel's correction `_, + windowing unit. Ticks are row counts, and you may specify the reverse and forward window in number of rows to + include. The current row is considered to belong to the reverse window but not the forward window. Also, negative + values are allowed and can be used to generate completely forward or completely reverse windows. + + Sample standard deviation is computed using `Bessel's correction `_, which ensures that the sample variance will be an unbiased estimator of population variance. - Ticks are row counts, and you may specify the reverse and forward window in number of rows to include. The current - row is considered to belong to the reverse window but not the forward window. Also, negative values are allowed and - can be used to generate completely forward or completely reverse windows. Here are some examples of window values: rev_ticks = 1, fwd_ticks = 0 - contains only the current row @@ -1325,12 +1326,13 @@ def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int def rolling_std_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union[int, str], fwd_time: Union[int, str] = 0) -> UpdateByOperation: """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using time as the - windowing unit. Sample standard deviation is computed using `Bessel's correction `_, + windowing unit. 
This function accepts nanoseconds or time strings as the reverse and forward window parameters. + Negative values are allowed and can be used to generate completely forward or completely reverse windows. A row + containing a null in the timestamp column belongs to no window and will not be considered in the windows of other + rows; its output will be null. + + Sample standard deviation is computed using `Bessel's correction `_, which ensures that the sample variance will be an unbiased estimator of population variance. - This function accepts nanoseconds or time strings as the reverse and forward window parameters. Negative values are - allowed and can be used to generate completely forward or completely reverse windows. A row containing a null in - the timestamp column belongs to no window and will not be considered in the windows of other rows; its output will - be null. Here are some examples of window values: rev_time = 0, fwd_time = 0 - contains rows that exactly match the current row timestamp diff --git a/py/server/deephaven/agg.py b/py/server/deephaven/agg.py index 434e03d8a08..7e4c4293f73 100644 --- a/py/server/deephaven/agg.py +++ b/py/server/deephaven/agg.py @@ -274,6 +274,10 @@ def sorted_last(order_by: str, cols: Union[str, List[str]] = None) -> Aggregatio def std(cols: Union[str, List[str]] = None) -> Aggregation: """Creates a Std (sample standard deviation) aggregation. + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. + + Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; default is None, only valid when used in Table agg_all_by operation @@ -314,6 +318,10 @@ def unique(cols: Union[str, List[str]] = None, include_nulls: bool = False, non_ def var(cols: Union[str, List[str]] = None) -> Aggregation: """Creates a sample Var aggregation. 
+ Sample variance is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. + + Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; default is None, only valid when used in Table agg_all_by operation diff --git a/py/server/deephaven/table.py b/py/server/deephaven/table.py index e31408bc841..b541480b4a2 100644 --- a/py/server/deephaven/table.py +++ b/py/server/deephaven/table.py @@ -1783,6 +1783,7 @@ def weighted_avg_by(self, wcol: str, by: Union[str, Sequence[str]] = None) -> Ta def std_by(self, by: Union[str, Sequence[str]] = None) -> Table: """The std_by method creates a new table containing the sample standard deviation for each group. + Sample standard deviation is computed using `Bessel's correction `_, which ensures that the sample variance will be an unbiased estimator of population variance. @@ -1806,6 +1807,7 @@ def std_by(self, by: Union[str, Sequence[str]] = None) -> Table: def var_by(self, by: Union[str, Sequence[str]] = None) -> Table: """The var_by method creates a new table containing the sample variance for each group. + Sample variance is computed using `Bessel's correction `_, which ensures that the sample variance will be an unbiased estimator of population variance. diff --git a/py/server/deephaven/updateby.py b/py/server/deephaven/updateby.py index 1ccf48b9f10..56f60a23836 100644 --- a/py/server/deephaven/updateby.py +++ b/py/server/deephaven/updateby.py @@ -1191,11 +1191,12 @@ def rolling_count_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int = 0) -> UpdateByOperation: """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using ticks as the - windowing unit. Sample standard deviation is computed using `Bessel's correction `_, + windowing unit. 
Ticks are row counts, and you may specify the reverse and forward window in number of rows to + include. The current row is considered to belong to the reverse window but not the forward window. Also, negative + values are allowed and can be used to generate completely forward or completely reverse windows. + + Sample standard deviation is computed using `Bessel's correction `_, which ensures that the sample variance will be an unbiased estimator of population variance. - Ticks are row counts, and you may specify the reverse and forward window in number of rows to include. - The current row is considered to belong to the reverse window but not the forward window. Also, negative values are - allowed and can be used to generate completely forward or completely reverse windows. Here are some examples of window values: | `rev_ticks = 1, fwd_ticks = 0` - contains only the current row @@ -1231,11 +1232,13 @@ def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int def rolling_std_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union[int, str], fwd_time: Union[int, str] = 0) -> UpdateByOperation: """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using time as the - windowing unit. Sample standard deviation is computed using `Bessel's correction `_, + windowing unit. This function accepts nanoseconds or time strings as the reverse and forward window parameters. + Negative values are allowed and can be used to generate completely forward or completely reverse windows. A row + containing a null in the timestamp column belongs to no window and will not be considered in the windows of other + rows; its output will be null. + + Sample standard deviation is computed using `Bessel's correction `_, which ensures that the sample variance will be an unbiased estimator of population variance. - This function accepts nanoseconds or time strings as the reverse and forward window parameters. 
Negative values are - allowed and can be used to generate completely forward or completely reverse windows. A row containing a null in the - timestamp column belongs to no window and will not be considered in the windows of other rows; its output will be null. Here are some examples of window values: | `rev_time = 0, fwd_time = 0` - contains rows that exactly match the current row timestamp diff --git a/table-api/src/main/java/io/deephaven/api/TableOperations.java b/table-api/src/main/java/io/deephaven/api/TableOperations.java index d3c0b81db26..97911930c42 100644 --- a/table-api/src/main/java/io/deephaven/api/TableOperations.java +++ b/table-api/src/main/java/io/deephaven/api/TableOperations.java @@ -1168,6 +1168,9 @@ TOPS updateBy(UpdateByControl control, Collection o * Produces a single row table with the sample standard deviation of each column. *

              * When the input table is empty, zero output rows are produced. + *

              + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ @ConcurrentMethod TOPS stdBy(); @@ -1175,6 +1178,9 @@ TOPS updateBy(UpdateByControl control, Collection o /** * Groups the data column according to groupByColumns and computes the sample standard deviation for * the rest of the fields + *

              + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1184,6 +1190,9 @@ TOPS updateBy(UpdateByControl control, Collection o /** * Groups the data column according to groupByColumns and computes the sample standard deviation for * the rest of the fields + *

              + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1193,6 +1202,9 @@ TOPS updateBy(UpdateByControl control, Collection o /** * Groups the data column according to groupByColumns and computes the sample standard deviation for * the rest of the fields + *

              + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1205,6 +1217,9 @@ TOPS updateBy(UpdateByControl control, Collection o * Produces a single row table with the sample variance of each column. *

              * When the input table is empty, zero output rows are produced. + *

              + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ @ConcurrentMethod TOPS varBy(); @@ -1212,6 +1227,9 @@ TOPS updateBy(UpdateByControl control, Collection o /** * Groups the data column according to groupByColumns and computes the sample variance for the rest of * the fields + *

              + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1221,6 +1239,9 @@ TOPS updateBy(UpdateByControl control, Collection o /** * Groups the data column according to groupByColumns and computes the sample variance for the rest of * the fields + *

              + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1230,6 +1251,9 @@ TOPS updateBy(UpdateByControl control, Collection o /** * Groups the data column according to groupByColumns and computes the variance for the rest of the * fields + *

              + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ diff --git a/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java b/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java index 9cb0afd93bd..bb6832bdb6d 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java +++ b/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java @@ -525,6 +525,9 @@ static Aggregation AggSortedLast(Collection sortColumns, Strin * Create a {@link io.deephaven.api.agg.spec.AggSpecStd sample standard deviation} aggregation for the supplied * column name pairs. * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. + * * @param pairs The input/output column name pairs * @return The aggregation */ @@ -611,6 +614,9 @@ static Aggregation AggUnique(boolean includeNulls, UnionObject nonUniqueSentinel * Create a {@link io.deephaven.api.agg.spec.AggSpecVar sample variance} aggregation for the supplied column name * pairs. * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. 
+ * * @param pairs The input/output column name pairs * @return The aggregation */ diff --git a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java index e3b87e888f2..9acb9f83876 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java +++ b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java @@ -9,8 +9,10 @@ /** * Specifies an aggregation that outputs the sample standard deviation of the input column values for each group. Only - * works for numeric input types. Sample standard deviation is computed using Bessel's correction: - * https://en.wikipedia.org/wiki/Bessel%27s_correction + * works for numeric input types. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @see TableOperations#stdBy */ diff --git a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java index 5c1861e4bc5..232c5980e54 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java +++ b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java @@ -9,8 +9,10 @@ /** * Specifies an aggregation that outputs the sample variance of the input column values for each group. Only works for - * numeric input types. Sample variance is computed using Bessel's correction: - * https://en.wikipedia.org/wiki/Bessel%27s_correction + * numeric input types. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. 
* * @see TableOperations#varBy */ diff --git a/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java b/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java index eee01d8b1ea..1ed54701f62 100644 --- a/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java +++ b/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java @@ -1621,13 +1621,15 @@ static UpdateByOperation RollingCount(String timestampCol, long revTime, long fw /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using ticks - * as the windowing unit. Sample standard deviation is computed using Bessel's correction: - * https://en.wikipedia.org/wiki/Bessel%27s_correction This ensures that the sample variance will be an unbiased - * estimator of population variance. Ticks are row counts and you may specify the previous window in number of rows + * as the windowing unit. Ticks are row counts and you may specify the previous window in number of rows * to include. The current row is considered to belong to the reverse window, so calling this with * {@code revTicks = 1} will simply return the current row. Specifying {@code revTicks = 10} will include the * previous 9 rows to this one and this row for a total of 10 rows. * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. + * * @param revTicks the look-behind window size (in rows/ticks) * @param pairs The input/output column name pairs * @return The aggregation @@ -1638,9 +1640,7 @@ static UpdateByOperation RollingStd(long revTicks, String... pairs) { /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using ticks - * as the windowing unit. 
Sample standard deviation is computed using Bessel's correction: - * https://en.wikipedia.org/wiki/Bessel%27s_correction This ensures that the sample variance will be an unbiased - * estimator of population variance. Ticks are row counts and you may specify the reverse and forward window in + * as the windowing unit. Ticks are row counts and you may specify the reverse and forward window in * number of rows to include. The current row is considered to belong to the reverse window but not the forward * window. Also, negative values are allowed and can be used to generate completely forward or completely reverse * windows. Here are some examples of window values: @@ -1658,6 +1658,10 @@ static UpdateByOperation RollingStd(long revTicks, String... pairs) { * following the current row (inclusive) *

            * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. + * * @param revTicks the look-behind window size (in rows/ticks) * @param fwdTicks the look-ahead window size (in rows/ticks) * @param pairs The input/output column name pairs @@ -1669,9 +1673,7 @@ static UpdateByOperation RollingStd(long revTicks, long fwdTicks, String... pair /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time - * as the windowing unit. Sample standard deviation is computed using Bessel's correction: - * https://en.wikipedia.org/wiki/Bessel%27s_correction This ensures that the sample variance will be an unbiased - * estimator of population variance. This function accepts {@link Duration duration} as the reverse window + * as the windowing unit. This function accepts {@link Duration duration} as the reverse window * parameter. A row containing a {@code null} in the timestamp column belongs to no window and will not have a value * computed or be considered in the windows of other rows. * @@ -1681,6 +1683,10 @@ static UpdateByOperation RollingStd(long revTicks, long fwdTicks, String... pair *
          • {@code revDuration = 10m} - contains rows from 10m earlier through the current row timestamp (inclusive)
          • *
          * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. + * * @param timestampCol the name of the timestamp column * @param revDuration the look-behind window size (in Duration) * @param pairs The input/output column name pairs @@ -1692,9 +1698,7 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, S /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time - * as the windowing unit. Sample standard deviation is computed using Bessel's correction: - * https://en.wikipedia.org/wiki/Bessel%27s_correction This ensures that the sample variance will be an unbiased - * estimator of population variance. This function accepts {@link Duration durations} as the reverse and forward + * as the windowing unit. This function accepts {@link Duration durations} as the reverse and forward * window parameters. Negative values are allowed and can be used to generate completely forward or completely * reverse windows. A row containing a {@code null} in the timestamp column belongs to no window and will not have a * value computed or be considered in the windows of other rows. @@ -1714,6 +1718,10 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, S * current row timestamp (inclusive), this is a purely forwards looking window *
        * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. + * * @param timestampCol the name of the timestamp column * @param revDuration the look-behind window size (in Duration) * @param fwdDuration the look-ahead window size (in Duration) @@ -1727,12 +1735,14 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, D /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time - * as the windowing unit. Sample standard deviation is computed using Bessel's correction: - * https://en.wikipedia.org/wiki/Bessel%27s_correction This ensures that the sample variance will be an unbiased - * estimator of population variance. This function accepts {@code nanoseconds} as the reverse window parameters. A + * as the windowing unit. This function accepts {@code nanoseconds} as the reverse window parameters. A * row containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or * be considered in the windows of other rows. * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. + * * @param timestampCol the name of the timestamp column * @param revTime the look-behind window size (in nanoseconds) * @param pairs The input/output column name pairs @@ -1744,13 +1754,15 @@ static UpdateByOperation RollingStd(String timestampCol, long revTime, String... /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time - * as the windowing unit. 
Sample standard deviation is computed using Bessel's correction: - * https://en.wikipedia.org/wiki/Bessel%27s_correction This ensures that the sample variance will be an unbiased - * estimator of population variance. This function accepts {@code nanoseconds} as the reverse and forward window + * as the windowing unit. This function accepts {@code nanoseconds} as the reverse and forward window * parameters. Negative values are allowed and can be used to generate completely forward or completely reverse * windows. A row containing a {@code null} in the timestamp column belongs to no window and will not have a value * computed or be considered in the windows of other rows. * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. + * * @param timestampCol the name of the timestamp column * @param revTime the look-behind window size (in nanoseconds) * @param fwdTime the look-ahead window size (in nanoseconds) diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java b/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java index b100bca4ba0..2b70704618e 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java @@ -73,6 +73,10 @@ public enum StatType { MAX_ABS("MAX (ABS)", null), /** * The sample standard deviation of the values in the column. + * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an + * unbiased estimator of population variance. 
*/ STD_DEV("STD DEV", "double"), /** diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java index 3a19eec6ec1..5fe2bee84c6 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java @@ -47,6 +47,10 @@ public class JsAggregationOperation { /** * The sample variance of all values in the specified column. Can only apply to numeric types. String value * is "Var". + * + * Sample variance is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an + * unbiased estimator of population variance. */ VAR = "Var", /** @@ -57,6 +61,9 @@ public class JsAggregationOperation { /** * The sample standard deviation of all values in the specified column. Can only apply to numeric types. * String value is "Std". + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an + * unbiased estimator of population variance. 
*/ STD = "Std", /** From 44cd62e4f336fe8e5a1b5c3afad77f0ae613c9e4 Mon Sep 17 00:00:00 2001 From: alexpeters1208 Date: Mon, 13 Nov 2023 10:27:03 -0600 Subject: [PATCH 4/5] Spotless apply --- .../web/client/api/tree/enums/JsAggregationOperation.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java index 5fe2bee84c6..ab0824b1a98 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java @@ -60,8 +60,7 @@ public class JsAggregationOperation { AVG = "Avg", /** * The sample standard deviation of all values in the specified column. Can only apply to numeric types. - * String value is "Std". - * Sample standard deviation is computed using Bessel's correction + * String value is "Std". Sample standard deviation is computed using Bessel's correction * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an * unbiased estimator of population variance. 
*/ From 605dc35ac74b30ae527962d2e3788dc4636d4e2b Mon Sep 17 00:00:00 2001 From: alexpeters1208 Date: Mon, 13 Nov 2023 21:45:21 -0600 Subject: [PATCH 5/5] Spotless apply (again) --- .../io/deephaven/api/TableOperations.java | 20 +++++---- .../io/deephaven/api/agg/Aggregation.java | 5 ++- .../io/deephaven/api/agg/spec/AggSpecStd.java | 5 ++- .../io/deephaven/api/agg/spec/AggSpecVar.java | 4 +- .../api/updateby/UpdateByOperation.java | 44 +++++++++---------- 5 files changed, 42 insertions(+), 36 deletions(-) diff --git a/table-api/src/main/java/io/deephaven/api/TableOperations.java b/table-api/src/main/java/io/deephaven/api/TableOperations.java index 97911930c42..c8899973338 100644 --- a/table-api/src/main/java/io/deephaven/api/TableOperations.java +++ b/table-api/src/main/java/io/deephaven/api/TableOperations.java @@ -1169,8 +1169,9 @@ TOPS updateBy(UpdateByControl control, Collection o *

        * When the input table is empty, zero output rows are produced. *

        - * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), - * which ensures that the sample variance will be an unbiased estimator of population variance. + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. */ @ConcurrentMethod TOPS stdBy(); @@ -1179,8 +1180,9 @@ TOPS updateBy(UpdateByControl control, Collection o * Groups the data column according to groupByColumns and computes the sample standard deviation for * the rest of the fields *

        - * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), - * which ensures that the sample variance will be an unbiased estimator of population variance. + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1191,8 +1193,9 @@ TOPS updateBy(UpdateByControl control, Collection o * Groups the data column according to groupByColumns and computes the sample standard deviation for * the rest of the fields *

        - * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), - * which ensures that the sample variance will be an unbiased estimator of population variance. + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1203,8 +1206,9 @@ TOPS updateBy(UpdateByControl control, Collection o * Groups the data column according to groupByColumns and computes the sample standard deviation for * the rest of the fields *

        - * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), - * which ensures that the sample variance will be an unbiased estimator of population variance. + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ diff --git a/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java b/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java index bb6832bdb6d..23d249e05dc 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java +++ b/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java @@ -525,8 +525,9 @@ static Aggregation AggSortedLast(Collection sortColumns, Strin * Create a {@link io.deephaven.api.agg.spec.AggSpecStd sample standard deviation} aggregation for the supplied * column name pairs. * - * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), - * which ensures that the sample variance will be an unbiased estimator of population variance. + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. 
* * @param pairs The input/output column name pairs * @return The aggregation diff --git a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java index 9acb9f83876..2e7513bb666 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java +++ b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java @@ -11,8 +11,9 @@ * Specifies an aggregation that outputs the sample standard deviation of the input column values for each group. Only * works for numeric input types. * - * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), - * which ensures that the sample variance will be an unbiased estimator of population variance. + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @see TableOperations#stdBy */ diff --git a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java index 232c5980e54..b86e5fa3eeb 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java +++ b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java @@ -11,8 +11,8 @@ * Specifies an aggregation that outputs the sample variance of the input column values for each group. Only works for * numeric input types. * - * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), - * which ensures that the sample variance will be an unbiased estimator of population variance. + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), which + * ensures that the sample variance will be an unbiased estimator of population variance. 
* * @see TableOperations#varBy */ diff --git a/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java b/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java index 1ed54701f62..745b85734cb 100644 --- a/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java +++ b/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java @@ -1621,10 +1621,10 @@ static UpdateByOperation RollingCount(String timestampCol, long revTime, long fw /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using ticks - * as the windowing unit. Ticks are row counts and you may specify the previous window in number of rows - * to include. The current row is considered to belong to the reverse window, so calling this with - * {@code revTicks = 1} will simply return the current row. Specifying {@code revTicks = 10} will include the - * previous 9 rows to this one and this row for a total of 10 rows. + * as the windowing unit. Ticks are row counts and you may specify the previous window in number of rows to include. + * The current row is considered to belong to the reverse window, so calling this with {@code revTicks = 1} will + * simply return the current row. Specifying {@code revTicks = 10} will include the previous 9 rows to this one and + * this row for a total of 10 rows. * * Sample standard deviation is computed using Bessel's correction * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased @@ -1640,10 +1640,10 @@ static UpdateByOperation RollingStd(long revTicks, String... pairs) { /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using ticks - * as the windowing unit. Ticks are row counts and you may specify the reverse and forward window in - * number of rows to include. 
The current row is considered to belong to the reverse window but not the forward - * window. Also, negative values are allowed and can be used to generate completely forward or completely reverse - * windows. Here are some examples of window values: + * as the windowing unit. Ticks are row counts and you may specify the reverse and forward window in number of rows + * to include. The current row is considered to belong to the reverse window but not the forward window. Also, + * negative values are allowed and can be used to generate completely forward or completely reverse windows. Here + * are some examples of window values: *

          *
        • {@code revTicks = 1, fwdTicks = 0} - contains only the current row
        • *
        • {@code revTicks = 10, fwdTicks = 0} - contains 9 previous rows and the current row
        • @@ -1673,9 +1673,9 @@ static UpdateByOperation RollingStd(long revTicks, long fwdTicks, String... pair /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time - * as the windowing unit. This function accepts {@link Duration duration} as the reverse window - * parameter. A row containing a {@code null} in the timestamp column belongs to no window and will not have a value - * computed or be considered in the windows of other rows. + * as the windowing unit. This function accepts {@link Duration duration} as the reverse window parameter. A row + * containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or be + * considered in the windows of other rows. * * Here are some examples of window values: *
            @@ -1698,10 +1698,10 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, S /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time - * as the windowing unit. This function accepts {@link Duration durations} as the reverse and forward - * window parameters. Negative values are allowed and can be used to generate completely forward or completely - * reverse windows. A row containing a {@code null} in the timestamp column belongs to no window and will not have a - * value computed or be considered in the windows of other rows. + * as the windowing unit. This function accepts {@link Duration durations} as the reverse and forward window + * parameters. Negative values are allowed and can be used to generate completely forward or completely reverse + * windows. A row containing a {@code null} in the timestamp column belongs to no window and will not have a value + * computed or be considered in the windows of other rows. * * Here are some examples of window values: *
              @@ -1735,9 +1735,9 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, D /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time - * as the windowing unit. This function accepts {@code nanoseconds} as the reverse window parameters. A - * row containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or - * be considered in the windows of other rows. + * as the windowing unit. This function accepts {@code nanoseconds} as the reverse window parameters. A row + * containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or be + * considered in the windows of other rows. * * Sample standard deviation is computed using Bessel's correction * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased @@ -1754,10 +1754,10 @@ static UpdateByOperation RollingStd(String timestampCol, long revTime, String... /** * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time - * as the windowing unit. This function accepts {@code nanoseconds} as the reverse and forward window - * parameters. Negative values are allowed and can be used to generate completely forward or completely reverse - * windows. A row containing a {@code null} in the timestamp column belongs to no window and will not have a value - * computed or be considered in the windows of other rows. + * as the windowing unit. This function accepts {@code nanoseconds} as the reverse and forward window parameters. + * Negative values are allowed and can be used to generate completely forward or completely reverse windows. A row + * containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or be + * considered in the windows of other rows. 
* * Sample standard deviation is computed using Bessel's correction * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased