diff --git a/docs/references/http_api_reference.mdx b/docs/references/http_api_reference.mdx
index 0851271e11..2108f31fcc 100644
--- a/docs/references/http_api_reference.mdx
+++ b/docs/references/http_api_reference.mdx
@@ -1776,6 +1776,7 @@ Searches for data in a specified table. The search can range from a simple vecto
 - `"filter"`: `string`
 - `"fusion"`: `object`
 - `"sort"` : `object[]`
+- `"group_by"`: `string[]`
 - `"limit"` : `string`
 - `"offset"` : `string`
 - `"option"` : `object`
@@ -2013,6 +2014,9 @@ curl --request GET \
 - `"sort"` : `object[]`
   Defines how to sort the results.
 
+- `"group_by"`: `string[]`
+  A list of expressions to group by. Each string is a column name or an expression.
+
 - `"limit"` : `string`
   Indicates the limit row count.
 
diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md
index fc1f25ddd1..0970cf3f83 100644
--- a/docs/references/pysdk_api_reference.md
+++ b/docs/references/pysdk_api_reference.md
@@ -1876,6 +1876,36 @@ table_obj.output(["c1", "c2"]).sort([["c2", SortType.Asc], ["c1", SortType.Desc]
 
 ---
 
+### group_by
+
+```python
+table_object.group_by(group_by_columns)
+```
+
+Groups the query results by the columns or expressions specified in `group_by_columns`.
+
+#### Parameters
+
+##### group_by_columns: `list[str] | str`, *Required*
+
+A string or list of strings specifying the expressions to group by. Each string represents a column name or an expression.
+
+#### Returns
+
+- Success: An `infinity.local_infinity.table.LocalTable` object in embedded mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode.
+- Failure: `InfinityException`
+  - `error_code`: `int` A non-zero value indicating a specific error condition.
+  - `error_msg`: `str` A message providing additional details about the error.
+
+#### Examples
+
+```python
+table_obj.output(["c1", "sum(c2)"]).group_by(["c1"]).to_df()
+table_obj.output(["c1", "avg(c1)", "count(c2)", "min(c3)", "max(c4)"]).group_by(["c1", "c1+c2"]).to_df()
+```
+
+---
+
 ### limit
 
 ```python
diff --git a/python/infinity_embedded/local_infinity/query_builder.py b/python/infinity_embedded/local_infinity/query_builder.py
index 3e1967e232..30b4cd3723 100644
--- a/python/infinity_embedded/local_infinity/query_builder.py
+++ b/python/infinity_embedded/local_infinity/query_builder.py
@@ -421,6 +421,18 @@ def offset(self, offset: Optional[int]) -> InfinityLocalQueryBuilder:
         self._offset = offset_expr
         return self
 
+    def group_by(self, columns: List[str] | str) -> InfinityLocalQueryBuilder:
+        group_by_list = []
+        if isinstance(columns, list):
+            for column in columns:
+                parsed_expr = parse_expr(maybe_parse(column))
+                group_by_list.append(parsed_expr)
+        else:
+            parsed_expr = parse_expr(maybe_parse(columns))
+            group_by_list.append(parsed_expr)
+        self._group_by = group_by_list
+        return self
+
     def output(self, columns: Optional[list]) -> InfinityLocalQueryBuilder:
         self._columns = columns
         select_list: List[WrapParsedExpr] = []
diff --git a/python/infinity_embedded/local_infinity/table.py b/python/infinity_embedded/local_infinity/table.py
index 046dd6b4a7..025430e09f 100644
--- a/python/infinity_embedded/local_infinity/table.py
+++ b/python/infinity_embedded/local_infinity/table.py
@@ -378,6 +378,12 @@ def limit(self, limit: Optional[int]):
     def offset(self, offset: Optional[int]):
         self.query_builder.offset(offset)
         return self
+
+    def group_by(self, group_by_expr_list: Optional[List[str]] | Optional[str]):
+        if group_by_expr_list is None:
+            return self
+        self.query_builder.group_by(group_by_expr_list)
+        return self
 
     def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]):
         for order_by_expr in order_by_expr_list:
diff --git a/python/infinity_embedded/local_infinity/utils.py b/python/infinity_embedded/local_infinity/utils.py
index db13980551..2480670041 100644
--- a/python/infinity_embedded/local_infinity/utils.py
+++ b/python/infinity_embedded/local_infinity/utils.py
@@ -32,6 +32,11 @@ def traverse_conditions(cons, fn=None):
+    if isinstance(cons, exp.Alias):
+        expr = traverse_conditions(cons.args['this'])
+        expr.alias_name = cons.alias
+        return expr
+
     if isinstance(cons, exp.Binary):
         parsed_expr = WrapParsedExpr()
         function_expr = WrapFunctionExpr()
diff --git a/python/infinity_http.py b/python/infinity_http.py
index 128a4aacc6..6dbb88538d 100644
--- a/python/infinity_http.py
+++ b/python/infinity_http.py
@@ -376,7 +376,7 @@ def create_table(
                 fields.append(tmp)
         except:
             raise InfinityException(ErrorCode.SYNTAX_ERROR, "http adapter create table parse error")
-        print(fields)
+        # print(fields)
 
         url = f"databases/{self.database_name}/tables/{table_name}"
         h = self.net.set_up_header(["accept", "content-type"])
@@ -712,6 +712,7 @@ def __init__(self, output: list, table_http: table_http):
         self._match_sparse = []
         self._search_exprs = []
         self._sort = []
+        self._group_by = []
         self._limit = None
         self._offset = None
         self._option = None
@@ -730,6 +731,8 @@ def select(self):
             tmp["highlight"] = self._highlight
         if len(self._sort):
             tmp["sort"] = self._sort
+        if len(self._group_by):
+            tmp["group_by"] = self._group_by
         if self._limit is not None:
             tmp["limit"] = str(self._limit)
         if self._offset is not None:
@@ -774,6 +777,10 @@ def explain(self, ExplainType=ExplainType.Physical):
             tmp["output"] = self._output
         if len(self._highlight):
             tmp["highlight"] = self._highlight
+        if len(self._sort):
+            tmp["sort"] = self._sort
+        if len(self._group_by):
+            tmp["group_by"] = self._group_by
         if self._limit is not None:
             tmp["limit"] = self._limit
         if self._offset is not None:
@@ -827,6 +834,10 @@ def limit(self, limit_num):
     def offset(self, offset):
         self._offset = offset
         return self
+
+    def group_by(self, group_by_list):
+        self._group_by = group_by_list
+        return self
 
     def option(self, option: {}):
         # option_str = json.dumps(option)
@@ -915,32 +926,37 @@ def to_result(self):
         for col in col_types:
             df_dict[col] = ()
 
+        line_i = 0
         for res in self.output_res:
-            for k in res:
-                # print(res[k])
-                if k not in df_dict:
-                    df_dict[k] = ()
-                tup = df_dict[k]
-                if isinstance(res[k], (int, float)):
-                    new_tup = tup + (res[k],)
-                elif is_list(res[k]):
-                    new_tup = tup + (ast.literal_eval(res[k]),)
-                elif is_date(res[k]):
-                    new_tup = tup + (res[k],)
-                elif is_time(res[k]):
-                    new_tup = tup + (res[k],)
-                elif is_datetime(res[k]):
-                    new_tup = tup + (res[k],)
-                elif is_sparse(res[k]):  # sparse vector
-                    sparse_vec = str2sparse(res[k])
+            for col in res:
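+                # Each row in the HTTP response is now a list of single-entry
+                # {column_name: value} objects (see the http_search.cpp change
+                # below), which preserves column order; the line_i check below
+                # keeps only the first value per row for a duplicated name.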
+                col_name = next(iter(col))
+                v = col[col_name]
+                if col_name not in df_dict:
+                    df_dict[col_name] = ()
+                tup = df_dict[col_name]
+                if len(tup) == line_i + 1:
+                    continue
+                if isinstance(v, (int, float)):
+                    new_tup = tup + (v,)
+                elif is_list(v):
+                    new_tup = tup + (ast.literal_eval(v),)
+                elif is_date(v):
+                    new_tup = tup + (v,)
+                elif is_time(v):
+                    new_tup = tup + (v,)
+                elif is_datetime(v):
+                    new_tup = tup + (v,)
+                elif is_sparse(v):  # sparse vector
+                    sparse_vec = str2sparse(v)
                     new_tup = tup + (sparse_vec,)
                 else:
-                    if res[k].lower() == 'true':
-                        res[k] = True
-                    elif res[k].lower() == 'false':
-                        res[k] = False
-                    new_tup = tup + (res[k],)
-                df_dict[k] = new_tup
+                    if v.lower() == 'true':
+                        v = True
+                    elif v.lower() == 'false':
+                        v = False
+                    new_tup = tup + (v,)
+                df_dict[col_name] = new_tup
+            line_i += 1
 
         # print(self.output_res)
         # print(df_dict)
         extra_result = None
@@ -960,6 +976,7 @@ def to_result(self):
                 k1 = k1.replace("+", " ")
                 k1 = k1.replace("-", " ")
                 cols = k1.split(" ")
+                cols = [col for col in cols if col != ""]
                 # print(cols)
 
                 function_name = ""
@@ -974,6 +991,9 @@
                    elif is_float(col.strip()):
                        df_type[k] = dtype('float64')
                        df_type[k] = function_return_type(function_name, df_type[k])
+                    elif col == "/":
+                        df_type[k] = dtype('float64')
+                        break
                    else:
                        function_name = col.strip().lower()
                        if (function_name in functions):
diff --git a/python/infinity_sdk/infinity/remote_thrift/query_builder.py b/python/infinity_sdk/infinity/remote_thrift/query_builder.py
index 536b28605a..300b01595a 100644
--- a/python/infinity_sdk/infinity/remote_thrift/query_builder.py
+++ b/python/infinity_sdk/infinity/remote_thrift/query_builder.py
@@ -338,6 +338,17 @@ def offset(self, offset: Optional[int]) -> InfinityThriftQueryBuilder:
         offset_expr = ParsedExpr(type=expr_type)
         self._offset = offset_expr
         return self
+
+    def group_by(self, columns: List[str] | str) -> InfinityThriftQueryBuilder:
+        group_by_list: List[ParsedExpr] = []
+        if isinstance(columns, list):
+            for column in columns:
+                column = column.lower()
+                group_by_list.append(parse_expr(maybe_parse(column)))
+        else:
+            group_by_list.append(parse_expr(maybe_parse(columns.lower())))
+        self._groupby = group_by_list
+        return self
 
     def output(self, columns: Optional[list]) -> InfinityThriftQueryBuilder:
         self._columns = columns
diff --git a/python/infinity_sdk/infinity/remote_thrift/table.py b/python/infinity_sdk/infinity/remote_thrift/table.py
index 6d07e66f5a..ac8672b16b 100644
--- a/python/infinity_sdk/infinity/remote_thrift/table.py
+++ b/python/infinity_sdk/infinity/remote_thrift/table.py
@@ -397,6 +397,12 @@ def limit(self, limit: Optional[int]):
     def offset(self, offset: Optional[int]):
         self.query_builder.offset(offset)
         return self
+
+    def group_by(self, group_by_expr_list: Optional[List[str]] | Optional[str]):
+        if group_by_expr_list is None:
+            return self
+        self.query_builder.group_by(group_by_expr_list)
+        return self
 
     def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]):
         for order_by_expr in order_by_expr_list:
@@ -500,7 +506,7 @@ def _execute_query(self, query: Query) -> tuple[dict[str, list[Any]], dict[str,
                                                  highlight_list=query.highlight,
                                                  search_expr=query.search,
                                                  where_expr=query.filter,
-                                                 group_by_list=None,
+                                                 group_by_list=query.groupby,
                                                  limit_expr=query.limit,
                                                  offset_expr=query.offset,
                                                  order_by_list=query.sort,
diff --git a/python/infinity_sdk/infinity/remote_thrift/utils.py b/python/infinity_sdk/infinity/remote_thrift/utils.py
index 396d945dc6..dde44b6895 100644
--- a/python/infinity_sdk/infinity/remote_thrift/utils.py
+++ b/python/infinity_sdk/infinity/remote_thrift/utils.py
@@ -181,6 +181,11 @@ def generic_match_to_string(generic_match_expr: ttypes.GenericMatchExpr) -> str:
 
 def traverse_conditions(cons, fn=None) -> ttypes.ParsedExpr:
+    if isinstance(cons, exp.Alias):
+        expr = traverse_conditions(cons.args['this'])
+        expr.alias_name = cons.alias
+        return expr
+
     if isinstance(cons, exp.Binary):
         parsed_expr = ttypes.ParsedExpr()
         function_expr = ttypes.FunctionExpr()
diff --git a/python/test_pysdk/common/common_data.py b/python/test_pysdk/common/common_data.py
index 4f9d7f13fa..dd57c16d1e 100644
--- a/python/test_pysdk/common/common_data.py
+++ b/python/test_pysdk/common/common_data.py
@@ -38,7 +38,8 @@
 ]
 
 functions = [
-    "sqrt", "round", "ceil", "floor", "filter_text", "filter_fulltext", "or", "and", "not"
"filter_fulltext", "or", "and", "not" + "sqrt", "round", "ceil", "floor", "filter_text", "filter_fulltext", "or", "and", "not", "char_length", + "sum", "min", "max", "count", "avg" ] bool_functions = [ @@ -57,6 +58,19 @@ def function_return_type(function_name, param_type) : return dtype('bool') elif function_name == "trunc": return dtype('str_') + elif function_name == "char_length": + return dtype('int32') + elif function_name == "sum": + if(param_type == dtype('int8') or param_type == dtype('int16') or param_type == dtype('int32') or param_type == dtype('int64')): + return dtype('int64') + else: + return dtype('float64') + elif function_name == "min" or function_name == "max": + return param_type + elif function_name == "count": + return dtype('int64') + elif function_name == "avg": + return dtype('float64') else: return param_type diff --git a/python/test_pysdk/test_groupby.py b/python/test_pysdk/test_groupby.py new file mode 100644 index 0000000000..0a55db5258 --- /dev/null +++ b/python/test_pysdk/test_groupby.py @@ -0,0 +1,633 @@ +import importlib +import sys +import os +import os +import pandas as pd +import pytest +from common import common_values +import infinity +import infinity.index as index +import infinity_embedded +from numpy import dtype +from infinity.errors import ErrorCode +from infinity.common import ConflictType, SortType + +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(current_dir) +if parent_dir not in sys.path: + sys.path.insert(0, parent_dir) +from infinity_http import infinity_http +from common.utils import copy_data +from datetime import date, time, datetime + + +@pytest.fixture(scope="class") +def local_infinity(request): + return request.config.getoption("--local-infinity") + + +@pytest.fixture(scope="class") +def http(request): + return request.config.getoption("--http") + + +@pytest.fixture(scope="class") +def setup_class(request, local_infinity, http): + if local_infinity: + module = importlib.import_module("infinity_embedded.index") + globals()["index"] = module + module = importlib.import_module("infinity_embedded.common") + func = getattr(module, "ConflictType") + globals()["ConflictType"] = func + func = getattr(module, "InfinityException") + globals()["InfinityException"] = func + uri = common_values.TEST_LOCAL_PATH + request.cls.infinity_obj = infinity_embedded.connect(uri) + elif http: + uri = common_values.TEST_LOCAL_HOST + request.cls.infinity_obj = infinity_http() + else: + uri = common_values.TEST_LOCAL_HOST + request.cls.infinity_obj = infinity.connect(uri) + request.cls.uri = uri + yield + request.cls.infinity_obj.disconnect() + + +@pytest.mark.usefixtures("setup_class") +@pytest.mark.usefixtures("suffix") +class TestInfinity: + + # test/sql/dql/aggregate/test_groupby_aggtype.slt + def test_groupby_aggtype(self, suffix): + db_obj = self.infinity_obj.get_database("default_db") + + table_name = "test_simple_groupby" + suffix + db_obj.drop_table(table_name, ConflictType.Ignore) + table_obj = db_obj.create_table( + table_name, + { + "c1": {"type": "int"}, + "c2": {"type": "float"}, + }, + ) + table_obj.insert( + [ + {"c1": 1, "c2": 1.0}, + {"c1": 2, "c2": 2.0}, + {"c1": 1, "c2": 3.0}, + {"c1": 2, "c2": 4.0}, + {"c1": 1, "c2": 5.0}, + ] + ) + + # test agg sum + res, extra_result = table_obj.output(["c1", "sum(c2)"]).group_by("c1").to_df() + gt = pd.DataFrame({"c1": (1, 2), "sum(c2)": (9.0, 6.0)}).astype( + {"c1": dtype("int32"), "sum(c2)": dtype("float64")} + ) + pd.testing.assert_frame_equal( + 
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        # test agg min
+        res, extra_result = table_obj.output(["c1", "min(c2)"]).group_by("c1").to_df()
+        gt = pd.DataFrame({"c1": (1, 2), "min(c2)": (1.0, 2.0)}).astype(
+            {"c1": dtype("int32"), "min(c2)": dtype("float32")}
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        # test agg max
+        res, extra_result = table_obj.output(["c1", "max(c2)"]).group_by("c1").to_df()
+        gt = pd.DataFrame({"c1": (1, 2), "max(c2)": (5.0, 4.0)}).astype(
+            {"c1": dtype("int32"), "max(c2)": dtype("float32")}
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        # test agg count
+        res, extra_result = table_obj.output(["c1", "count(*)"]).group_by("c1").to_df()
+        gt = pd.DataFrame({"c1": (1, 2), "count(star)": (3, 2)}).astype(
+            {"c1": dtype("int32"), "count(star)": dtype("int64")}
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        # test agg avg
+        res, extra_result = table_obj.output(["c1", "avg(c2)"]).group_by("c1").to_df()
+        gt = pd.DataFrame({"c1": (1, 2), "(sum(c2) / count(c2))": (3.0, 3.0)}).astype(
+            {"c1": dtype("int32"), "(sum(c2) / count(c2))": dtype("float64")}
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        res = db_obj.drop_table(table_name, ConflictType.Ignore)
+        assert res.error_code == ErrorCode.OK
+
+    # test/sql/dql/aggregate/test_groupby_datatype.slt
+    def test_groupby_datatype(self, suffix):
+        db_obj = self.infinity_obj.get_database("default_db")
+
+        table_name = "test_simple_groupby" + suffix
+        db_obj.drop_table(table_name, ConflictType.Ignore)
+        table_obj = db_obj.create_table(
+            table_name,
+            {
+                "c1": {"type": "int"},
+                "c2": {"type": "float"},
+                "c3": {"type": "varchar"},
+            },
+        )
+        table_obj.insert(
+            [
+                {"c1": 1, "c2": 1.0, "c3": "abc"},
+                {"c1": 2, "c2": 2.0, "c3": "abcdef"},
+                {"c1": 3, "c2": 3.0, "c3": "abcdefghi"},
+                {"c1": 1, "c2": 4.0, "c3": "abcdefghijkl"},
+                {"c1": 2, "c2": 5.0, "c3": "abcdefghijklmno"},
+                {"c1": 3, "c2": 6.0, "c3": "abcdefghijklmnopqr"},
+                {"c1": 1, "c2": 1.0, "c3": "abcdefghijklmnopqrstu"},
+                {"c1": 2, "c2": 2.0, "c3": "abcdefghijklmnopqrstuvwx"},
+                {"c1": 3, "c2": 3.0, "c3": "abcdefghijklmnopqrstuvwxyz"},
+                {"c1": 1, "c2": 4.0, "c3": "abc"},
+                {"c1": 2, "c2": 5.0, "c3": "abcdef"},
+                {"c1": 3, "c2": 6.0, "c3": "abcdefghi"},
+                {"c1": 1, "c2": 1.0, "c3": "abcdefghijkl"},
+                {"c1": 2, "c2": 2.0, "c3": "abcdefghijklmno"},
+                {"c1": 3, "c2": 3.0, "c3": "abcdefghijklmnopqr"},
+                {"c1": 1, "c2": 4.0, "c3": "abcdefghijklmnopqrstu"},
+                {"c1": 2, "c2": 5.0, "c3": "abcdefghijklmnopqrstuvwx"},
+                {"c1": 3, "c2": 6.0, "c3": "abcdefghijklmnopqrstuvwxyz"},
+            ]
+        )
+
+        res, extra_result = (
+            table_obj.output(["c3", "sum(c1)", "sum(c2)"]).group_by("c3").to_df()
+        )
+        gt = pd.DataFrame(
+            {
+                "c3": [
+                    "abc",
+                    "abcdef",
+                    "abcdefghi",
+                    "abcdefghijkl",
+                    "abcdefghijklmno",
+                    "abcdefghijklmnopqr",
+                    "abcdefghijklmnopqrstu",
+                    "abcdefghijklmnopqrstuvwx",
+                    "abcdefghijklmnopqrstuvwxyz",
+                ],
+                "sum(c1)": [2, 4, 6, 2, 4, 6, 2, 4, 6],
+                "sum(c2)": [5.0, 7.0, 9.0, 5.0, 7.0, 9.0, 5.0, 7.0, 9.0],
+            }
+        ).astype(
+            {
+                "c3": dtype("object"),
+                "sum(c1)": dtype("int64"),
+                "sum(c2)": dtype("float64"),
+            }
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        res, extra_result = (
+            table_obj.output(["c3", "char_length(c3)", "sum(c1)"])
+            .group_by("c3")
+            .to_df()
+        )
+        gt = pd.DataFrame(
+            {
+                "c3": [
+                    "abc",
+                    "abcdef",
+                    "abcdefghi",
+                    "abcdefghijkl",
+                    "abcdefghijklmno",
+                    "abcdefghijklmnopqr",
+                    "abcdefghijklmnopqrstu",
+                    "abcdefghijklmnopqrstuvwx",
+                    "abcdefghijklmnopqrstuvwxyz",
+                ],
+                "char_length(c3)": [3, 6, 9, 12, 15, 18, 21, 24, 26],
+                "sum(c1)": [2, 4, 6, 2, 4, 6, 2, 4, 6],
+            }
+        ).astype({"char_length(c3)": dtype("int32"), "sum(c1)": dtype("int64")})
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        res = db_obj.drop_table(table_name, ConflictType.Ignore)
+        assert res.error_code == ErrorCode.OK
+
+        table_obj = db_obj.create_table(
+            table_name,
+            {
+                "c1": {"type": "int"},
+                "d": {"type": "date"},
+                "dt": {"type": "datetime"},
+                "t": {"type": "time"},
+                "ts": {"type": "timestamp"},
+            },
+        )
+        table_obj.insert(
+            [
+                {
+                    "c1": 1,
+                    "d": "1970-01-01",
+                    "dt": "1970-01-01 00:00:00",
+                    "t": "00:00:00",
+                    "ts": "1970-01-01 00:00:00",
+                },
+                {
+                    "c1": 2,
+                    "d": "1970-01-01",
+                    "dt": "1970-01-01 00:00:00",
+                    "t": "11:59:59",
+                    "ts": "1970-01-01 11:59:59",
+                },
+                {
+                    "c1": 3,
+                    "d": "1970-01-01",
+                    "dt": "1970-01-01 00:00:00",
+                    "t": "12:00:00",
+                    "ts": "1970-01-01 12:00:00",
+                },
+                {
+                    "c1": 4,
+                    "d": "1970-01-01",
+                    "dt": "1970-01-01 00:00:00",
+                    "t": "23:59:59",
+                    "ts": "1970-01-01 23:59:59",
+                },
+                {
+                    "c1": 5,
+                    "d": "1970-01-02",
+                    "dt": "1970-01-02 00:00:00",
+                    "t": "00:00:00",
+                    "ts": "1970-01-01 00:00:00",
+                },
+                {
+                    "c1": 6,
+                    "d": "1970-01-02",
+                    "dt": "1970-01-02 00:00:00",
+                    "t": "11:59:59",
+                    "ts": "1970-01-01 11:59:59",
+                },
+                {
+                    "c1": 7,
+                    "d": "1970-01-02",
+                    "dt": "1970-01-02 00:00:00",
+                    "t": "12:00:00",
+                    "ts": "1970-01-01 12:00:00",
+                },
+                {
+                    "c1": 8,
+                    "d": "1970-01-02",
+                    "dt": "1970-01-02 00:00:00",
+                    "t": "23:59:59",
+                    "ts": "1970-01-01 23:59:59",
+                },
+                {
+                    "c1": 9,
+                    "d": "1970-01-03",
+                    "dt": "1970-01-03 00:00:00",
+                    "t": "00:00:00",
+                    "ts": "1970-01-01 00:00:00",
+                },
+                {
+                    "c1": 10,
+                    "d": "1970-01-03",
+                    "dt": "1970-01-03 00:00:00",
+                    "t": "11:59:59",
+                    "ts": "1970-01-01 11:59:59",
+                },
+                {
+                    "c1": 11,
+                    "d": "1970-01-03",
+                    "dt": "1970-01-03 00:00:00",
+                    "t": "12:00:00",
+                    "ts": "1970-01-01 12:00:00",
+                },
+                {
+                    "c1": 12,
+                    "d": "1970-01-03",
+                    "dt": "1970-01-03 00:00:00",
+                    "t": "23:59:59",
+                    "ts": "1970-01-01 23:59:59",
+                },
+            ]
+        )
+
+        res, extra_result = table_obj.output(["d", "sum(c1)"]).group_by("d").to_df()
+        gt = pd.DataFrame(
+            {
+                "d": [
+                    "1970-01-01",
+                    "1970-01-02",
+                    "1970-01-03",
+                ],
+                "sum(c1)": [10, 26, 42],
+            }
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        res, extra_result = table_obj.output(["t", "sum(c1)"]).group_by("t").to_df()
+        gt = pd.DataFrame(
+            {
+                "t": [
+                    "00:00:00",
+                    "11:59:59",
+                    "12:00:00",
+                    "23:59:59",
+                ],
+                "sum(c1)": [15, 18, 21, 24],
+            }
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        res, extra_result = table_obj.output(["dt", "sum(c1)"]).group_by("dt").to_df()
+        gt = pd.DataFrame(
+            {
+                "dt": [
+                    "1970-01-01 00:00:00",
+                    "1970-01-02 00:00:00",
+                    "1970-01-03 00:00:00",
+                ],
+                "sum(c1)": [10, 26, 42],
+            }
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        res, extra_result = table_obj.output(["ts", "sum(c1)"]).group_by("ts").to_df()
+        gt = pd.DataFrame(
+            {
+                "ts": [
+                    "1970-01-01 00:00:00",
+                    "1970-01-01 11:59:59",
+                    "1970-01-01 12:00:00",
+                    "1970-01-01 23:59:59",
+                ],
+                "sum(c1)": [15, 18, 21, 24],
+            }
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+    # test/sql/dql/aggregate/test_groupby_complex.slt
+    def test_groupby_complex(self, suffix):
+        db_obj = self.infinity_obj.get_database("default_db")
+
+        table_name = "test_simple_groupby" + suffix
+        db_obj.drop_table(table_name, ConflictType.Ignore)
+        table_obj = db_obj.create_table(
+            table_name,
+            {
+                "c1": {"type": "int"},
+                "c2": {"type": "int"},
+                "c3": {"type": "float"},
+            },
+        )
+        table_obj.insert(
+            [
+                {"c1": 1, "c2": 1, "c3": 1.0},
+                {"c1": 2, "c2": 2, "c3": 2.0},
+                {"c1": 1, "c2": 3, "c3": 3.0},
+                {"c1": 2, "c2": 1, "c3": 4.0},
+                {"c1": 1, "c2": 2, "c3": 5.0},
+                {"c1": 2, "c2": 3, "c3": 6.0},
+                {"c1": 1, "c2": 1, "c3": 7.0},
+                {"c1": 2, "c2": 2, "c3": 8.0},
+                {"c1": 1, "c2": 3, "c3": 1.0},
+                {"c1": 2, "c2": 1, "c3": 2.0},
+                {"c1": 1, "c2": 2, "c3": 3.0},
+                {"c1": 2, "c2": 3, "c3": 4.0},
+            ]
+        )
+
+        res, extra_result = (
+            table_obj.output(["c1", "c2", "sum(c3)"]).group_by(["c1", "c2"]).to_df()
+        )
+        gt = pd.DataFrame(
+            {
+                "c1": [1, 1, 1, 2, 2, 2],
+                "c2": [1, 2, 3, 1, 2, 3],
+                "sum(c3)": [8.0, 8.0, 4.0, 6.0, 10.0, 10.0],
+            }
+        ).astype(
+            {"c1": dtype("int32"), "c2": dtype("int32"), "sum(c3)": dtype("float64")}
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        res, extra_result = (
+            table_obj.output(["c1+c2", "sum(c3)"]).group_by(["c1+c2"]).to_df()
+        )
+        gt = pd.DataFrame(
+            {
+                "(c1 + c2)": [2, 3, 4, 5],
+                "sum(c3)": [8.0, 14.0, 14.0, 10.0],
+            }
+        ).astype({"(c1 + c2)": dtype("int32"), "sum(c3)": dtype("float64")})
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        res, extra_result = (
+            table_obj.output(["abs(c1-c2)", "c1+c2", "sum(c3)"])
+            .group_by(["abs(c1-c2)", "c1+c2"])
+            .to_df()
+        )
+        gt = pd.DataFrame(
+            {
+                "abs((c1 - c2))": [0, 0, 1, 1, 2],
+                "(c1 + c2)": [2, 4, 3, 5, 4],
+                "sum(c3)": [8.0, 10.0, 14.0, 10.0, 4.0],
+            }
+        ).astype(
+            {
+                "abs((c1 - c2))": dtype("int32"),
+                "(c1 + c2)": dtype("int32"),
+                "sum(c3)": dtype("float64"),
+            }
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        res, extra_result = (
+            table_obj.output(["c1", "c2", "sum(c3)"])
+            .filter("c1 > 1")
+            .group_by(["c1", "c2"])
+            .to_df()
+        )
+        gt = pd.DataFrame(
+            {
+                "c1": [2, 2, 2],
+                "c2": [1, 2, 3],
+                "sum(c3)": [6.0, 10.0, 10.0],
+            }
+        ).astype(
+            {"c1": dtype("int32"), "c2": dtype("int32"), "sum(c3)": dtype("float64")}
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        res, extra_result = (
+            table_obj.output(["c1", "c3", "sum(c2)"]).group_by(["c1", "c3"]).to_df()
+        )
+        gt = pd.DataFrame(
+            {
+                "c1": [1, 1, 1, 1, 2, 2, 2, 2],
+                "c3": [1.0, 3.0, 5.0, 7.0, 2.0, 4.0, 6.0, 8.0],
+                "sum(c2)": [4, 5, 2, 1, 3, 4, 3, 2],
+            }
+        ).astype(
+            {"c1": dtype("int32"), "c3": dtype("float32"), "sum(c2)": dtype("int64")}
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        res, extra_result = (
+            table_obj.output(["c3", "sum(c1)", "sum(c2)"]).group_by("c3").to_df()
+        )
+        gt = pd.DataFrame(
+            {
+                "c3": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
+                "sum(c1)": [2, 4, 2, 4, 1, 2, 1, 2],
+                "sum(c2)": [4, 3, 5, 4, 2, 3, 1, 2],
+            }
+        ).astype(
+            {
+                "c3": dtype("float32"),
+                "sum(c1)": dtype("int64"),
+                "sum(c2)": dtype("int64"),
+            }
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        res, extra_result = (
+            table_obj.output(["c3", "count(c3)", "sum(c1)", "sum(c2)"])
+            .group_by("c3")
+            .to_df()
+        )
+        gt = pd.DataFrame(
+            {
+                "c3": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
+                "count(c3)": [2, 2, 2, 2, 1, 1, 1, 1],
+                "sum(c1)": [2, 4, 2, 4, 1, 2, 1, 2],
+                "sum(c2)": [4, 3, 5, 4, 2, 3, 1, 2],
+            }
+        ).astype(
+            {
+                "c3": dtype("float32"),
+                "count(c3)": dtype("int64"),
+                "sum(c1)": dtype("int64"),
+                "sum(c2)": dtype("int64"),
+            }
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        res, extra_result = (
+            table_obj.output(["c3", "sum(c1)", "sum(c2)"])
+            .group_by("c3")
+            .sort([["c3", SortType.Desc]])
+            .to_df()
+        )
+        gt = pd.DataFrame(
+            {
+                "c3": [8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0],
+                "sum(c1)": [2, 1, 2, 1, 4, 2, 4, 2],
+                "sum(c2)": [2, 1, 3, 2, 4, 5, 3, 4],
+            }
+        ).astype(
+            {
+                "c3": dtype("float32"),
+                "sum(c1)": dtype("int64"),
+                "sum(c2)": dtype("int64"),
+            }
+        )
+        pd.testing.assert_frame_equal(res, gt)
+
+        res, extra_result = (
+            table_obj.output(["c3", "sum(c1) as sum1", "sum(c2) as sum2"])
+            .group_by("c3")
+            .sort([["sum1", SortType.Asc], ["sum2", SortType.Asc]])
+            .to_df()
+        )
+        gt = pd.DataFrame(
+            {
+                "c3": [7.0, 5.0, 8.0, 6.0, 1.0, 3.0, 2.0, 4.0],
+                "sum1": [1, 1, 2, 2, 2, 2, 4, 4],
+                "sum2": [1, 2, 2, 3, 4, 5, 3, 4],
+            }
+        ).astype(
+            {
+                "c3": dtype("float32"),
+                "sum1": dtype("int64"),
+                "sum2": dtype("int64"),
+            }
+        )
+        pd.testing.assert_frame_equal(res, gt)
+
+        table_obj.delete("c1 <= 1")
+
+        res, extra_result = (
+            table_obj.output(["c1", "c2", "sum(c3)"])
+            .group_by(["c1", "c2"])
+            .sort([["c1", SortType.Asc], ["c2", SortType.Asc]])
+            .to_df()
+        )
+        gt = pd.DataFrame(
+            {
+                "c1": [2, 2, 2],
+                "c2": [1, 2, 3],
+                "sum(c3)": [6.0, 10.0, 10.0],
+            }
+        ).astype(
+            {"c1": dtype("int32"), "c2": dtype("int32"), "sum(c3)": dtype("float64")}
+        )
+        pd.testing.assert_frame_equal(
+            res.sort_values(by=res.columns.tolist()).reset_index(drop=True),
+            gt.sort_values(by=gt.columns.tolist()).reset_index(drop=True),
+        )
+
+        res = db_obj.drop_table(table_name, ConflictType.Error)
+        assert res.error_code == ErrorCode.OK
diff --git a/src/embedded_infinity/wrap_infinity.cpp b/src/embedded_infinity/wrap_infinity.cpp
index 90bc038740..d05a40a2e7 100644
--- a/src/embedded_infinity/wrap_infinity.cpp
+++ b/src/embedded_infinity/wrap_infinity.cpp
@@ -526,34 +526,50 @@ ParsedExpr *WrapInExpr::GetParsedExpr(Status &status) {
 }
 
 ParsedExpr *WrapParsedExpr::GetParsedExpr(Status &status) {
+    ParsedExpr *result = nullptr;
     status.code_ = ErrorCode::kOk;
     switch (type) {
         case ParsedExprType::kConstant:
-            return constant_expr.GetParsedExpr(status);
+            result = constant_expr.GetParsedExpr(status);
+            break;
         case ParsedExprType::kColumn:
-            return column_expr.GetParsedExpr(status);
+            result = column_expr.GetParsedExpr(status);
+            break;
         case ParsedExprType::kFunction:
-            return function_expr.GetParsedExpr(status);
+            result = function_expr.GetParsedExpr(status);
+            break;
         case ParsedExprType::kBetween:
-            return between_expr.GetParsedExpr(status);
+            result = between_expr.GetParsedExpr(status);
+            break;
         case ParsedExprType::kKnn:
-            return knn_expr.GetParsedExpr(status);
+            result = knn_expr.GetParsedExpr(status);
+            break;
         case ParsedExprType::kMatch:
-            return match_expr.GetParsedExpr(status);
+            result = match_expr.GetParsedExpr(status);
+            break;
         case ParsedExprType::kMatchSparse:
-            return match_sparse_expr.GetParsedExpr(status);
+            result = match_sparse_expr.GetParsedExpr(status);
+            break;
         case ParsedExprType::kMatchTensor:
-            return match_tensor_expr.GetParsedExpr(status);
+            result = match_tensor_expr.GetParsedExpr(status);
+            break;
         case ParsedExprType::kFusion:
-            return fusion_expr.GetParsedExpr(status);
+            result = fusion_expr.GetParsedExpr(status);
+            break;
         case ParsedExprType::kSearch:
-            return search_expr.GetParsedExpr(status);
+            result = search_expr.GetParsedExpr(status);
+            break;
        case ParsedExprType::kIn:
-            return in_expr.GetParsedExpr(status);
+            result = in_expr.GetParsedExpr(status);
+            break;
         default:
             status = Status::InvalidParsedExprType();
+            break;
+    }
+    if (result) {
+        result->alias_ = alias_name;
     }
-    return nullptr;
+    return result;
 }
 
 OrderByExpr *WrapOrderByExpr::GetOrderByExpr(Status &status) {
diff --git a/src/embedded_infinity/wrap_infinity.cppm b/src/embedded_infinity/wrap_infinity.cppm
index 678d9534ed..7093a2d19e 100644
--- a/src/embedded_infinity/wrap_infinity.cppm
+++ b/src/embedded_infinity/wrap_infinity.cppm
@@ -281,6 +281,7 @@ export struct WrapParsedExpr {
     WrapFusionExpr fusion_expr;
     WrapSearchExpr search_expr;
     WrapInExpr in_expr;
+    String alias_name;
 
     ParsedExpr *GetParsedExpr(Status &status);
 };
diff --git a/src/embedded_infinity_ext.cpp b/src/embedded_infinity_ext.cpp
index 7c243df794..28eb74031a 100644
--- a/src/embedded_infinity_ext.cpp
+++ b/src/embedded_infinity_ext.cpp
@@ -229,7 +229,8 @@ NB_MODULE(embedded_infinity_ext, m) {
         .def_rw("match_tensor_expr", &WrapParsedExpr::match_tensor_expr)
         .def_rw("fusion_expr", &WrapParsedExpr::fusion_expr)
         .def_rw("search_expr", &WrapParsedExpr::search_expr)
-        .def_rw("in_expr", &WrapParsedExpr::in_expr);
+        .def_rw("in_expr", &WrapParsedExpr::in_expr)
+        .def_rw("alias_name", &WrapParsedExpr::alias_name);
 
     // Bind WrapOrderByExpr
     nb::class_<WrapOrderByExpr>(m, "WrapOrderByExpr")
diff --git a/src/executor/operator/physical_aggregate.cpp b/src/executor/operator/physical_aggregate.cpp
index 243ecef7d9..f8faf84c3e 100644
--- a/src/executor/operator/physical_aggregate.cpp
+++ b/src/executor/operator/physical_aggregate.cpp
@@ -100,7 +100,7 @@ bool PhysicalAggregate::Execute(QueryContext *query_context, OperatorState *oper
         groupby_table->data_blocks_.emplace_back(DataBlock::MakeUniquePtr());
         DataBlock *output_data_block = groupby_table->data_blocks_.back().get();
-        output_data_block->Init(groupby_types, 1);
+        output_data_block->Init(groupby_types);
 
         ExpressionEvaluator groupby_executor;
         groupby_executor.Init(input_data_block);
diff --git a/src/network/http/http_search.cpp b/src/network/http/http_search.cpp
index 30d883fc41..6dab54298a 100644
--- a/src/network/http/http_search.cpp
+++ b/src/network/http/http_search.cpp
@@ -67,6 +67,7 @@ void HTTPSearch::Process(Infinity *infinity_ptr,
     Vector<ParsedExpr *> *output_columns{nullptr};
     Vector<ParsedExpr *> *highlight_columns{nullptr};
     Vector<OrderByExpr *> *order_by_list{nullptr};
+    Vector<ParsedExpr *> *group_by_columns{nullptr};
     bool total_hits_count_flag{};
     DeferFn defer_fn([&]() {
         if (output_columns != nullptr) {
@@ -148,6 +149,18 @@ void HTTPSearch::Process(Infinity *infinity_ptr,
             if (order_by_list == nullptr) {
                 return;
             }
+        } else if (IsEqual(key, "group_by")) {
+            if (group_by_columns != nullptr) {
+                response["error_code"] = ErrorCode::kInvalidExpression;
+                response["error_message"] = "More than one group by field.";
+                return;
+            }
+            auto &group_by_list = elem.value();
+
+            group_by_columns = ParseOutput(group_by_list, http_status, response);
+            if (group_by_columns == nullptr) {
+                return;
+            }
         } else if (IsEqual(key, "filter")) {
             if (filter) {
@@ -240,12 +253,13 @@ void HTTPSearch::Process(Infinity *infinity_ptr,
                                           output_columns,
                                           highlight_columns,
                                           order_by_list,
-                                          nullptr,
+                                          group_by_columns,
                                           total_hits_count_flag);
 
         output_columns = nullptr;
         highlight_columns = nullptr;
         order_by_list = nullptr;
+        group_by_columns = nullptr;
         if (result.IsOk()) {
             SizeT block_rows = result.result_table_->DataBlockCount();
             for (SizeT block_id = 0; block_id < block_rows; ++block_id) {
@@ -256,6 +270,7 @@ void HTTPSearch::Process(Infinity *infinity_ptr,
                 for (int row = 0; row < row_count; ++row) {
                     nlohmann::json json_result_row;
                     for (SizeT col = 0; col < column_cnt; ++col) {
+                        nlohmann::json json_result_cell;
                         Value value = data_block->GetValue(col, row);
                         const String &column_name = result.result_table_->GetColumnNameById(col);
                         switch (value.type().type()) {
@@ -263,22 +278,23 @@ void HTTPSearch::Process(Infinity *infinity_ptr,
                             case LogicalType::kSmallInt:
                             case LogicalType::kInteger:
                             case LogicalType::kBigInt: {
-                                json_result_row[column_name] = value.ToInteger();
+                                json_result_cell[column_name] = value.ToInteger();
                                 break;
                             }
                             case LogicalType::kFloat: {
-                                json_result_row[column_name] = value.ToFloat();
+                                json_result_cell[column_name] = value.ToFloat();
                                 break;
                             }
                             case LogicalType::kDouble: {
-                                json_result_row[column_name] = value.ToDouble();
+                                json_result_cell[column_name] = value.ToDouble();
                                 break;
                             }
                             default: {
-                                json_result_row[column_name] = value.ToString();
+                                json_result_cell[column_name] = value.ToString();
                                 break;
                             }
                         }
+                        json_result_row.push_back(json_result_cell);
                     }
                     response["output"].push_back(json_result_row);
                 }
diff --git a/src/network/infinity_thrift_service.cpp b/src/network/infinity_thrift_service.cpp
index bf9b8fc3b6..8d424160f0 100644
--- a/src/network/infinity_thrift_service.cpp
+++ b/src/network/infinity_thrift_service.cpp
@@ -655,6 +655,29 @@ void InfinityThriftService::Select(infinity_thrift_rpc::SelectResponse &response
             order_by_expr = nullptr;
         }
     }
+    Vector<ParsedExpr *> *group_by_list = nullptr;
+    DeferFn defer_fn10([&]() {
+        if (group_by_list != nullptr) {
+            for (auto &expr_ptr : *group_by_list) {
+                delete expr_ptr;
+                expr_ptr = nullptr;
+            }
+            delete group_by_list;
+            group_by_list = nullptr;
+        }
+    });
+    if (!request.group_by_list.empty()) {
+        group_by_list = new Vector<ParsedExpr *>();
+        group_by_list->reserve(request.group_by_list.size());
+        for (auto &expr : request.group_by_list) {
+            auto parsed_expr = GetParsedExprFromProto(parsed_expr_status, expr);
+            if (!parsed_expr_status.ok()) {
+                ProcessStatus(response, parsed_expr_status);
+                return;
+            }
+            group_by_list->emplace_back(parsed_expr);
+        }
+    }
 
     // auto end2 = std::chrono::steady_clock::now();
     // phase_2_duration_ += end2 - start2;
@@ -670,7 +693,7 @@ void InfinityThriftService::Select(infinity_thrift_rpc::SelectResponse &response
                                             output_columns,
                                             highlight_columns,
                                             order_by_list,
-                                            nullptr,
+                                            group_by_list,
                                             request.total_hits_count);
 
     output_columns = nullptr;
     highlight_columns = nullptr;
@@ -679,6 +702,7 @@ void InfinityThriftService::Select(infinity_thrift_rpc::SelectResponse &response
     limit = nullptr;
     offset = nullptr;
     order_by_list = nullptr;
+    group_by_list = nullptr;
 
     // auto end3 = std::chrono::steady_clock::now();
     //
     // phase_3_duration_ += end3 - start3;
@@ -2383,31 +2407,28 @@ InExpr *InfinityThriftService::GetInExprFromProto(Status &status, const infinity
 }
 
 ParsedExpr *InfinityThriftService::GetParsedExprFromProto(Status &status, const infinity_thrift_rpc::ParsedExpr &expr) {
+    ParsedExpr *result = nullptr;
     if (expr.type.__isset.column_expr == true) {
-        auto parsed_expr = GetColumnExprFromProto(*expr.type.column_expr);
-        return parsed_expr;
+        result = GetColumnExprFromProto(*expr.type.column_expr);
     } else if (expr.type.__isset.constant_expr == true) {
-        auto parsed_expr = GetConstantFromProto(status, *expr.type.constant_expr);
-        return parsed_expr;
+        result = GetConstantFromProto(status, *expr.type.constant_expr);
     } else if (expr.type.__isset.function_expr == true) {
-        auto parsed_expr = GetFunctionExprFromProto(status, *expr.type.function_expr);
-        return parsed_expr;
+        result = GetFunctionExprFromProto(status, *expr.type.function_expr);
     } else if (expr.type.__isset.knn_expr == true) {
-        auto parsed_expr = GetKnnExprFromProto(status, *expr.type.knn_expr);
-        return parsed_expr;
+        result = GetKnnExprFromProto(status, *expr.type.knn_expr);
     } else if (expr.type.__isset.match_expr == true) {
-        auto parsed_expr = GetMatchExprFromProto(status, *expr.type.match_expr);
-        return parsed_expr;
+        result = GetMatchExprFromProto(status, *expr.type.match_expr);
     } else if (expr.type.__isset.fusion_expr == true) {
-        auto parsed_expr = GetFusionExprFromProto(*expr.type.fusion_expr);
-        return parsed_expr;
+        result = GetFusionExprFromProto(*expr.type.fusion_expr);
     } else if (expr.type.__isset.in_expr == true) {
-        auto parsed_expr = GetInExprFromProto(status, *expr.type.in_expr);
-        return parsed_expr;
+        result = GetInExprFromProto(status, *expr.type.in_expr);
     } else {
         status = Status::InvalidParsedExprType();
     }
-    return nullptr;
+    if (result) {
+        result->alias_ = expr.alias_name;
+    }
+    return result;
 }
 
 OrderByExpr *InfinityThriftService::GetOrderByExprFromProto(Status &status, const infinity_thrift_rpc::OrderByExpr &expr) {
diff --git a/src/planner/binder/order_binder.cpp b/src/planner/binder/order_binder.cpp
index bbaebdc802..b029cbcf5a 100644
--- a/src/planner/binder/order_binder.cpp
+++ b/src/planner/binder/order_binder.cpp
@@ -62,6 +62,38 @@ void OrderBinder::PushExtraExprToSelectList(ParsedExpr *expr, const SharedPtr
 
 SharedPtr<BaseExpression> OrderBinder::BuildExpression(const ParsedExpr &expr, BindContext *bind_context_ptr, i64 depth, bool root) {
+    String expr_name = expr.GetName();
+    // If the expression is not an aggregate function and comes from the GROUP BY list.
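+    // Such an expression is rebound below as a column reference into the group-by
+    // (or aggregate) output table, so ORDER BY resolves against the grouped result
+    // rows rather than against the base table.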
+    if (bind_context_ptr->group_index_by_name_.contains(expr_name)) {
+        i64 groupby_index = bind_context_ptr->group_index_by_name_[expr_name];
+        const SharedPtr<BaseExpression> &group_expr = bind_context_ptr->group_exprs_[groupby_index];
+
+        SharedPtr<BaseExpression> result = ColumnExpression::Make(group_expr->Type(),
+                                                                  bind_context_ptr->group_by_table_name_,
+                                                                  bind_context_ptr->group_by_table_index_,
+                                                                  expr_name,
+                                                                  groupby_index,
+                                                                  depth);
+
+        result->source_position_ = SourcePosition(bind_context_ptr->binding_context_id_, ExprSourceType::kGroupBy);
+        return result;
+    }
+    // If the expression comes from the aggregate function list.
+    if (bind_context_ptr->aggregate_index_by_name_.contains(expr_name)) {
+        i64 aggregate_index = bind_context_ptr->aggregate_index_by_name_[expr_name];
+        const SharedPtr<BaseExpression> &aggregate_expr = bind_context_ptr->aggregate_exprs_[aggregate_index];
+
+        SharedPtr<BaseExpression> result = ColumnExpression::Make(aggregate_expr->Type(),
+                                                                  bind_context_ptr->aggregate_table_name_,
+                                                                  bind_context_ptr->aggregate_table_index_,
+                                                                  expr_name,
+                                                                  aggregate_index,
+                                                                  depth);
+
+        result->source_position_ = SourcePosition(bind_context_ptr->binding_context_id_, ExprSourceType::kAggregate);
+        return result;
+    }
+
     if (expr.type_ == ParsedExprType::kFunction or expr.type_ == ParsedExprType::kColumn) {
         return ExpressionBinder::BuildExpression(expr, bind_context_ptr, depth, root);
     }
diff --git a/test/sql/dql/aggregate/test_groupby_aggtype.slt b/test/sql/dql/aggregate/test_groupby_aggtype.slt
index 1d8a540848..9946b8cdbf 100644
--- a/test/sql/dql/aggregate/test_groupby_aggtype.slt
+++ b/test/sql/dql/aggregate/test_groupby_aggtype.slt
@@ -18,12 +18,6 @@ SELECT c1, SUM(c2) FROM simple_groupby GROUP BY c1;
 1 9.000000
 2 6.000000
 
-query IR rowsort
-SELECT c1, AVG(c2) FROM simple_groupby GROUP BY c1;
-----
-1 3.000000
-2 3.000000
-
 query IR rowsort
 SELECT c1, MIN(c2) FROM simple_groupby GROUP BY c1;
 ----
@@ -42,7 +36,7 @@ SELECT c1, COUNT(c2) FROM simple_groupby GROUP BY c1;
 1 3
 2 2
 
-query IF rowsort
+query IR rowsort
 SELECT c1, AVG(c2) FROM simple_groupby GROUP BY c1;
 ----
 1 3.000000
diff --git a/test/sql/dql/aggregate/test_groupby_complex.slt b/test/sql/dql/aggregate/test_groupby_complex.slt
index 1c18613090..d6e1cb4e7f 100644
--- a/test/sql/dql/aggregate/test_groupby_complex.slt
+++ b/test/sql/dql/aggregate/test_groupby_complex.slt
@@ -29,6 +29,30 @@ SELECT c1, c2, SUM(c3) FROM simple_groupby GROUP BY c1, c2;
 2 2 10.000000
 2 3 10.000000
 
+query IR rowsort
+SELECT c1+c2, SUM(c3) FROM simple_groupby GROUP BY c1+c2;
+----
+2 8.000000
+3 14.000000
+4 14.000000
+5 10.000000
+
+query IIR rowsort
+SELECT abs(c1-c2), c1+c2, SUM(c3) FROM simple_groupby GROUP BY abs(c1-c2), c1+c2;
+----
+0 2 8.000000
+0 4 10.000000
+1 3 14.000000
+1 5 10.000000
+2 4 4.000000
+
+query IIR rowsort
+SELECT c1, c2, SUM(c3) FROM simple_groupby WHERE c1 > 1 GROUP BY c1, c2;
+----
+2 1 6.000000
+2 2 10.000000
+2 3 10.000000
+
 query IRI rowsort
 SELECT c1, c3, SUM(c2) FROM simple_groupby GROUP BY c1, c3;
 ----
@@ -65,12 +89,29 @@ SELECT c3, COUNT(c3), SUM(c1), SUM(c2) FROM simple_groupby GROUP BY c3;
 7.000000 1 1 1
 8.000000 1 2 2
 
-query IIR rowsort
-SELECT c1, c2, SUM(c3) FROM simple_groupby WHERE c1 > 1 GROUP BY c1, c2;
-----
-2 1 6.000000
-2 2 10.000000
-2 3 10.000000
+query RII
+SELECT c3, SUM(c1), SUM(c2) FROM simple_groupby GROUP BY c3 ORDER BY c3 DESC;
+----
+8.000000 2 2
+7.000000 1 1
+6.000000 2 3
+5.000000 1 2
+4.000000 4 4
+3.000000 2 5
+2.000000 4 3
+1.000000 2 4
+
+query RII
+SELECT c3, SUM(c1) AS sum1, SUM(c2) AS sum2 FROM simple_groupby GROUP BY c3 ORDER BY sum1, sum2;
+----
+7.000000 1 1
+5.000000 1 2
+8.000000 2 2
+6.000000 2 3
+1.000000 2 4
+3.000000 2 5
+2.000000 4 3
+4.000000 4 4
 
 statement ok
 DELETE FROM simple_groupby WHERE c1 <= 1;
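
As an end-to-end illustration of the feature this patch adds, the following minimal sketch drives the new `group_by` support through the HTTP adapter exercised by `python/test_pysdk/test_groupby.py`. The table name and data here are hypothetical, and the server is assumed to be reachable the same way the tests reach it (an Infinity server listening on the default local address):

```python
from infinity.common import ConflictType
from infinity_http import infinity_http  # adapter under python/, as used by the tests

# Connect over HTTP; infinity_http() targets the default local server address.
client = infinity_http()
db = client.get_database("default_db")

# A throwaway table (name and rows are illustrative only).
db.drop_table("groupby_demo", ConflictType.Ignore)
table = db.create_table(
    "groupby_demo",
    {"c1": {"type": "int"}, "c2": {"type": "float"}},
)
table.insert([{"c1": 1, "c2": 1.0}, {"c1": 2, "c2": 2.0}, {"c1": 1, "c2": 3.0}])

# group_by() accepts a single expression or a list of expressions; the adapter
# serializes it into the new "group_by" field of the search request body.
res, extra_result = table.output(["c1", "sum(c2)"]).group_by("c1").to_df()
print(res)  # one row per distinct c1 value, with sum(c2) computed per group

db.drop_table("groupby_demo", ConflictType.Ignore)
```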