Skip to content

Commit

Permalink
Groupby3 (#2493)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

Add having expression

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Test cases
  • Loading branch information
small-turtle-1 authored Jan 24, 2025
1 parent 28c903a commit bfc3462
Show file tree
Hide file tree
Showing 21 changed files with 312 additions and 13 deletions.
2 changes: 1 addition & 1 deletion benchmark/local_infinity/knn/knn_query_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ int main(int argc, char *argv[]) {
auto select_rowid_expr = new FunctionExpr();
select_rowid_expr->func_name_ = "row_id";
output_columns->emplace_back(select_rowid_expr);
auto result = infinity->Search(db_name, table_name, search_expr, nullptr, nullptr, nullptr, output_columns, nullptr, nullptr, nullptr, false);
auto result = infinity->Search(db_name, table_name, search_expr, nullptr, nullptr, nullptr, output_columns, nullptr, nullptr, nullptr, nullptr, false);
{
auto &cv = result.result_table_->GetDataBlockById(0)->column_vectors;
auto &column = *cv[0];
Expand Down
4 changes: 4 additions & 0 deletions docs/references/http_api_reference.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -1777,6 +1777,7 @@ Searches for data in a specified table. The search can range from a simple vecto
- `"fusion"`: `object`
- `"sort"` : `object[]`
- `"group_by"`: `string[]`
- `"having"`: `string`
- `"limit"` : `string`
- `"offset"` : `string`
- `"option"` : `object`
Expand Down Expand Up @@ -2017,6 +2018,9 @@ curl --request GET \
- `"group_by"`: `string[]`
Indicates the expression to group by.
- `"having"`: `string`
Indicates the condition used to filter the groups produced by `"group_by"`.
- `"limit"` : `string`
Indicates the limit row count.
Expand Down
29 changes: 29 additions & 0 deletions docs/references/pysdk_api_reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -1906,6 +1906,35 @@ table_obj.output(["c1", "avg(c1)", "count(c2)", "min(c3)", "max(c4)"]).group_by(

---

### having

```python
table_object.having(having_expr)
```

Creates a filtering condition expression for the group-by result.

#### Parameters

##### having_expr: `str`, *Required*

A string specifying the having expression.

#### Returns

- Success: An `infinity.local_infinity.table.LocalTable` object in embedded mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode.
- Failure: `InfinityException`
- `error_code`: `int` A non-zero value indicating a specific error condition.
- `error_msg`: `str` A message providing additional details about the error.

#### Examples

```python
table_obj.output(["c1", "sum(c2)"]).group_by(["c1"]).having("sum(c2) > 10").to_df()
```

---

### limit

```python
Expand Down
4 changes: 4 additions & 0 deletions python/infinity_embedded/local_infinity/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ def search(self,
highlight_list: list[WrapParsedExpr] = [],
order_by_list: list[WrapOrderByExpr] = [],
group_by_list: list[WrapParsedExpr] = [],
having_expr: WrapParsedExpr = None,
total_hits_count_flag: bool = False,
search_expr: WrapSearchExpr = None,
where_expr: WrapParsedExpr = None,
Expand All @@ -220,6 +221,7 @@ def search(self,
highlight_list,
order_by_list,
group_by_list,
having_expr,
total_hits_count_flag,
search_expr,
where_expr,
Expand All @@ -236,6 +238,7 @@ def explain(self,
order_by_list: list[WrapOrderByExpr] = [],
group_by_list: list[WrapParsedExpr] = [],
search_expr: WrapSearchExpr = None,
having_expr: WrapParsedExpr = None,
where_expr: WrapParsedExpr = None,
limit_expr: WrapParsedExpr = None,
offset_expr: WrapParsedExpr = None):
Expand All @@ -249,6 +252,7 @@ def explain(self,
order_by_list,
group_by_list,
search_expr,
having_expr,
where_expr,
limit_expr,
offset_expr),
Expand Down
16 changes: 14 additions & 2 deletions python/infinity_embedded/local_infinity/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(
search: Optional[WrapSearchExpr],
filter: Optional[WrapParsedExpr],
group_by: Optional[List[WrapParsedExpr]],
having: Optional[WrapParsedExpr],
limit: Optional[WrapParsedExpr],
offset: Optional[WrapParsedExpr],
sort: Optional[List[WrapOrderByExpr]],
Expand All @@ -51,6 +52,7 @@ def __init__(
self.search = search
self.filter = filter
self.group_by = group_by
self.having = having
self.limit = limit
self.offset = offset
self.sort = sort
Expand All @@ -65,12 +67,13 @@ def __init__(
search: Optional[WrapSearchExpr],
filter: Optional[WrapParsedExpr],
group_by: Optional[List[WrapParsedExpr]],
having: Optional[WrapParsedExpr],
limit: Optional[WrapParsedExpr],
offset: Optional[WrapParsedExpr],
sort: Optional[List[WrapOrderByExpr]],
explain_type: Optional[BaseExplainType],
):
super().__init__(columns, highlight, search, filter, group_by, limit, offset, sort, None)
super().__init__(columns, highlight, search, filter, group_by, having, limit, offset, sort, None)
self.explain_type = explain_type


Expand All @@ -82,6 +85,7 @@ def __init__(self, table):
self._search = None
self._filter = None
self._group_by = None
self._having = None
self._limit = None
self._offset = None
self._sort = None
Expand All @@ -93,6 +97,7 @@ def reset(self):
self._search = None
self._filter = None
self._group_by = None
self._having = None
self._limit = None
self._offset = None
self._sort = None
Expand Down Expand Up @@ -421,7 +426,7 @@ def offset(self, offset: Optional[int]) -> InfinityLocalQueryBuilder:
self._offset = offset_expr
return self

def group_by(self, columns: List[str] | str):
def group_by(self, columns: List[str] | str) -> InfinityLocalQueryBuilder:
group_by_list = []
if isinstance(columns, list):
for column in columns:
Expand All @@ -433,6 +438,11 @@ def group_by(self, columns: List[str] | str):
self._group_by = group_by_list
return self

def having(self, having: Optional[str]) -> InfinityLocalQueryBuilder:
    """Set the HAVING condition applied to the grouped result.

    Args:
        having: The condition as a string, e.g. ``"sum(c2) > 10"``.
            ``None`` clears any condition set earlier on this builder.

    Returns:
        This builder, so calls can be chained.
    """
    if having is None:
        # The annotation allows None; store the same "no condition" value
        # that reset() uses rather than handing None to the expression parser.
        self._having = None
        return self
    self._having = traverse_conditions(condition(having))
    return self

def output(self, columns: Optional[list]) -> InfinityLocalQueryBuilder:
self._columns = columns
select_list: List[WrapParsedExpr] = []
Expand Down Expand Up @@ -700,6 +710,7 @@ def to_result(self) -> tuple[dict[str, list[Any]], dict[str, Any], {}]:
search=self._search,
filter=self._filter,
group_by=self._group_by,
having = self._having,
limit=self._limit,
offset=self._offset,
sort=self._sort,
Expand Down Expand Up @@ -731,6 +742,7 @@ def explain(self, explain_type=ExplainType.kPhysical) -> Any:
search=self._search,
filter=self._filter,
group_by=self._group_by,
having=self._having,
limit=self._limit,
offset=self._offset,
explain_type=explain_type,
Expand Down
6 changes: 6 additions & 0 deletions python/infinity_embedded/local_infinity/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,10 @@ def group_by(self, group_by_expr_list: Optional[List[str]] | Optional[str]):
self.query_builder.group_by(group_by_expr_list)
return self

def having(self, having: Optional[str]):
    """Set the HAVING condition for the query being built on this table.

    Forwards ``having`` unchanged to ``self.query_builder.having`` and
    returns this table object so further query calls can be chained.
    """
    self.query_builder.having(having)
    return self

def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]):
for order_by_expr in order_by_expr_list:
if len(order_by_expr) != 2:
Expand Down Expand Up @@ -466,6 +470,7 @@ def _execute_query(self, query: Query):
highlight_list=highlight,
order_by_list=order_by_list,
group_by_list=group_by_list,
having_expr=query.having,
total_hits_count_flag=total_hits_count_flag,
search_expr=query.search,
where_expr=query.filter,
Expand Down Expand Up @@ -498,6 +503,7 @@ def _explain_query(self, query: ExplainQuery) -> Any:
highlight_list=highlight,
order_by_list=order_by_list,
group_by_list=group_by_list,
having_expr=query.having,
search_expr=query.search,
where_expr=query.filter,
limit_expr=query.limit,
Expand Down
9 changes: 9 additions & 0 deletions python/infinity_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,7 @@ def __init__(self, output: list, table_http: table_http):
self._search_exprs = []
self._sort = []
self._group_by = []
self._having = []
self._limit = None
self._offset = None
self._option = None
Expand All @@ -733,6 +734,8 @@ def select(self):
tmp["sort"] = self._sort
if len(self._group_by):
tmp["group_by"] = self._group_by
if len(self._having):
tmp["having"] = self._having
if self._limit is not None:
tmp["limit"] = str(self._limit)
if self._offset is not None:
Expand Down Expand Up @@ -781,6 +784,8 @@ def explain(self, ExplainType=ExplainType.Physical):
tmp["sort"] = self._sort
if len(self._group_by):
tmp["group_by"] = self._group_by
if len(self._having):
tmp["having"] = self._having
if self._limit is not None:
tmp["limit"] = self._limit
if self._offset is not None:
Expand Down Expand Up @@ -838,6 +843,10 @@ def offset(self, offset):
def group_by(self, group_by_list):
    """Store the group-by expressions for the request and return ``self``."""
    # The value is stored as given; no parsing or validation happens here.
    self._group_by = group_by_list
    return self

def having(self, having_expr):
    """Store the having condition for the request and return ``self``."""
    # The value is stored as given; no parsing or validation happens here.
    # NOTE(review): __init__ defaults _having to [] and select()/explain()
    # test len(self._having), so a string is presumably expected here and
    # None would fail that len() check - confirm against callers.
    self._having = having_expr
    return self

def option(self, option: {}):
# option_str = json.dumps(option)
Expand Down
3 changes: 2 additions & 1 deletion python/infinity_sdk/infinity/remote_thrift/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def export_data(self, db_name: str, table_name: str, file_name: str, export_opti

@retry_wrapper
def select(self, db_name: str, table_name: str, select_list, highlight_list, search_expr,
where_expr, group_by_list, limit_expr, offset_expr, order_by_list, total_hits_count):
where_expr, group_by_list, having_expr, limit_expr, offset_expr, order_by_list, total_hits_count):
return self.client.Select(SelectRequest(session_id=self.session_id,
db_name=db_name,
table_name=table_name,
Expand All @@ -273,6 +273,7 @@ def select(self, db_name: str, table_name: str, select_list, highlight_list, sea
search_expr=search_expr,
where_expr=where_expr,
group_by_list=group_by_list,
having_expr=having_expr,
limit_expr=limit_expr,
offset_expr=offset_expr,
order_by_list=order_by_list,
Expand Down
14 changes: 13 additions & 1 deletion python/infinity_sdk/infinity/remote_thrift/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def __init__(
search: Optional[SearchExpr],
filter: Optional[ParsedExpr],
groupby: Optional[List[ParsedExpr]],
having: Optional[ParsedExpr],
limit: Optional[ParsedExpr],
offset: Optional[ParsedExpr],
sort: Optional[List[OrderByExpr]],
Expand All @@ -55,6 +56,7 @@ def __init__(
self.search = search
self.filter = filter
self.groupby = groupby
self.having = having
self.limit = limit
self.offset = offset
self.sort = sort
Expand All @@ -69,12 +71,13 @@ def __init__(
search: Optional[SearchExpr],
filter: Optional[ParsedExpr],
groupby: Optional[List[ParsedExpr]],
having: Optional[ParsedExpr],
limit: Optional[ParsedExpr],
offset: Optional[ParsedExpr],
sort: Optional[List[OrderByExpr]],
explain_type: Optional[ExplainType],
):
super().__init__(columns, highlight, search, filter, groupby, limit, offset, sort, False)
super().__init__(columns, highlight, search, filter, groupby, having, limit, offset, sort, False)
self.explain_type = explain_type


Expand All @@ -86,6 +89,7 @@ def __init__(self, table):
self._search = None
self._filter = None
self._groupby = None
self._having = None
self._limit = None
self._offset = None
self._sort = None
Expand All @@ -97,6 +101,7 @@ def reset(self):
self._search = None
self._filter = None
self._groupby = None
self._having = None
self._limit = None
self._offset = None
self._sort = None
Expand Down Expand Up @@ -349,6 +354,11 @@ def group_by(self, columns: List[str] | str) -> InfinityThriftQueryBuilder:
group_by_list.append(parse_expr(maybe_parse(columns)))
self._groupby = group_by_list
return self

def having(self, having: Optional[str]) -> InfinityThriftQueryBuilder:
    """Set the HAVING condition applied to the grouped result.

    Args:
        having: The condition as a string, e.g. ``"sum(c2) > 10"``.
            ``None`` clears any condition set earlier on this builder.

    Returns:
        This builder, so calls can be chained.
    """
    if having is None:
        # None is allowed by the annotation; keep the "no condition" state
        # (the value reset() stores) instead of parsing None as an expression.
        self._having = None
        return self
    self._having = traverse_conditions(condition(having))
    return self

def output(self, columns: Optional[list]) -> InfinityThriftQueryBuilder:
self._columns = columns
Expand Down Expand Up @@ -538,6 +548,7 @@ def to_result(self) -> tuple[dict[str, list[Any]], dict[str, Any], {}]:
search=self._search,
filter=self._filter,
groupby=self._groupby,
having=self._having,
limit=self._limit,
offset=self._offset,
sort=self._sort,
Expand Down Expand Up @@ -569,6 +580,7 @@ def explain(self, explain_type=ExplainType.Physical) -> Any:
search=self._search,
filter=self._filter,
groupby=self._groupby,
having=self._having,
limit=self._limit,
offset=self._offset,
sort=self._sort,
Expand Down
5 changes: 5 additions & 0 deletions python/infinity_sdk/infinity/remote_thrift/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,10 @@ def group_by(self, group_by_expr_list: Optional[List[str]] | Optional[str]):
self.query_builder.group_by(group_by_expr_list)
return self

def having(self, having_expr: Optional[str]):
    """Set the HAVING condition for the query being built on this table.

    Forwards ``having_expr`` unchanged to ``self.query_builder.having`` and
    returns this table object so further query calls can be chained.
    """
    self.query_builder.having(having_expr)
    return self

def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]):
for order_by_expr in order_by_expr_list:
if len(order_by_expr) != 2:
Expand Down Expand Up @@ -507,6 +511,7 @@ def _execute_query(self, query: Query) -> tuple[dict[str, list[Any]], dict[str,
search_expr=query.search,
where_expr=query.filter,
group_by_list=query.groupby,
having_expr=query.having,
limit_expr=query.limit,
offset_expr=query.offset,
order_by_list=query.sort,
Expand Down
Loading

0 comments on commit bfc3462

Please sign in to comment.