Skip to content

Commit

Permalink
Update http support for Array (#2494)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

Support create, insert and select Array type in HTTP API.

Issue link:#2455

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Test cases
  • Loading branch information
yangzq50 authored Jan 24, 2025
1 parent bfc3462 commit 4014ca3
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 85 deletions.
16 changes: 12 additions & 4 deletions example/http/insert_search_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ curl --request POST \
{
"name": "tensor",
"type": "tensor,4,float"
},
{
"name": "nested_array",
"type": "array,array,varchar"
}
]
} '
Expand All @@ -67,7 +71,8 @@ curl --request POST \
"vec": [1.0, 1.2, 0.8, 0.9],
"sparse_column": {"10":1.1, "20":2.2, "30": 3.3},
"year": 2024,
"tensor": [[1.0, 0.0, 0.0, 0.0], [1.1, 0.0, 0.0, 0.0]]
"tensor": [[1.0, 0.0, 0.0, 0.0], [1.1, 0.0, 0.0, 0.0]],
"nested_array": {"array": [{"array": ["hello", "world"]}, {"array": ["!"]}, {"array": [""]}, {"array": []}]}
},
{
"num": 2,
Expand All @@ -76,7 +81,8 @@ curl --request POST \
"vec": [4.0, 4.2, 4.3, 4.5],
"sparse_column": {"40":4.4, "50":5.5, "60": 6.6},
"year": 2023,
"tensor": [[4.0, 0.0, 4.3, 4.5], [4.0, 4.2, 4.4, 5.0]]
"tensor": [[4.0, 0.0, 4.3, 4.5], [4.0, 4.2, 4.4, 5.0]],
"nested_array": {"array": [{"array": ["hello world!"]}]}
},
{
"num": 3,
Expand All @@ -85,7 +91,8 @@ curl --request POST \
"vec": [4.0, 4.2, 4.3, 4.2],
"sparse_column": {"70":7.7, "80":8.8, "90": 9.9},
"year": 2019,
"tensor": [[0.9, 0.1, 0.0, 0.0], [1.1, 0.0, 0.0, 0.0]]
"tensor": [[0.9, 0.1, 0.0, 0.0], [1.1, 0.0, 0.0, 0.0]],
"nested_array": {"array": [{"array": []}]}
},
{
"num": 4,
Expand All @@ -94,7 +101,8 @@ curl --request POST \
"vec": [4.0, 4.2, 4.3, 4.5],
"sparse_column": {"20":7.7, "80":7.8, "90": 97.9},
"year": 2018,
"tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]]
"tensor": [[5.0, 4.2, 4.3, 4.5], [4.0, 4.2, 4.3, 4.4]],
"nested_array": {"array": []}
}
] '

Expand Down
24 changes: 24 additions & 0 deletions src/executor/operator/physical_import.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,30 @@ SharedPtr<ConstantExpr> BuildConstantExprFromJson(const nlohmann::json &json_obj
}
}
}
case nlohmann::json::value_t::object: {
if (json_object.size() == 1 && json_object.begin().key() == "array") {
const auto &array_obj = json_object.begin().value();
if (array_obj.type() != nlohmann::json::value_t::array) {
const auto error_info = fmt::format("Unrecognized json object type in array: {}, expect array!", array_obj.type_name());
RecoverableError(Status::ImportFileFormatError(error_info));
return nullptr;
}
auto res = MakeShared<ConstantExpr>(LiteralType::kCurlyBracketsArray);
for (const auto &elem : array_obj) {
auto elem_expr = BuildConstantExprFromJson(elem);
if (!elem_expr) {
RecoverableError(Status::ImportFileFormatError("Failed to build expr for element of array!"));
return nullptr;
}
res->curly_brackets_array_.push_back(std::move(elem_expr));
}
return res;
} else {
const auto error_info = fmt::format("Unrecognized json object type: {}", json_object.type_name());
RecoverableError(Status::ImportFileFormatError(error_info));
return nullptr;
}
}
default: {
const auto error_info = fmt::format("Unrecognized json object type: {}", json_object.type_name());
RecoverableError(Status::ImportFileFormatError(error_info));
Expand Down
196 changes: 115 additions & 81 deletions src/network/http_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ import logical_type;
import embedding_info;
import sparse_info;
import decimal_info;
import array_info;
import status;
import constant_expr;
import command_statement;
Expand All @@ -66,6 +67,98 @@ namespace {

using namespace infinity;

Pair<SharedPtr<DataType>, infinity::Status> ParseColumnType(const Span<const std::string> tokens, const nlohmann::json &field_element) {
SharedPtr<DataType> column_type;
if (tokens.empty()) {
return {nullptr, infinity::Status::ParserError("Empty column type")};
} else if (tokens[0] == "vector" || tokens[0] == "multivector" || tokens[0] == "tensor" || tokens[0] == "tensorarray") {
if (tokens.size() != 3) {
return {nullptr, infinity::Status::ParserError("vector / multivector / tensor / tensorarray type syntax error")};
}
const SizeT dimension = std::stoull(tokens[1]);
const auto &etype = tokens[2];
EmbeddingDataType e_data_type;
if (etype == "int" || etype == "integer" || etype == "int32") {
e_data_type = EmbeddingDataType::kElemInt32;
} else if (etype == "uint8") {
e_data_type = EmbeddingDataType::kElemUInt8;
} else if (etype == "int8" || etype == "tinyint") {
e_data_type = EmbeddingDataType::kElemInt8;
} else if (etype == "float" || etype == "float32") {
e_data_type = EmbeddingDataType::kElemFloat;
} else if (etype == "double" || etype == "float64") {
e_data_type = EmbeddingDataType::kElemDouble;
} else if (etype == "float16") {
e_data_type = EmbeddingDataType::kElemFloat16;
} else if (etype == "bfloat16") {
e_data_type = EmbeddingDataType::kElemBFloat16;
} else if (etype == "bit") {
e_data_type = EmbeddingDataType::kElemBit;
} else {
return {nullptr, infinity::Status::InvalidEmbeddingDataType(etype)};
}
auto type_info = EmbeddingInfo::Make(e_data_type, dimension);
auto logical_type_v = LogicalType::kInvalid;
if (tokens[0] == "vector") {
logical_type_v = LogicalType::kEmbedding;
} else if (tokens[0] == "multivector") {
logical_type_v = LogicalType::kMultiVector;
} else if (tokens[0] == "tensor") {
logical_type_v = LogicalType::kTensor;
} else if (tokens[0] == "tensorarray") {
logical_type_v = LogicalType::kTensorArray;
}
column_type = std::make_shared<DataType>(logical_type_v, std::move(type_info));
} else if (tokens[0] == "sparse") {
if (tokens.size() != 4) {
return {nullptr, infinity::Status::ParserError("sparse type syntax error")};
}
const SizeT dimension = std::stoull(tokens[1]);
const auto &dtype = tokens[2];
const auto &itype = tokens[3];
EmbeddingDataType d_data_type;
EmbeddingDataType i_data_type;
if (dtype == "integer" || dtype == "int" || dtype == "int32") {
d_data_type = EmbeddingDataType::kElemInt32;
} else if (dtype == "float" || dtype == "float32") {
d_data_type = EmbeddingDataType::kElemFloat;
} else if (dtype == "double" || dtype == "float64") {
d_data_type = EmbeddingDataType::kElemDouble;
} else {
return {nullptr, infinity::Status::InvalidEmbeddingDataType(dtype)};
}

if (itype == "tinyint" || itype == "int8") {
i_data_type = EmbeddingDataType::kElemInt8;
} else if (itype == "smallint" || itype == "int16") {
i_data_type = EmbeddingDataType::kElemInt16;
} else if (itype == "integer" || itype == "int" || itype == "int32") {
i_data_type = EmbeddingDataType::kElemInt32;
} else if (itype == "bigint" || itype == "int64") {
i_data_type = EmbeddingDataType::kElemInt64;
} else {
return {nullptr, infinity::Status::InvalidEmbeddingDataType(itype)};
}
auto type_info = SparseInfo::Make(d_data_type, i_data_type, dimension, SparseStoreType::kSort);
column_type = std::make_shared<DataType>(LogicalType::kSparse, std::move(type_info));
} else if (tokens[0] == "decimal") {
auto type_info = DecimalInfo::Make(field_element["precision"], field_element["scale"]);
column_type = std::make_shared<DataType>(LogicalType::kDecimal, std::move(type_info));
} else if (tokens[0] == "array") {
auto [element_type, stat] = ParseColumnType(tokens.subspan<1>(), field_element);
if (!stat.ok()) {
return {nullptr, std::move(stat)};
}
auto type_info = ArrayInfo::Make(std::move(*element_type));
column_type = std::make_shared<DataType>(LogicalType::kArray, std::move(type_info));
} else if (tokens.size() == 1) {
column_type = DataType::StringDeserialize(tokens[0]);
} else {
return {nullptr, infinity::Status::ParserError(fmt::format("{} isn't supported.", tokens[0]))};
}
return std::make_pair(std::move(column_type), infinity::Status::OK());
}

infinity::Status ParseColumnDefs(const nlohmann::json &fields, Vector<ColumnDef *> &column_definitions) {
SizeT column_count = fields.size();
for (SizeT column_id = 0; column_id < column_count; ++column_id) {
Expand All @@ -86,89 +179,12 @@ infinity::Status ParseColumnDefs(const nlohmann::json &fields, Vector<ColumnDef
tokens = SplitStrByComma(value_type);

SharedPtr<DataType> column_type{nullptr};
SharedPtr<TypeInfo> type_info{nullptr};
try {
if (tokens[0] == "vector" || tokens[0] == "multivector" || tokens[0] == "tensor" || tokens[0] == "tensorarray") {
if (tokens.size() != 3) {
return infinity::Status::ParserError("vector / multivector / tensor / tensorarray type syntax error");
}
SizeT dimension = std::stoull(tokens[1]);
String etype = tokens[2];
EmbeddingDataType e_data_type;
if (etype == "int" || etype == "integer" || etype == "int32") {
e_data_type = EmbeddingDataType::kElemInt32;
} else if (etype == "uint8") {
e_data_type = EmbeddingDataType::kElemUInt8;
} else if (etype == "int8" || etype == "tinyint") {
e_data_type = EmbeddingDataType::kElemInt8;
} else if (etype == "float" || etype == "float32") {
e_data_type = EmbeddingDataType::kElemFloat;
} else if (etype == "double" || etype == "float64") {
e_data_type = EmbeddingDataType::kElemDouble;
} else if (etype == "float16") {
e_data_type = EmbeddingDataType::kElemFloat16;
} else if (etype == "bfloat16") {
e_data_type = EmbeddingDataType::kElemBFloat16;
} else if (etype == "bit") {
e_data_type = EmbeddingDataType::kElemBit;
} else {
return infinity::Status::InvalidEmbeddingDataType(etype);
}
type_info = EmbeddingInfo::Make(e_data_type, dimension);
LogicalType logical_type_v = LogicalType::kInvalid;
if (tokens[0] == "vector") {
logical_type_v = LogicalType::kEmbedding;
} else if (tokens[0] == "multivector") {
logical_type_v = LogicalType::kMultiVector;
} else if (tokens[0] == "tensor") {
logical_type_v = LogicalType::kTensor;
} else if (tokens[0] == "tensorarray") {
logical_type_v = LogicalType::kTensorArray;
}
column_type = std::make_shared<DataType>(logical_type_v, type_info);
} else if (tokens[0] == "sparse") {
if (tokens.size() != 4) {
return infinity::Status::ParserError("sparse type syntax error");
}
SizeT dimension = std::stoull(tokens[1]);
String dtype = tokens[2];
String itype = tokens[3];
EmbeddingDataType d_data_type;
EmbeddingDataType i_data_type;
if (dtype == "integer" || dtype == "int" || dtype == "int32") {
d_data_type = EmbeddingDataType::kElemInt32;
} else if (dtype == "float" || dtype == "float32") {
d_data_type = EmbeddingDataType::kElemFloat;
} else if (dtype == "double" || dtype == "float64") {
d_data_type = EmbeddingDataType::kElemDouble;
} else {
return infinity::Status::InvalidEmbeddingDataType(dtype);
}

if (itype == "tinyint" || itype == "int8") {
i_data_type = EmbeddingDataType::kElemInt8;
} else if (itype == "smallint" || itype == "int16") {
i_data_type = EmbeddingDataType::kElemInt16;
} else if (itype == "integer" || itype == "int" || itype == "int32") {
i_data_type = EmbeddingDataType::kElemInt32;
} else if (itype == "bigint" || itype == "int64") {
i_data_type = EmbeddingDataType::kElemInt64;
} else {
return infinity::Status::InvalidEmbeddingDataType(itype);
}
type_info = SparseInfo::Make(d_data_type, i_data_type, dimension, SparseStoreType::kSort);
column_type = std::make_shared<DataType>(LogicalType::kSparse, type_info);
} else if (tokens[0] == "decimal") {
type_info = DecimalInfo::Make(field_element["precision"], field_element["scale"]);
column_type = std::make_shared<DataType>(LogicalType::kDecimal, type_info);
} else if (tokens[0] == "array") {
type_info = nullptr;
return infinity::Status::ParserError("Array isn't implemented here.");
} else if (tokens.size() == 1) {
column_type = DataType::StringDeserialize(tokens[0]);
} else {
return infinity::Status::ParserError(fmt::format("{} isn't supported.", tokens[0]));
auto [result_type, err_status] = ParseColumnType(tokens, field_element);
if (!err_status.ok()) {
return std::move(err_status);
}
column_type = std::move(result_type);
} catch (std::exception &e) {
return infinity::Status::ParserError("type syntax error");
}
Expand Down Expand Up @@ -1168,6 +1184,24 @@ class InsertHandler final : public HttpRequestHandler {
break;
}
case nlohmann::json::value_t::object: {
// check array type
if (value.size() == 1 && value.begin().key() == "array") {
SharedPtr<ConstantExpr> array_expr;
try {
auto array_result = BuildConstantExprFromJson(value);
if (!array_result) {
throw std::runtime_error("Empty return value!");
}
array_expr = std::move(array_result);
} catch (std::exception &e) {
json_response["error_code"] = ErrorCode::kSyntaxError;
json_response["error_message"] = fmt::format("Error when parsing array value: {}", e.what());
return ResponseFactory::createResponse(http_status, json_response.dump());
}
auto const_expr = std::make_unique<ConstantExpr>(std::move(*array_expr));
insert_one_row->values_.emplace_back(std::move(const_expr));
break;
}
std::unique_ptr<ConstantExpr> const_sparse_expr = {};
if (value.size() == 0) {
json_response["error_code"] = ErrorCode::kInvalidEmbeddingDataType;
Expand Down
18 changes: 18 additions & 0 deletions src/parser/expr/constant_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,24 @@

namespace infinity {

ConstantExpr::ConstantExpr(ConstantExpr &&constant_expr) noexcept : ParsedExpr(ParsedExprType::kConstant) {
literal_type_ = constant_expr.literal_type_;
bool_value_ = constant_expr.bool_value_;
integer_value_ = constant_expr.integer_value_;
double_value_ = constant_expr.double_value_;
str_value_ = constant_expr.str_value_;
constant_expr.str_value_ = nullptr;
interval_type_ = constant_expr.interval_type_;
date_value_ = constant_expr.date_value_;
constant_expr.date_value_ = nullptr;
long_array_ = std::move(constant_expr.long_array_);
double_array_ = std::move(constant_expr.double_array_);
sub_array_array_ = std::move(constant_expr.sub_array_array_);
long_sparse_array_ = std::move(constant_expr.long_sparse_array_);
double_sparse_array_ = std::move(constant_expr.double_sparse_array_);
curly_brackets_array_ = std::move(constant_expr.curly_brackets_array_);
}

ConstantExpr::~ConstantExpr() {
switch (literal_type_) {
case LiteralType::kString: {
Expand Down
2 changes: 2 additions & 0 deletions src/parser/expr/constant_expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class ConstantExpr : public ParsedExpr {
public:
explicit ConstantExpr(LiteralType literal_type) : ParsedExpr(ParsedExprType::kConstant), literal_type_(literal_type) {}

ConstantExpr(ConstantExpr &&constant_expr) noexcept;

~ConstantExpr() override;

[[nodiscard]] std::string ToString() const override;
Expand Down

0 comments on commit 4014ca3

Please sign in to comment.