Add scalar support in ORT backend #213
Changes from 1 commit
```diff
@@ -885,7 +885,9 @@ ModelState::AutoCompleteIO(const char* key, const OnnxTensorInfoMap& io_infos)
     triton::common::TritonJson::Value reshape_dims(
         ModelConfig(), triton::common::TritonJson::ValueType::ARRAY);
     RETURN_IF_ERROR(reshape.Add("shape", std::move(reshape_dims)));
-    RETURN_IF_ERROR(io.Add("reshape", std::move(reshape)));
+    if (MaxBatchSize() > 0) {
+      RETURN_IF_ERROR(io.Add("reshape", std::move(reshape)));
+    }
   }
   RETURN_IF_ERROR(io.Add("dims", std::move(dims)));
   RETURN_IF_ERROR(ios.Append(std::move(io)));
```
```diff
@@ -998,6 +1000,12 @@ class ModelInstanceState : public BackendModelInstance {
   // map of output name -> tensor info
   OnnxTensorInfoMap output_tensor_infos_;

+  // map of input name -> tensor info
+  OnnxTensorInfoMap input_tensor_infos_;
+
+  // A map from scalar output tensors to the dimension specified in model config
+  std::unordered_map<std::string, std::vector<int64_t>> scalar_outputs_;
+
   // Onnx Runtime variables that will be reset and used for every run
   // on this instance.
   std::vector<OrtValue*> input_tensors_;
```
```diff
@@ -1313,9 +1321,8 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
 {
   std::set<std::string> input_tensor_names;
   RETURN_IF_ERROR(InputNames(session_, input_tensor_names));

-  OnnxTensorInfoMap input_tensor_infos;
-  RETURN_IF_ERROR(
-      InputInfos(session_, default_allocator_, input_tensor_infos));
+  RETURN_IF_ERROR(
+      InputInfos(session_, default_allocator_, input_tensor_infos_));

   std::set<std::string> overridable_initializer_tensor_names;
   RETURN_IF_ERROR(OverridableInitializerNames(
```
```diff
@@ -1325,12 +1332,13 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
   RETURN_IF_ERROR(OverridableInitializerInfos(
       session_, default_allocator_, overridable_initializer_tensor_infos));

-  if (input_tensor_infos.size() != expected_input_cnt) {
+  if (input_tensor_infos_.size() != expected_input_cnt) {
     return TRITONSERVER_ErrorNew(
         TRITONSERVER_ERROR_INVALID_ARG,
         (std::string("unable to load model '") + model_state_->Name() +
          "', configuration expects " + std::to_string(expected_input_cnt) +
-         " inputs, model provides " + std::to_string(input_tensor_infos.size()))
+         " inputs, model provides " +
+         std::to_string(input_tensor_infos_.size()))
             .c_str());
   }
```
```diff
@@ -1357,8 +1365,9 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
     const auto& tensor_names =
         io_optional ? overridable_initializer_tensor_names : input_tensor_names;
-    const auto& tensor_infos =
-        io_optional ? overridable_initializer_tensor_infos : input_tensor_infos;
+    const auto& tensor_infos = io_optional
+                                   ? overridable_initializer_tensor_infos
+                                   : input_tensor_infos_;
     auto iit = tensor_infos.find(io_name);
     if (iit == tensor_infos.end()) {
       RETURN_IF_ERROR(CheckAllowedModelInput(io, tensor_names));
```
```diff
@@ -1419,9 +1428,30 @@ ModelInstanceState::ValidateInputs(const size_t expected_input_cnt)
             .c_str());
       }
     } else {
-      RETURN_IF_ERROR(CompareDimsSupported(
-          model_state_->Name(), io_name, iit->second.dims_, dims,
-          model_state_->MaxBatchSize(), false /* compare_exact */));
+      if (model_state_->MaxBatchSize() != 0 || iit->second.dims_.size() > 0) {
+        RETURN_IF_ERROR(CompareDimsSupported(
+            model_state_->Name(), io_name, iit->second.dims_, dims,
+            model_state_->MaxBatchSize(), false /* compare_exact */));
+      } else {
+        // if max_batch_size == 0 and is a scalar tensor all the
+        // dimensions specified must be equal to 1
+        for (auto& dim : dims) {
+          if (dim != 1) {
+            return TRITONSERVER_ErrorNew(
+                TRITONSERVER_ERROR_INVALID_ARG,
+                (std::string("unable to load model '") + model_state_->Name() +
+                 "', scalar tensor '" + io_name +
+                 "', should only provide 1 in the model configuration when the "
+                 "model doesn't support batching. Model configuration "
+                 "provided: " +
+                 ShapeToString(dims) + ".")
+                    .c_str());
+          }
+        }
+
+        // store the dimension for reference.
+        scalar_inputs_[io_name] = dims;
+      }
     }
   }
 }
```
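For reference, the rule enforced by the new else branch can be written as a small standalone predicate. This is an illustrative sketch, not code from this PR; `ScalarDimsValid` and its parameters are hypothetical names:

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Sketch: when max_batch_size == 0 and the ONNX tensor has rank 0 (scalar),
// every dim listed in the model configuration must be exactly 1.
bool ScalarDimsValid(
    int max_batch_size, size_t onnx_rank, const std::vector<int64_t>& dims)
{
  if (max_batch_size != 0 || onnx_rank > 0) {
    return true;  // non-scalar case; CompareDimsSupported covers it instead
  }
  // Scalar case: dims like {1} or {1, 1} pass; {4} or {-1} fail.
  return std::all_of(
      dims.begin(), dims.end(), [](int64_t dim) { return dim == 1; });
}
```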
```diff
@@ -1482,9 +1512,30 @@ ModelInstanceState::ValidateOutputs()
     // The batch output shape doesn't necessarily match the model
     if (model_state_->FindBatchOutput(io_name) == nullptr) {
-      RETURN_IF_ERROR(CompareDimsSupported(
-          model_state_->Name(), io_name, iit->second.dims_, dims,
-          model_state_->MaxBatchSize(), true /* compare_exact */));
+      // if max_batch_size == 0 and is a scalar tensor all the
+      // dimensions specified must be equal to 1
+      if (model_state_->MaxBatchSize() > 0 || iit->second.dims_.size() > 0) {
+        RETURN_IF_ERROR(CompareDimsSupported(
+            model_state_->Name(), io_name, iit->second.dims_, dims,
+            model_state_->MaxBatchSize(), true /* compare_exact */));
+      } else {
+        for (auto& dim : dims) {
+          if (dim != 1) {
+            return TRITONSERVER_ErrorNew(
+                TRITONSERVER_ERROR_INVALID_ARG,
+                (std::string("unable to load model '") + model_state_->Name() +
+                 "', scalar tensor '" + io_name +
+                 "', should only provide 1 in the model configuration when the "
+                 "model doesn't support batching. Model configuration "
+                 "provided: " +
+                 ShapeToString(dims) + ".")
+                    .c_str());
+          }
+        }
+
+        // store the dimension for reference.
+        scalar_outputs_[io_name] = dims;
+      }
     }
   }
```

Reviewer: Summarizing offline discussion, I'd rather see

```cpp
const bool is_scalar = (iit->second.dims_.size() == 0);
if (is_scalar) {
  // Dimensional "volume" of Triton dims must be 1 for scalars.
  if (std::any_of(dims.begin(), dims.end(), [](int64_t dim) { return dim != 1; })) {
    // ERROR
  }
  scalar_outputs_[io_name] = dims;
} else {
  RETURN_IF_ERROR(CompareDimsSupported(...));
}
```

Author: fixed.
```diff
@@ -1900,13 +1951,34 @@ ModelInstanceState::SetInputTensors(
           input_name, nullptr, 0, allowed_input_types, &input_buffer,
           &batchn_byte_size, &memory_type, &memory_type_id));

+      auto iti = input_tensor_infos_.find(input_name);
+      if (iti == input_tensor_infos_.end()) {
+        return TRITONSERVER_ErrorNew(
+            TRITONSERVER_ERROR_INTERNAL,
+            std::string(
+                std::string(
+                    "Failed to retrieve the ONNX input tensor info from '") +
+                input_name + "'.")
+                .c_str());
+      }
+
       // Create ORT Tensor
-      RETURN_IF_ORT_ERROR(ort_api->CreateTensorWithDataAsOrtValue(
-          memory_type == TRITONSERVER_MEMORY_GPU ? cuda_allocator_info_
-                                                 : cpu_allocator_info_,
-          (void*)input_buffer, batchn_byte_size, batchn_shape.data(),
-          batchn_shape.size(), ConvertToOnnxDataType(input_datatype),
-          &input_tensors_.back()));
+      if (iti->second.dims_.size() == 0) {
+        // scalar tensor
+        RETURN_IF_ORT_ERROR(ort_api->CreateTensorWithDataAsOrtValue(
+            memory_type == TRITONSERVER_MEMORY_GPU ? cuda_allocator_info_
+                                                   : cpu_allocator_info_,
+            (void*)input_buffer, batchn_byte_size, nullptr /* scalar */,
+            0 /* number of dims */, ConvertToOnnxDataType(input_datatype),
+            &input_tensors_.back()));
+      } else {
+        RETURN_IF_ORT_ERROR(ort_api->CreateTensorWithDataAsOrtValue(
+            memory_type == TRITONSERVER_MEMORY_GPU ? cuda_allocator_info_
+                                                   : cpu_allocator_info_,
+            (void*)input_buffer, batchn_byte_size, batchn_shape.data(),
+            batchn_shape.size(), ConvertToOnnxDataType(input_datatype),
+            &input_tensors_.back()));
+      }
       RETURN_IF_ORT_ERROR(
           ort_api->BindInput(io_binding_, input_name, input_tensors_.back()));
     } else {
```
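The `nullptr /* scalar */, 0 /* number of dims */` arguments rely on the ONNX Runtime C API producing a rank-0 tensor when the shape pointer is null and the dimension count is zero. Below is a minimal self-contained sketch of that call; all names are hypothetical except the `OrtApi` functions themselves:

```cpp
#include <onnxruntime_c_api.h>

// Sketch: wrap an existing float in a rank-0 (scalar) OrtValue, mirroring
// the scalar branch above. Returns nullptr on success, an OrtStatus* on error.
OrtStatus* CreateScalarOrtValue(
    const OrtApi* ort_api, float* value, OrtValue** out)
{
  OrtMemoryInfo* memory_info = nullptr;
  OrtStatus* status = ort_api->CreateCpuMemoryInfo(
      OrtArenaAllocator, OrtMemTypeDefault, &memory_info);
  if (status != nullptr) {
    return status;
  }

  // A null shape with shape_len == 0 yields a rank-0 tensor.
  status = ort_api->CreateTensorWithDataAsOrtValue(
      memory_info, value, sizeof(float), nullptr /* shape */,
      0 /* shape_len */, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, out);

  ort_api->ReleaseMemoryInfo(memory_info);
  return status;
}
```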
```diff
@@ -2283,6 +2355,22 @@ ModelInstanceState::ReadOutputTensors(
           batchn_shape, dtype, output_tensor, &output_buffer, string_buffers,
           offsets));

+      // If the number of dimensions is equal to zero, it means that it is a
+      // scalar and it would use the dimensions specified in the model
+      // configuration.
+      if (batchn_shape.size() == 0) {
+        auto scalar_output_dims_it = scalar_outputs_.find(name);
+        if (scalar_output_dims_it == scalar_outputs_.end()) {
+          return TRITONSERVER_ErrorNew(
+              TRITONSERVER_ERROR_INTERNAL,
+              std::string(
+                  "Failed to find the scalar output dimension for " + name +
+                  " in the model configuration.")
+                  .c_str());
+        }
+        batchn_shape = scalar_output_dims_it->second;
+      }
+
       if (output_tensor_pair.first != -1) {
         if (dtype == TRITONSERVER_TYPE_BYTES) {
           auto content = string_buffers.back().data();
```
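In isolation, the substitution above amounts to: trust the shape ORT reports unless it is rank 0, in which case fall back to the dims recorded from the model configuration at load time. A hedged sketch of that lookup, with a hypothetical function name:

```cpp
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

// Sketch: choose the shape to report for an output tensor. Returns false
// when a rank-0 output was never registered, mirroring the
// TRITONSERVER_ERROR_INTERNAL path above.
bool ResolveOutputShape(
    const std::string& name, const std::vector<int64_t>& ort_shape,
    const std::unordered_map<std::string, std::vector<int64_t>>&
        scalar_outputs,
    std::vector<int64_t>* resolved)
{
  if (!ort_shape.empty()) {
    *resolved = ort_shape;  // non-scalar: use the shape from ORT
    return true;
  }
  auto it = scalar_outputs.find(name);
  if (it == scalar_outputs.end()) {
    return false;
  }
  *resolved = it->second;  // e.g. {1} for a scalar declared with dims [1]
  return true;
}
```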
Reviewer: Can you add a comment about why reshape causes issues with the non-batching case, for future reference?

Reviewer: Actually, is this breaking any functionality for non-batching models that specify a "reshape" in their model config? Such as our densenet example: https://github.com/triton-inference-server/server/blob/main/docs/examples/model_repository/densenet_onnx/config.pbtxt
This looks like it's restricted to (1) autocomplete and (2) dims are empty, but just double checking.

Author: Added a comment. I don't think it would break those models, since they all have non-empty dimensions.