diff --git a/mediapipe/tasks/cc/components/processors/proto/BUILD b/mediapipe/tasks/cc/components/processors/proto/BUILD index 55cf3fca18..82d4ea21bc 100644 --- a/mediapipe/tasks/cc/components/processors/proto/BUILD +++ b/mediapipe/tasks/cc/components/processors/proto/BUILD @@ -93,14 +93,3 @@ mediapipe_proto_library( "//mediapipe/framework:calculator_proto", ], ) - -mediapipe_proto_library( - name = "transformer_params_proto", - srcs = ["transformer_params.proto"], -) - -mediapipe_proto_library( - name = "llm_params_proto", - srcs = ["llm_params.proto"], - deps = [":transformer_params_proto"], -) diff --git a/mediapipe/tasks/cc/components/processors/proto/llm_params.proto b/mediapipe/tasks/cc/components/processors/proto/llm_params.proto deleted file mode 100644 index b0c2535981..0000000000 --- a/mediapipe/tasks/cc/components/processors/proto/llm_params.proto +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright 2023 The MediaPipe Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -syntax = "proto3"; - -package mediapipe.tasks.components.processors.proto; - -import "mediapipe/tasks/cc/components/processors/proto/transformer_params.proto"; - -option java_package = "com.google.mediapipe.tasks.components.processors.proto"; -option java_outer_classname = "LLMParametersProto"; - -// Parameters for Large Language Models (LLM). -message LLMParameters { - TransformerParameters transformer_parameters = 1; - - // Size of vocabulary. - int32 vocab_size = 2; - - // Whether or not to disable KV cache, which is also referred as state - // somewhere else. - bool disable_kv_cache = 3; - - // Id of the start token. - int32 start_token_id = 4; - - // Token to determine the end of output stream. - string stop_token = 5; -} diff --git a/mediapipe/tasks/cc/components/processors/proto/transformer_params.proto b/mediapipe/tasks/cc/components/processors/proto/transformer_params.proto deleted file mode 100644 index a04aa95718..0000000000 --- a/mediapipe/tasks/cc/components/processors/proto/transformer_params.proto +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright 2023 The MediaPipe Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -syntax = "proto3"; - -package mediapipe.tasks.components.processors.proto; - -option java_package = "com.google.mediapipe.tasks.components.processors.proto"; -option java_outer_classname = "TransformerParametersProto"; - -// The parameters of transformer (https://arxiv.org/pdf/1706.03762.pdf) -message TransformerParameters { - // Batch size of tensors. - int32 batch_size = 1; - - // Maximum sequence length of the input/output tensor. - int32 max_seq_length = 2; - - // Embedding dimension (or model dimension), `d_model` in the paper. - // `d_k` == `d_v` == `d_model`/`h`. - int32 embedding_dim = 3; - - // Hidden dimension used in the feedforward layer, `d_ff` in the paper. - int32 hidden_dimension = 4; - - // Head dimension, `d_k` or `d_v` in the paper. - int32 head_dimension = 5; - - // Number of heads, `h` in the paper. - int32 num_heads = 6; - - // Number of stacked transformers, `N` in the paper. - int32 num_stacks = 7; - - // Deprecated: bool use_mqa. Use num_kv_heads below. - reserved 8; - - // Number of kv heads. 0 means Multi-Head-Attention (MHA), key and value have - // same number of heads as query; 1 means Multi-Query-Attention (MQA), key and - // value have one head; otherwise, this specifies the number of heads for key - // and value, and Grouped-Query-Attention (GQA) will be used. See - // https://arxiv.org/pdf/2305.13245.pdf for details. - int32 num_kv_heads = 9; - - // Different types of attention mask type. - enum AttentionMaskType { - UNSPECIFIED = 0; - CAUSAL = 1; - PREFIX = 2; - } - AttentionMaskType attention_mask_type = 10; -}