From 548be07b92bda665b6f096a3e7f453e9cc34d0c3 Mon Sep 17 00:00:00 2001 From: thxCode Date: Mon, 17 Jun 2024 23:43:42 +0800 Subject: [PATCH] refactor: support expert ffl Signed-off-by: thxCode --- file_architecture.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/file_architecture.go b/file_architecture.go index f03b8f9..c242972 100644 --- a/file_architecture.go +++ b/file_architecture.go @@ -23,6 +23,10 @@ type GGUFArchitectureMetadata struct { BlockCount uint64 `json:"blockCount"` // FeedForwardLength(n_ff) is the length of the feed-forward layer. FeedForwardLength uint64 `json:"feedForwardLength,omitempty"` + // ExpertFeedForwardLength(expert_feed_forward_length) is the length of the feed-forward layer in the expert model. + ExpertFeedForwardLength uint64 `json:"expertFeedForwardLength,omitempty"` + // ExpertSharedFeedForwardLength(expert_shared_feed_forward_length) is the length of the shared feed-forward layer in the expert model. + ExpertSharedFeedForwardLength uint64 `json:"expertSharedFeedForwardLength,omitempty"` // ExpertCount(n_expert) is the number of experts in MoE models. ExpertCount uint32 `json:"expertCount,omitempty"` // ExpertUsedCount(n_expert_used) is the number of experts used during each token evaluation in MoE models. @@ -98,8 +102,11 @@ func (gf *GGUFFile) Architecture() (ga GGUFArchitectureMetadata) { embeddingLengthKey = arch + ".embedding_length" blockCountKey = arch + ".block_count" feedForwardLengthKey = arch + ".feed_forward_length" - expertCountKey = arch + ".expert_count" - expertUsedCountKey = arch + ".expert_used_count" + + expertFeedForwardLengthKey = arch + ".expert_feed_forward_length" + expertSharedFeedForwardLengthKey = arch + ".expert_shared_feed_forward_length" + expertCountKey = arch + ".expert_count" + expertUsedCountKey = arch + ".expert_used_count" attentionHeadCountKey = arch + ".attention.head_count" attentionHeadCountKVKey = arch + ".attention.head_count_kv" @@ -175,12 +182,19 @@ func (gf *GGUFFile) Architecture() (ga GGUFArchitectureMetadata) { if v, ok := m[feedForwardLengthKey]; ok { ga.FeedForwardLength = ValueNumeric[uint64](v) } + if v, ok := m[expertCountKey]; ok { ga.ExpertCount = ValueNumeric[uint32](v) } if v, ok := m[expertUsedCountKey]; ok { ga.ExpertUsedCount = ValueNumeric[uint32](v) } + if v, ok := m[expertFeedForwardLengthKey]; ok { + ga.ExpertFeedForwardLength = ValueNumeric[uint64](v) + } + if v, ok := m[expertSharedFeedForwardLengthKey]; ok { + ga.ExpertSharedFeedForwardLength = ValueNumeric[uint64](v) + } if v, ok := m[attentionHeadCountKey]; ok { ga.AttentionHeadCount = ValueNumeric[uint64](v)