From 4c7f72ff90b21ee1676420a4454dffbd1dc534d3 Mon Sep 17 00:00:00 2001 From: lipandeng Date: Mon, 20 Jan 2025 21:17:20 +0800 Subject: [PATCH 1/4] feat: add component examples feat: add callback helper for file loader feat: add custom loader feat: add embedding examples feat: add embedding examples feat: add indexer&retriever examples feat: revert claude go mod --- .../customloader/custom_loader_impl.go | 105 ++++++++++++++++++ .../loader/file/examples/customloader/load.go | 50 +++++++++ .../file/examples/fileloader/file_loader.go | 93 ++++++++++++++++ components/document/loader/file/go.sum | 1 + .../headersplitter/header_splitter.go | 83 ++++++++++++++ .../openai/examples/embedding/embedding.go | 48 +++++++- .../examples/builtin_embedding/store.go | 50 ++++++++- .../examples/builtin_embedding/search.go | 56 +++++++++- 8 files changed, 473 insertions(+), 13 deletions(-) create mode 100644 components/document/loader/file/examples/customloader/custom_loader_impl.go create mode 100644 components/document/loader/file/examples/customloader/load.go create mode 100644 components/document/loader/file/examples/fileloader/file_loader.go create mode 100644 components/document/transformer/splitter/markdown/examples/headersplitter/header_splitter.go diff --git a/components/document/loader/file/examples/customloader/custom_loader_impl.go b/components/document/loader/file/examples/customloader/custom_loader_impl.go new file mode 100644 index 00000000..4af8b4f0 --- /dev/null +++ b/components/document/loader/file/examples/customloader/custom_loader_impl.go @@ -0,0 +1,105 @@ +/* + * Copyright 2025 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import ( + "context" + "time" + + "github.com/cloudwego/eino/callbacks" + "github.com/cloudwego/eino/components/document" + "github.com/cloudwego/eino/schema" +) + +type customLoaderOptions struct { + Timeout time.Duration + RetryCount int +} + +func WithTimeout(timeout time.Duration) document.LoaderOption { + return document.WrapLoaderImplSpecificOptFn(func(o *customLoaderOptions) { + o.Timeout = timeout + }) +} + +func WithRetryCount(count int) document.LoaderOption { + return document.WrapLoaderImplSpecificOptFn(func(o *customLoaderOptions) { + o.RetryCount = count + }) +} + +func NewCustomLoader(config *Config) (*CustomLoader, error) { + return &CustomLoader{ + timeout: config.DefaultTimeout, + retryCount: config.DefaultRetryCount, + }, nil +} + +type CustomLoader struct { + timeout time.Duration + retryCount int +} + +type Config struct { + DefaultTimeout time.Duration + DefaultRetryCount int +} + +func (l *CustomLoader) Load(ctx context.Context, src document.Source, opts ...document.LoaderOption) ([]*schema.Document, error) { + // 1. 处理 option + options := &customLoaderOptions{ + Timeout: l.timeout, + RetryCount: l.retryCount, + } + options = document.GetLoaderImplSpecificOptions(options, opts...) + var err error + + // 2. 处理错误,并进行错误回调方法 + defer func() { + if err != nil { + callbacks.OnError(ctx, err) + } + }() + + // 3. 开始加载前的回调 + ctx = callbacks.OnStart(ctx, &document.LoaderCallbackInput{ + Source: src, + }) + + // 4. 
执行加载逻辑 + docs, err := l.doLoad(ctx, src, options) + + if err != nil { + return nil, err + } + + ctx = callbacks.OnEnd(ctx, &document.LoaderCallbackOutput{ + Source: src, + Docs: docs, + }) + + return docs, nil +} + +func (l *CustomLoader) doLoad(ctx context.Context, src document.Source, opts *customLoaderOptions) ([]*schema.Document, error) { + // 实现文档加载逻辑 + // 1. 加载文档内容 + // 2. 构造 Document 对象,注意可在 MetaData 中保存文档来源等重要信息 + return []*schema.Document{{ + Content: "Hello World", + }}, nil +} diff --git a/components/document/loader/file/examples/customloader/load.go b/components/document/loader/file/examples/customloader/load.go new file mode 100644 index 00000000..d8bf0424 --- /dev/null +++ b/components/document/loader/file/examples/customloader/load.go @@ -0,0 +1,50 @@ +/* + * Copyright 2025 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "context" + "log" + "time" + + "github.com/cloudwego/eino/components/document" +) + +func main() { + ctx := context.Background() + + log.Printf("===== call Custom Loader directly =====") + // 初始化 loader + loader, err := NewCustomLoader(&Config{ + DefaultTimeout: 10 * time.Second, + DefaultRetryCount: 10, + }) + if err != nil { + log.Fatalf("NewCustomLoader failed, err=%v", err) + } + + // 加载文档 + filePath := "../../testdata/test.md" + docs, err := loader.Load(ctx, document.Source{ + URI: filePath, + }) + if err != nil { + log.Fatalf("loader.Load failed, err=%v", err) + } + + log.Printf("doc content: %v", docs[0].Content) +} diff --git a/components/document/loader/file/examples/fileloader/file_loader.go b/components/document/loader/file/examples/fileloader/file_loader.go new file mode 100644 index 00000000..f367dda4 --- /dev/null +++ b/components/document/loader/file/examples/fileloader/file_loader.go @@ -0,0 +1,93 @@ +/* + * Copyright 2024 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "context" + "log" + + "github.com/cloudwego/eino/callbacks" + "github.com/cloudwego/eino/components/document" + "github.com/cloudwego/eino/compose" + "github.com/cloudwego/eino/schema" + callbacksHelper "github.com/cloudwego/eino/utils/callbacks" + + "github.com/cloudwego/eino-ext/components/document/loader/file" +) + +func main() { + ctx := context.Background() + + log.Printf("===== call File Loader directly =====") + // 初始化 loader (以file loader为例) + loader, err := file.NewFileLoader(ctx, &file.FileLoaderConfig{ + // 配置参数 + UseNameAsID: true, + }) + if err != nil { + log.Fatalf("file.NewFileLoader failed, err=%v", err) + } + + // 加载文档 + filePath := "../../testdata/test.md" + docs, err := loader.Load(ctx, document.Source{ + URI: filePath, + }) + if err != nil { + log.Fatalf("loader.Load failed, err=%v", err) + } + + log.Printf("doc content: %v", docs[0].Content) + log.Printf("Extension: %s\n", docs[0].MetaData[file.MetaKeyExtension]) // 输出: Extension: .txt + log.Printf("Source: %s\n", docs[0].MetaData[file.MetaKeySource]) // 输出: Source: ./document.txt + + log.Printf("===== call File Loader in Chain =====") + // 创建 callback handler + handler := &callbacksHelper.LoaderCallbackHandler{ + OnStart: func(ctx context.Context, info *callbacks.RunInfo, input *document.LoaderCallbackInput) context.Context { + log.Printf("start loading docs...: %s\n", input.Source.URI) + return ctx + }, + OnEnd: func(ctx context.Context, info *callbacks.RunInfo, output *document.LoaderCallbackOutput) context.Context { + log.Printf("complete loading docs,total loaded docs: %d\n", len(output.Docs)) + return ctx + }, + // OnError + } + + // 使用 callback handler + helper := callbacksHelper.NewHandlerHelper(). + Loader(handler). 
+ Handler() + + chain := compose.NewChain[document.Source, []*schema.Document]() + chain.AppendLoader(loader) + // 在运行时使用 + run, err := chain.Compile(ctx) + if err != nil { + log.Fatalf("chain.Compile failed, err=%v", err) + } + + outDocs, err := run.Invoke(ctx, document.Source{ + URI: filePath, + }, compose.WithCallbacks(helper)) + if err != nil { + log.Fatalf("run.Invoke failed, err=%v", err) + } + + log.Printf("doc content: %v", outDocs[0].Content) +} diff --git a/components/document/loader/file/go.sum b/components/document/loader/file/go.sum index 18181160..8518e6cc 100644 --- a/components/document/loader/file/go.sum +++ b/components/document/loader/file/go.sum @@ -114,6 +114,7 @@ github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95 github.com/x-cray/logrus-prefixed-formatter v0.5.2 h1:00txxvfBM9muc0jiLIEAkAcIMJzfthRT6usrui8uGmg= github.com/yargevad/filepathx v1.0.0 h1:SYcT+N3tYGi+NvazubCNlvgIPbzAk7i7y2dwg3I5FYc= github.com/yargevad/filepathx v1.0.0/go.mod h1:BprfX/gpYNJHJfc35GjRRpVcwWXS89gGulUIU5tK3tA= +go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU= golang.org/x/arch v0.0.0-20210923205945-b76863e36670 h1:18EFjUmQOcUvxNYSkA6jO9VAiXCnxFY6NyDX0bHDmkU= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= diff --git a/components/document/transformer/splitter/markdown/examples/headersplitter/header_splitter.go b/components/document/transformer/splitter/markdown/examples/headersplitter/header_splitter.go new file mode 100644 index 00000000..b04a4ac2 --- /dev/null +++ b/components/document/transformer/splitter/markdown/examples/headersplitter/header_splitter.go @@ -0,0 +1,83 @@ +package main + +import ( + "context" + "log" + + "github.com/cloudwego/eino/callbacks" + "github.com/cloudwego/eino/components/document" + "github.com/cloudwego/eino/compose" + 
"github.com/cloudwego/eino/schema" + callbacksHelper "github.com/cloudwego/eino/utils/callbacks" + + "github.com/cloudwego/eino-ext/components/document/transformer/splitter/markdown" +) + +func main() { + ctx := context.Background() + + // 初始化 transformer (以 markdown 为例) + transformer, err := markdown.NewHeaderSplitter(ctx, &markdown.HeaderConfig{ + // 配置参数 + Headers: map[string]string{ + "##": "", + }, + }) + if err != nil { + log.Fatalf("markdown.NewHeaderSplitter failed, err=%v", err) + } + + markdownDoc := &schema.Document{ + Content: "## Title 1\nHello Word\n## Title 2\nWord Hello", + } + + log.Printf("===== call Header Splitter directly =====") + + // 转换文档 + transformedDocs, err := transformer.Transform(ctx, []*schema.Document{markdownDoc}) + if err != nil { + log.Fatalf("transformer.Transform failed, err=%v", err) + } + + for idx, doc := range transformedDocs { + log.Printf("doc segment %v: %v", idx, doc.Content) + } + + log.Printf("===== call Header Splitter in chain =====") + + // 创建 callback handler + handler := &callbacksHelper.TransformerCallbackHandler{ + OnStart: func(ctx context.Context, info *callbacks.RunInfo, input *document.TransformerCallbackInput) context.Context { + log.Printf("input access, len: %v, content: %s\n", len(input.Input), input.Input[0].Content) + return ctx + }, + OnEnd: func(ctx context.Context, info *callbacks.RunInfo, output *document.TransformerCallbackOutput) context.Context { + log.Printf("output finished, len: %v\n", len(output.Output)) + return ctx + }, + // OnError + } + + // 使用 callback handler + helper := callbacksHelper.NewHandlerHelper(). + Transformer(handler). 
+ Handler() + + chain := compose.NewChain[[]*schema.Document, []*schema.Document]() + chain.AppendDocumentTransformer(transformer) + + // 在运行时使用 + run, err := chain.Compile(ctx) + if err != nil { + log.Fatalf("chain.Compile failed, err=%v", err) + } + + outDocs, err := run.Invoke(ctx, []*schema.Document{markdownDoc}, compose.WithCallbacks(helper)) + if err != nil { + log.Fatalf("run.Invoke failed, err=%v", err) + } + + for idx, doc := range outDocs { + log.Printf("doc segment %v: %v", idx, doc.Content) + } +} diff --git a/components/embedding/openai/examples/embedding/embedding.go b/components/embedding/openai/examples/embedding/embedding.go index a9c477ae..952e16bf 100644 --- a/components/embedding/openai/examples/embedding/embedding.go +++ b/components/embedding/openai/examples/embedding/embedding.go @@ -19,8 +19,14 @@ package main import ( "context" "fmt" + "log" "os" + "github.com/cloudwego/eino/callbacks" + "github.com/cloudwego/eino/components/embedding" + "github.com/cloudwego/eino/compose" + callbacksHelper "github.com/cloudwego/eino/utils/callbacks" + "github.com/cloudwego/eino-ext/components/embedding/openai" ) @@ -33,7 +39,7 @@ func main() { defaultDim = 1024 ) - embedding, err := openai.NewEmbedder(ctx, &openai.EmbeddingConfig{ + embedder, err := openai.NewEmbedder(ctx, &openai.EmbeddingConfig{ APIKey: accessKey, Model: "text-embedding-3-large", Dimensions: &defaultDim, @@ -43,10 +49,44 @@ func main() { panic(fmt.Errorf("new embedder error: %v\n", err)) } - resp, err := embedding.EmbedStrings(ctx, []string{"hello", "how are you"}) + log.Printf("===== call Embedder directly =====") + + vectors, err := embedder.EmbedStrings(ctx, []string{"hello", "how are you"}) + if err != nil { + panic(fmt.Errorf("embedder.EmbedStrings failed, err=%v", err)) + } + + log.Printf("vectors : %v", vectors) + + log.Printf("===== call Embedder in Chain =====") + + handler := &callbacksHelper.EmbeddingCallbackHandler{ + OnStart: func(ctx context.Context, runInfo 
*callbacks.RunInfo, input *embedding.CallbackInput) context.Context { + log.Printf("input access, len: %v, content: %s\n", len(input.Texts), input.Texts) + return ctx + }, + OnEnd: func(ctx context.Context, runInfo *callbacks.RunInfo, output *embedding.CallbackOutput) context.Context { + log.Printf("output finished, len: %v\n", len(output.Embeddings)) + return ctx + }, + } + + callbackHandler := callbacksHelper.NewHandlerHelper().Embedding(handler).Handler() + + chain := compose.NewChain[[]string, [][]float64]() + chain.AppendEmbedding(embedder) + + // 编译并运行 + runnable, err := chain.Compile(ctx) + if err != nil { + panic(fmt.Errorf("chain.Compile failed, err=%v", err)) + } + + vectors, err = runnable.Invoke(ctx, []string{"hello", "how are you"}, + compose.WithCallbacks(callbackHandler)) if err != nil { - panic(fmt.Errorf("generate failed, err=%v", err)) + panic(fmt.Errorf("runnable.Invoke failed, err=%v", err)) } - fmt.Printf("output=%v", resp) + log.Printf("vectors in chain: %v", vectors) } diff --git a/components/indexer/volc_vikingdb/examples/builtin_embedding/store.go b/components/indexer/volc_vikingdb/examples/builtin_embedding/store.go index 97abd4e8..1792c068 100644 --- a/components/indexer/volc_vikingdb/examples/builtin_embedding/store.go +++ b/components/indexer/volc_vikingdb/examples/builtin_embedding/store.go @@ -19,10 +19,16 @@ package main import ( "context" "fmt" + "log" "os" - "github.com/cloudwego/eino-ext/components/indexer/volc_vikingdb" + "github.com/cloudwego/eino/callbacks" + "github.com/cloudwego/eino/components/indexer" + "github.com/cloudwego/eino/compose" "github.com/cloudwego/eino/schema" + callbacksHelper "github.com/cloudwego/eino/utils/callbacks" + + "github.com/cloudwego/eino-ext/components/indexer/volc_vikingdb" ) func main() { @@ -65,7 +71,7 @@ func main() { AddBatchSize: 10, } - indexer, err := volc_vikingdb.NewIndexer(ctx, cfg) + volcIndexer, err := volc_vikingdb.NewIndexer(ctx, cfg) if err != nil { fmt.Printf("NewIndexer failed, 
%v\n", err) return @@ -79,11 +85,49 @@ func main() { volc_vikingdb.SetExtraDataTTL(doc, 1000) docs := []*schema.Document{doc} - resp, err := indexer.Store(ctx, docs) + + log.Printf("===== call Indexer directly =====") + + resp, err := volcIndexer.Store(ctx, docs) if err != nil { fmt.Printf("Store failed, %v\n", err) return } fmt.Printf("vikingDB store success, docs=%v, resp ids=%v\n", docs, resp) + + log.Printf("===== call Indexer in chain =====") + + // 创建 callback handler + handler := &callbacksHelper.IndexerCallbackHandler{ + OnStart: func(ctx context.Context, info *callbacks.RunInfo, input *indexer.CallbackInput) context.Context { + log.Printf("input access, len: %v, content: %s\n", len(input.Docs), input.Docs[0].Content) + return ctx + }, + OnEnd: func(ctx context.Context, info *callbacks.RunInfo, output *indexer.CallbackOutput) context.Context { + log.Printf("output finished, len: %v, ids=%v\n", len(output.IDs), output.IDs) + return ctx + }, + // OnError + } + + // 使用 callback handler + helper := callbacksHelper.NewHandlerHelper(). + Indexer(handler). 
+ Handler() + + chain := compose.NewChain[[]*schema.Document, []string]() + chain.AppendIndexer(volcIndexer) + + // 在运行时使用 + run, err := chain.Compile(ctx) + if err != nil { + log.Fatalf("chain.Compile failed, err=%v", err) + } + + outIDs, err := run.Invoke(ctx, docs, compose.WithCallbacks(helper)) + if err != nil { + log.Fatalf("run.Invoke failed, err=%v", err) + } + fmt.Printf("vikingDB store success, docs=%v, resp ids=%v\n", docs, outIDs) } diff --git a/components/retriever/volc_vikingdb/examples/builtin_embedding/search.go b/components/retriever/volc_vikingdb/examples/builtin_embedding/search.go index 3ea30be5..be1e1d2d 100644 --- a/components/retriever/volc_vikingdb/examples/builtin_embedding/search.go +++ b/components/retriever/volc_vikingdb/examples/builtin_embedding/search.go @@ -18,9 +18,15 @@ package main import ( "context" - "fmt" + "log" "os" + "github.com/cloudwego/eino/callbacks" + "github.com/cloudwego/eino/components/retriever" + "github.com/cloudwego/eino/compose" + "github.com/cloudwego/eino/schema" + callbacksHelper "github.com/cloudwego/eino/utils/callbacks" + "github.com/cloudwego/eino-ext/components/retriever/volc_vikingdb" ) @@ -71,20 +77,58 @@ func main() { FilterDSL: nil, // 对应索引中的【标量过滤字段】,未设置时至空即可,表达式详见 https://www.volcengine.com/docs/84313/1254609 } - ret, err := volc_vikingdb.NewRetriever(ctx, cfg) + volcRetriever, err := volc_vikingdb.NewRetriever(ctx, cfg) if err != nil { - fmt.Printf("NewRetriever failed, %v\n", err) + log.Printf("NewRetriever failed, %v\n", err) return } + log.Printf("===== call Indexer directly =====") + query := "tourist attraction" - docs, err := ret.Retrieve(ctx, query) + docs, err := volcRetriever.Retrieve(ctx, query) if err != nil { - fmt.Printf("vikingDB retrieve failed, %v\n", err) + log.Printf("vikingDB retrieve failed, %v\n", err) return } - fmt.Printf("vikingDB retrieve success, query=%v, docs=%v\n", query, docs) + log.Printf("vikingDB retrieve success, query=%v, docs=%v", query, docs) + + 
log.Printf("===== call Indexer in chain =====") + + // 创建 callback handler + handler := &callbacksHelper.RetrieverCallbackHandler{ + OnStart: func(ctx context.Context, info *callbacks.RunInfo, input *retriever.CallbackInput) context.Context { + log.Printf("input access, content: %s\n", input.Query) + return ctx + }, + OnEnd: func(ctx context.Context, info *callbacks.RunInfo, output *retriever.CallbackOutput) context.Context { + log.Printf("output finished, len: %v\n", len(output.Docs)) + return ctx + }, + // OnError + } + + // 使用 callback handler + helper := callbacksHelper.NewHandlerHelper(). + Retriever(handler). + Handler() + + chain := compose.NewChain[string, []*schema.Document]() + chain.AppendRetriever(volcRetriever) + + // 在运行时使用 + run, err := chain.Compile(ctx) + if err != nil { + log.Fatalf("chain.Compile failed, err=%v", err) + } + + outDocs, err := run.Invoke(ctx, query, compose.WithCallbacks(helper)) + if err != nil { + log.Fatalf("run.Invoke failed, err=%v", err) + } + + log.Printf("vikingDB retrieve success, query=%v, docs=%v", query, outDocs) } func of[T any](v T) *T { From a9536361cbb1cf486694e4302beef5004c46522b Mon Sep 17 00:00:00 2001 From: lipandeng Date: Sun, 26 Jan 2025 16:52:36 +0800 Subject: [PATCH 2/4] feat: adjust examples --- .../file/examples/fileloader/file_loader.go | 8 +++--- .../headersplitter/header_splitter.go | 26 +++++++++++++++---- .../transformer/splitter/markdown/header.go | 16 +++++++++++- .../openai/examples/embedding/embedding.go | 8 +++--- .../examples/builtin_embedding/store.go | 8 +++--- .../examples/builtin_embedding/search.go | 8 +++--- 6 files changed, 53 insertions(+), 21 deletions(-) diff --git a/components/document/loader/file/examples/fileloader/file_loader.go b/components/document/loader/file/examples/fileloader/file_loader.go index f367dda4..de0212c2 100644 --- a/components/document/loader/file/examples/fileloader/file_loader.go +++ b/components/document/loader/file/examples/fileloader/file_loader.go @@ -57,7 
+57,7 @@ func main() { log.Printf("===== call File Loader in Chain =====") // 创建 callback handler - handler := &callbacksHelper.LoaderCallbackHandler{ + handlerHelper := &callbacksHelper.LoaderCallbackHandler{ OnStart: func(ctx context.Context, info *callbacks.RunInfo, input *document.LoaderCallbackInput) context.Context { log.Printf("start loading docs...: %s\n", input.Source.URI) return ctx @@ -70,8 +70,8 @@ func main() { } // 使用 callback handler - helper := callbacksHelper.NewHandlerHelper(). - Loader(handler). + handler := callbacksHelper.NewHandlerHelper(). + Loader(handlerHelper). Handler() chain := compose.NewChain[document.Source, []*schema.Document]() @@ -84,7 +84,7 @@ func main() { outDocs, err := run.Invoke(ctx, document.Source{ URI: filePath, - }, compose.WithCallbacks(helper)) + }, compose.WithCallbacks(handler)) if err != nil { log.Fatalf("run.Invoke failed, err=%v", err) } diff --git a/components/document/transformer/splitter/markdown/examples/headersplitter/header_splitter.go b/components/document/transformer/splitter/markdown/examples/headersplitter/header_splitter.go index b04a4ac2..9200ed14 100644 --- a/components/document/transformer/splitter/markdown/examples/headersplitter/header_splitter.go +++ b/components/document/transformer/splitter/markdown/examples/headersplitter/header_splitter.go @@ -1,3 +1,19 @@ +/* + * Copyright 2025 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package main import ( @@ -20,7 +36,7 @@ func main() { transformer, err := markdown.NewHeaderSplitter(ctx, &markdown.HeaderConfig{ // 配置参数 Headers: map[string]string{ - "##": "", + "##": "headerNameOfLevel2", }, }) if err != nil { @@ -46,7 +62,7 @@ func main() { log.Printf("===== call Header Splitter in chain =====") // 创建 callback handler - handler := &callbacksHelper.TransformerCallbackHandler{ + handlerHelper := &callbacksHelper.TransformerCallbackHandler{ OnStart: func(ctx context.Context, info *callbacks.RunInfo, input *document.TransformerCallbackInput) context.Context { log.Printf("input access, len: %v, content: %s\n", len(input.Input), input.Input[0].Content) return ctx @@ -59,8 +75,8 @@ func main() { } // 使用 callback handler - helper := callbacksHelper.NewHandlerHelper(). - Transformer(handler). + handler := callbacksHelper.NewHandlerHelper(). + Transformer(handlerHelper). Handler() chain := compose.NewChain[[]*schema.Document, []*schema.Document]() @@ -72,7 +88,7 @@ func main() { log.Fatalf("chain.Compile failed, err=%v", err) } - outDocs, err := run.Invoke(ctx, []*schema.Document{markdownDoc}, compose.WithCallbacks(helper)) + outDocs, err := run.Invoke(ctx, []*schema.Document{markdownDoc}, compose.WithCallbacks(handler)) if err != nil { log.Fatalf("run.Invoke failed, err=%v", err) } diff --git a/components/document/transformer/splitter/markdown/header.go b/components/document/transformer/splitter/markdown/header.go index c587d182..115e74d5 100644 --- a/components/document/transformer/splitter/markdown/header.go +++ b/components/document/transformer/splitter/markdown/header.go @@ -26,7 +26,21 @@ import ( ) type HeaderConfig struct { - // Headers specify the headers to be identified and their names in document metadata. Headers can only consist of '#'. + // Headers specify the headers to be identified and their names in document metadata. + // Headers can only consist of '#'. + // e.g. 
+ // headers = map[string]string{ "##": "headerNameOfLevel2" } + // then the document split from original documents: + // originDoc := &schema.Document{ + // Content: "hell\n##Title 2\n hello world", + // } + // splitDoc := &schema.Document{ + // Content: "## Title 2\n hello world", + // Metadata: map[string]any{ + // // other fields + // "headerNameOfLevel2": "Title 2", + // }, + // } Headers map[string]string // TrimHeaders specify if results contain header lines. TrimHeaders bool diff --git a/components/embedding/openai/examples/embedding/embedding.go b/components/embedding/openai/examples/embedding/embedding.go index 952e16bf..d04c0720 100644 --- a/components/embedding/openai/examples/embedding/embedding.go +++ b/components/embedding/openai/examples/embedding/embedding.go @@ -60,7 +60,7 @@ func main() { log.Printf("===== call Embedder in Chain =====") - handler := &callbacksHelper.EmbeddingCallbackHandler{ + handlerHelper := &callbacksHelper.EmbeddingCallbackHandler{ OnStart: func(ctx context.Context, runInfo *callbacks.RunInfo, input *embedding.CallbackInput) context.Context { log.Printf("input access, len: %v, content: %s\n", len(input.Texts), input.Texts) return ctx @@ -71,7 +71,9 @@ func main() { }, } - callbackHandler := callbacksHelper.NewHandlerHelper().Embedding(handler).Handler() + handler := callbacksHelper.NewHandlerHelper(). + Embedding(handlerHelper). 
+ Handler() chain := compose.NewChain[[]string, [][]float64]() chain.AppendEmbedding(embedder) @@ -83,7 +85,7 @@ func main() { } vectors, err = runnable.Invoke(ctx, []string{"hello", "how are you"}, - compose.WithCallbacks(callbackHandler)) + compose.WithCallbacks(handler)) if err != nil { panic(fmt.Errorf("runnable.Invoke failed, err=%v", err)) } diff --git a/components/indexer/volc_vikingdb/examples/builtin_embedding/store.go b/components/indexer/volc_vikingdb/examples/builtin_embedding/store.go index 1792c068..2a105d14 100644 --- a/components/indexer/volc_vikingdb/examples/builtin_embedding/store.go +++ b/components/indexer/volc_vikingdb/examples/builtin_embedding/store.go @@ -99,7 +99,7 @@ func main() { log.Printf("===== call Indexer in chain =====") // 创建 callback handler - handler := &callbacksHelper.IndexerCallbackHandler{ + handlerHelper := &callbacksHelper.IndexerCallbackHandler{ OnStart: func(ctx context.Context, info *callbacks.RunInfo, input *indexer.CallbackInput) context.Context { log.Printf("input access, len: %v, content: %s\n", len(input.Docs), input.Docs[0].Content) return ctx @@ -112,8 +112,8 @@ func main() { } // 使用 callback handler - helper := callbacksHelper.NewHandlerHelper(). - Indexer(handler). + handler := callbacksHelper.NewHandlerHelper(). + Indexer(handlerHelper). 
Handler() chain := compose.NewChain[[]*schema.Document, []string]() @@ -125,7 +125,7 @@ func main() { log.Fatalf("chain.Compile failed, err=%v", err) } - outIDs, err := run.Invoke(ctx, docs, compose.WithCallbacks(helper)) + outIDs, err := run.Invoke(ctx, docs, compose.WithCallbacks(handler)) if err != nil { log.Fatalf("run.Invoke failed, err=%v", err) } diff --git a/components/retriever/volc_vikingdb/examples/builtin_embedding/search.go b/components/retriever/volc_vikingdb/examples/builtin_embedding/search.go index be1e1d2d..ee99d5b4 100644 --- a/components/retriever/volc_vikingdb/examples/builtin_embedding/search.go +++ b/components/retriever/volc_vikingdb/examples/builtin_embedding/search.go @@ -97,7 +97,7 @@ func main() { log.Printf("===== call Indexer in chain =====") // 创建 callback handler - handler := &callbacksHelper.RetrieverCallbackHandler{ + handlerHelper := &callbacksHelper.RetrieverCallbackHandler{ OnStart: func(ctx context.Context, info *callbacks.RunInfo, input *retriever.CallbackInput) context.Context { log.Printf("input access, content: %s\n", input.Query) return ctx @@ -110,8 +110,8 @@ func main() { } // 使用 callback handler - helper := callbacksHelper.NewHandlerHelper(). - Retriever(handler). + handler := callbacksHelper.NewHandlerHelper(). + Retriever(handlerHelper). 
Handler() chain := compose.NewChain[string, []*schema.Document]() @@ -123,7 +123,7 @@ func main() { log.Fatalf("chain.Compile failed, err=%v", err) } - outDocs, err := run.Invoke(ctx, query, compose.WithCallbacks(helper)) + outDocs, err := run.Invoke(ctx, query, compose.WithCallbacks(handler)) if err != nil { log.Fatalf("run.Invoke failed, err=%v", err) } From 69031cdb0f17bc3705ca62c1b904ea96387269d3 Mon Sep 17 00:00:00 2001 From: lipandeng Date: Sun, 26 Jan 2025 17:07:27 +0800 Subject: [PATCH 3/4] feat: adjust coments --- .../examples/customloader/{load.go => main.go} | 0 .../fileloader/{file_loader.go => main.go} | 0 .../{header_splitter.go => main.go} | 0 .../transformer/splitter/markdown/header.go | 15 +++++++++++---- .../examples/embedding/{embedding.go => main.go} | 0 .../store.go => embed_indexer/main.go} | 3 ++- .../store.go => indexer/main.go} | 0 .../search.go => embed_retriever/main.go} | 0 .../search.go => retriever/main.go} | 0 9 files changed, 13 insertions(+), 5 deletions(-) rename components/document/loader/file/examples/customloader/{load.go => main.go} (100%) rename components/document/loader/file/examples/fileloader/{file_loader.go => main.go} (100%) rename components/document/transformer/splitter/markdown/examples/headersplitter/{header_splitter.go => main.go} (100%) rename components/embedding/openai/examples/embedding/{embedding.go => main.go} (100%) rename components/indexer/volc_vikingdb/examples/{custom_embedding/store.go => embed_indexer/main.go} (99%) rename components/indexer/volc_vikingdb/examples/{builtin_embedding/store.go => indexer/main.go} (100%) rename components/retriever/volc_vikingdb/examples/{custom_embedding/search.go => embed_retriever/main.go} (100%) rename components/retriever/volc_vikingdb/examples/{builtin_embedding/search.go => retriever/main.go} (100%) diff --git a/components/document/loader/file/examples/customloader/load.go b/components/document/loader/file/examples/customloader/main.go similarity index 100% rename 
from components/document/loader/file/examples/customloader/load.go rename to components/document/loader/file/examples/customloader/main.go diff --git a/components/document/loader/file/examples/fileloader/file_loader.go b/components/document/loader/file/examples/fileloader/main.go similarity index 100% rename from components/document/loader/file/examples/fileloader/file_loader.go rename to components/document/loader/file/examples/fileloader/main.go diff --git a/components/document/transformer/splitter/markdown/examples/headersplitter/header_splitter.go b/components/document/transformer/splitter/markdown/examples/headersplitter/main.go similarity index 100% rename from components/document/transformer/splitter/markdown/examples/headersplitter/header_splitter.go rename to components/document/transformer/splitter/markdown/examples/headersplitter/main.go diff --git a/components/document/transformer/splitter/markdown/header.go b/components/document/transformer/splitter/markdown/header.go index 115e74d5..52b196d3 100644 --- a/components/document/transformer/splitter/markdown/header.go +++ b/components/document/transformer/splitter/markdown/header.go @@ -29,13 +29,20 @@ type HeaderConfig struct { // Headers specify the headers to be identified and their names in document metadata. // Headers can only consist of '#'. // e.g. 
- // headers = map[string]string{ "##": "headerNameOfLevel2" } - // then the document split from original documents: - // originDoc := &schema.Document{ + // // the Header Config: + // config := &HeaderConfig{ + // Headers: map[string]string{ "##": "headerNameOfLevel2" }, + // TrimHeaders: false, + // } + // + // // the original document: + // originDoc := &schema.Document{ // Content: "hell\n##Title 2\n hello world", // } + // + // // one of the split documents: // splitDoc := &schema.Document{ - // Content: "## Title 2\n hello world", + // Content: "##Title 2\n hello world", // Metadata: map[string]any{ // // other fields // "headerNameOfLevel2": "Title 2", diff --git a/components/embedding/openai/examples/embedding/embedding.go b/components/embedding/openai/examples/embedding/main.go similarity index 100% rename from components/embedding/openai/examples/embedding/embedding.go rename to components/embedding/openai/examples/embedding/main.go diff --git a/components/indexer/volc_vikingdb/examples/custom_embedding/store.go b/components/indexer/volc_vikingdb/examples/embed_indexer/main.go similarity index 99% rename from components/indexer/volc_vikingdb/examples/custom_embedding/store.go rename to components/indexer/volc_vikingdb/examples/embed_indexer/main.go index 0245b356..3e54db27 100644 --- a/components/indexer/volc_vikingdb/examples/custom_embedding/store.go +++ b/components/indexer/volc_vikingdb/examples/embed_indexer/main.go @@ -21,9 +21,10 @@ import ( "fmt" "os" - "github.com/cloudwego/eino-ext/components/indexer/volc_vikingdb" "github.com/cloudwego/eino/components/embedding" "github.com/cloudwego/eino/schema" + + "github.com/cloudwego/eino-ext/components/indexer/volc_vikingdb" ) func main() { diff --git a/components/indexer/volc_vikingdb/examples/builtin_embedding/store.go b/components/indexer/volc_vikingdb/examples/indexer/main.go similarity index 100% rename from components/indexer/volc_vikingdb/examples/builtin_embedding/store.go rename to 
components/indexer/volc_vikingdb/examples/indexer/main.go diff --git a/components/retriever/volc_vikingdb/examples/custom_embedding/search.go b/components/retriever/volc_vikingdb/examples/embed_retriever/main.go similarity index 100% rename from components/retriever/volc_vikingdb/examples/custom_embedding/search.go rename to components/retriever/volc_vikingdb/examples/embed_retriever/main.go diff --git a/components/retriever/volc_vikingdb/examples/builtin_embedding/search.go b/components/retriever/volc_vikingdb/examples/retriever/main.go similarity index 100% rename from components/retriever/volc_vikingdb/examples/builtin_embedding/search.go rename to components/retriever/volc_vikingdb/examples/retriever/main.go From 203bc8a9a3240ce776e3e8beebc02a0e1998063b Mon Sep 17 00:00:00 2001 From: lipandeng Date: Sun, 26 Jan 2025 17:10:09 +0800 Subject: [PATCH 4/4] feat: adjust comments format --- .../transformer/splitter/markdown/header.go | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/components/document/transformer/splitter/markdown/header.go b/components/document/transformer/splitter/markdown/header.go index 52b196d3..6d468408 100644 --- a/components/document/transformer/splitter/markdown/header.go +++ b/components/document/transformer/splitter/markdown/header.go @@ -29,25 +29,25 @@ type HeaderConfig struct { // Headers specify the headers to be identified and their names in document metadata. // Headers can only consist of '#'. // e.g.
- // // the Header Config: - // config := &HeaderConfig{ - // Headers: map[string]string{ "##": "headerNameOfLevel2" }, + // // the Header Config: + // config := &HeaderConfig{ + // Headers: map[string]string{ "##": "headerNameOfLevel2" }, // TrimHeaders: false, - // } + // } // - // // the original document: - // originDoc := &schema.Document{ - // Content: "hell\n##Title 2\n hello world", - // } + // // the original document: + // originDoc := &schema.Document{ + // Content: "hell\n##Title 2\n hello world", + // } // - // // one of the split documents: - // splitDoc := &schema.Document{ - // Content: "##Title 2\n hello world", + // // one of the split documents: + // splitDoc := &schema.Document{ + // Content: "##Title 2\n hello world", // Metadata: map[string]any{ - // // other fields + // // other fields // "headerNameOfLevel2": "Title 2", // }, - // } + // } Headers map[string]string // TrimHeaders specify if results contain header lines. TrimHeaders bool