diff --git a/components/indexer/es8/consts.go b/components/indexer/es8/consts.go new file mode 100644 index 0000000..de8b5bb --- /dev/null +++ b/components/indexer/es8/consts.go @@ -0,0 +1,23 @@ +/* + * Copyright 2024 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package es8 + +const typ = "ElasticSearch8" + +const ( + defaultBatchSize = 5 +) diff --git a/components/indexer/es8/go.mod b/components/indexer/es8/go.mod new file mode 100644 index 0000000..a9547e7 --- /dev/null +++ b/components/indexer/es8/go.mod @@ -0,0 +1,52 @@ +module github.com/cloudwego/eino-ext/components/indexer/es8 + +go 1.22 + +require ( + github.com/bytedance/mockey v1.2.13 + github.com/cloudwego/eino v0.3.6 + github.com/elastic/go-elasticsearch/v8 v8.16.0 + github.com/smartystreets/goconvey v1.8.1 +) + +require ( + github.com/bytedance/sonic v1.12.2 // indirect + github.com/bytedance/sonic/loader v0.2.0 // indirect + github.com/cloudwego/base64x v0.1.4 // indirect + github.com/cloudwego/iasm v0.2.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/elastic/elastic-transport-go/v8 v8.6.0 // indirect + github.com/getkin/kin-openapi v0.118.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-openapi/jsonpointer v0.19.5 // indirect + github.com/go-openapi/swag v0.19.5 // indirect + github.com/goph/emperror v0.17.2 // indirect + github.com/gopherjs/gopherjs v1.17.2 // indirect + github.com/invopop/yaml v0.1.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/jtolds/gls v4.20.0+incompatible // indirect + github.com/klauspost/cpuid/v2 v2.0.9 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect + github.com/nikolalohinski/gonja v1.5.3 // indirect + github.com/pelletier/go-toml/v2 v2.0.9 // indirect + github.com/perimeterx/marshmallow v1.1.4 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/sirupsen/logrus v1.9.3 // indirect + github.com/slongfield/pyfmt v0.0.0-20220222012616-ea85ff4c361f // indirect + github.com/smarty/assertions v1.15.0 // indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/yargevad/filepathx v1.0.0 // indirect + go.opentelemetry.io/otel v1.28.0 // indirect + go.opentelemetry.io/otel/metric v1.28.0 // indirect + go.opentelemetry.io/otel/trace v1.28.0 // indirect + golang.org/x/arch v0.11.0 // indirect + golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect + golang.org/x/sys v0.26.0 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/components/indexer/es8/go.sum b/components/indexer/es8/go.sum new file mode 100644 index 0000000..889b85b --- /dev/null +++ b/components/indexer/es8/go.sum @@ -0,0 +1,179 @@ +github.com/airbrake/gobrake v3.6.1+incompatible/go.mod h1:wM4gu3Cn0W0K7GUuVWnlXZU11AGBXMILnrdOU8Kn00o= +github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA= +github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= +github.com/bugsnag/bugsnag-go v1.4.0/go.mod h1:2oa8nejYd4cQ/b0hMIopN0lCRxU0bueqREvZLWFrtK8= +github.com/bugsnag/panicwrap v1.2.0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE= +github.com/bytedance/mockey v1.2.13 h1:jokWZAm/pUEbD939Rhznz615MKUCZNuvCFQlJ2+ntoo= +github.com/bytedance/mockey v1.2.13/go.mod h1:1BPHF9sol5R1ud/+0VEHGQq/+i2lN+GTsr3O2Q9IENY= +github.com/bytedance/sonic v1.12.2 h1:oaMFuRTpMHYLpCntGca65YWt5ny+wAceDERTkT2L9lg= +github.com/bytedance/sonic v1.12.2/go.mod h1:B8Gt/XvtZ3Fqj+iSKMypzymZxw/FVwgIGKzMzT9r/rk= +github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= +github.com/bytedance/sonic/loader v0.2.0 h1:zNprn+lsIP06C/IqCHs3gPQIvnvpKbbxyXQP1iU4kWM= +github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= +github.com/certifi/gocertifi v0.0.0-20190105021004-abcd57078448/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4= +github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y= +github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= +github.com/cloudwego/eino v0.3.6 h1:3yfdKKxMVWefdOyGXHuqUMM5cc9iioijj2mpPsDZKIg= +github.com/cloudwego/eino v0.3.6/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo= +github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg= +github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/elastic/elastic-transport-go/v8 v8.6.0 h1:Y2S/FBjx1LlCv5m6pWAF2kDJAHoSjSRSJCApolgfthA= +github.com/elastic/elastic-transport-go/v8 v8.6.0/go.mod h1:YLHer5cj0csTzNFXoNQ8qhtGY1GTvSqPnKWKaqQE3Hk= +github.com/elastic/go-elasticsearch/v8 v8.16.0 h1:f7bR+iBz8GTAVhwyFO3hm4ixsz2eMaEy0QroYnXV3jE= +github.com/elastic/go-elasticsearch/v8 v8.16.0/go.mod h1:lGMlgKIbYoRvay3xWBeKahAiJOgmFDsjZC39nmO3H64= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/getkin/kin-openapi v0.118.0 h1:z43njxPmJ7TaPpMSCQb7PN0dEYno4tyBPQcrFdHoLuM= +github.com/getkin/kin-openapi v0.118.0/go.mod h1:l5e9PaFUo9fyLJCPGQeXI2ML8c3P8BHOEV2VaAVf/pc= +github.com/getsentry/raven-go v0.2.0/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ= +github.com/go-check/check v0.0.0-20180628173108-788fd7840127 h1:0gkP6mzaMqkmpcJYCFOLkIBwI7xFExG03bbkOkCvUPI= +github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY= +github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= +github.com/go-openapi/swag v0.19.5 h1:lTz6Ys4CmqqCQmZPBlbQENR1/GucA2bzYTE12Pw4tFY= +github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= +github.com/go-test/deep v1.0.8 h1:TDsG77qcSprGbC6vTN8OuXp5g+J+b5Pcguhf7Zt61VM= +github.com/go-test/deep v1.0.8/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= +github.com/gofrs/uuid v3.2.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/goph/emperror v0.17.2 h1:yLapQcmEsO0ipe9p5TaN22djm3OFV/TfM/fcYP0/J18= +github.com/goph/emperror v0.17.2/go.mod h1:+ZbQ+fUNO/6FNiUo0ujtMjhgad9Xa6fQL9KhH4LNHic= +github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g= +github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k= +github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/invopop/yaml v0.1.0 h1:YW3WGUoJEXYfzWBjn00zIlrw7brGVD0fUKRYDPAPhrc= +github.com/invopop/yaml v0.1.0/go.mod h1:2XuRLgs/ouIrW3XNzuNj7J3Nvu/Dig5MXvbCEdiBN3Q= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= +github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= +github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8= +github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mattn/go-colorable v0.1.2 h1:/bC9yWikZXAL9uJdulbSfyVNIR3n3trXl+v8+1sx8mU= +github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-isatty v0.0.8 h1:HLtExJ+uU2HOZ+wI0Tt5DtUDrx8yhUqDcp7fYERX4CE= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b h1:j7+1HpAFS1zy5+Q4qx1fWh90gTKwiN4QCGoY9TWyyO4= +github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= +github.com/nikolalohinski/gonja v1.5.3 h1:GsA+EEaZDZPGJ8JtpeGN78jidhOlxeJROpqMT9fTj9c= +github.com/nikolalohinski/gonja v1.5.3/go.mod h1:RmjwxNiXAEqcq1HeK5SSMmqFJvKOfTfXhkJv6YBtPa4= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= +github.com/pelletier/go-toml/v2 v2.0.9 h1:uH2qQXheeefCCkuBBSLi7jCiSmj3VRh2+Goq2N7Xxu0= +github.com/pelletier/go-toml/v2 v2.0.9/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= +github.com/perimeterx/marshmallow v1.1.4 h1:pZLDH9RjlLGGorbXhcaQLhfuV0pFMNfPO55FuFkxqLw= +github.com/perimeterx/marshmallow v1.1.4/go.mod h1:dsXbUu8CRzfYP5a87xpp0xq9S3u0Vchtcl8we9tYaXw= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rollbar/rollbar-go v1.0.2/go.mod h1:AcFs5f0I+c71bpHlXNNDbOWJiKwjFDtISeXco0L5PKQ= +github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/slongfield/pyfmt v0.0.0-20220222012616-ea85ff4c361f h1:Z2cODYsUxQPofhpYRMQVwWz4yUVpHF+vPi+eUdruUYI= +github.com/slongfield/pyfmt v0.0.0-20220222012616-ea85ff4c361f/go.mod h1:JqzWyvTuI2X4+9wOHmKSQCYxybB/8j6Ko43qVmXDuZg= +github.com/smarty/assertions v1.15.0 h1:cR//PqUBUiQRakZWqBiFFQ9wb8emQGDb0HeGdqGByCY= +github.com/smarty/assertions v1.15.0/go.mod h1:yABtdzeQs6l1brC900WlRNwj6ZR55d7B+E8C6HtKdec= +github.com/smartystreets/goconvey v1.8.1 h1:qGjIddxOk4grTu9JPOU31tVfq3cNdBlNa5sSznIX1xY= +github.com/smartystreets/goconvey v1.8.1/go.mod h1:+/u4qLyY6x1jReYOp7GOM2FSt8aP9CzCZL03bI28W60= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= +github.com/ugorji/go v1.2.7 h1:qYhyWUUd6WbiM+C6JZAUkIJt/1WrjzNHY9+KCIjVqTo= +github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= +github.com/ugorji/go/codec v1.2.7 h1:YPXUKf7fYbp/y8xloBqZOw2qaVggbfwMlI8WM3wZUJ0= +github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY= +github.com/x-cray/logrus-prefixed-formatter v0.5.2 h1:00txxvfBM9muc0jiLIEAkAcIMJzfthRT6usrui8uGmg= +github.com/x-cray/logrus-prefixed-formatter v0.5.2/go.mod h1:2duySbKsL6M18s5GU7VPsoEPHyzalCE06qoARUCeBBE= +github.com/yargevad/filepathx v1.0.0 h1:SYcT+N3tYGi+NvazubCNlvgIPbzAk7i7y2dwg3I5FYc= +github.com/yargevad/filepathx v1.0.0/go.mod h1:BprfX/gpYNJHJfc35GjRRpVcwWXS89gGulUIU5tK3tA= +go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= +go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= +go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= +go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s= +go.opentelemetry.io/otel/sdk v1.21.0 h1:FTt8qirL1EysG6sTQRZ5TokkU8d0ugCj8htOgThZXQ8= +go.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E= +go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= +go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= +go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU= +go.uber.org/mock v0.4.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= +golang.org/x/arch v0.11.0 h1:KXV8WWKCXm6tRpLirl2szsO5j/oOODwZf4hATmGVNs4= +golang.org/x/arch v0.11.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= +golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.11.0 h1:6Ewdq3tDic1mg5xRO4milcWCfMVQhI4NkqWWvqejpuA= +golang.org/x/crypto v0.11.0/go.mod h1:xgJhtzW8F9jGdVFWZESrid1U1bjeNy4zgy5cRr/CIio= +golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 h1:MGwJjxBy0HJshjDNfLsYO8xppfqWlA5ZT9OhtUUhTNw= +golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.10.0 h1:3R7pNqamzBraeqj/Tj8qt1aQ2HpmlC+Cx/qL/7hn4/c= +golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= diff --git a/components/indexer/es8/indexer.go b/components/indexer/es8/indexer.go new file mode 100644 index 0000000..27234f4 --- /dev/null +++ b/components/indexer/es8/indexer.go @@ -0,0 +1,264 @@ +/* + * Copyright 2024 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package es8 + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + + "github.com/cloudwego/eino/callbacks" + "github.com/cloudwego/eino/components" + "github.com/cloudwego/eino/components/embedding" + "github.com/cloudwego/eino/components/indexer" + "github.com/cloudwego/eino/schema" + "github.com/elastic/go-elasticsearch/v8" + "github.com/elastic/go-elasticsearch/v8/esutil" +) + +type IndexerConfig struct { + ESConfig elasticsearch.Config `json:"es_config"` + Index string `json:"index"` + // BatchSize controls max texts size for embedding + BatchSize int `json:"batch_size"` + // FieldMapping supports customize es fields from eino document. + // Each key - FieldValue.Value from field2Value will be saved, and + // vector of FieldValue.Value will be saved if FieldValue.EmbedKey is not empty. + DocumentToFields func(ctx context.Context, doc *schema.Document) (field2Value map[string]FieldValue, err error) + // Embedding vectorization method, must provide in two cases + // 1. VectorFields contains fields except doc Content + // 2. VectorFields contains doc Content and vector not provided in doc extra (see Document.Vector method) + Embedding embedding.Embedder +} + +type FieldValue struct { + // Value original Value + Value any + // EmbedKey if set, Value will be vectorized and saved to es. + // If Stringify method is provided, Embedding input text will be Stringify(Value). + // If Stringify method not set, retriever will try to assert Value as string. + EmbedKey string + // Stringify converts Value to string + Stringify func(val any) (string, error) +} + +type Indexer struct { + client *elasticsearch.Client + config *IndexerConfig +} + +func NewIndexer(_ context.Context, conf *IndexerConfig) (*Indexer, error) { + client, err := elasticsearch.NewClient(conf.ESConfig) + if err != nil { + return nil, fmt.Errorf("[NewIndexer] new es client failed, %w", err) + } + + if conf.DocumentToFields == nil { + return nil, fmt.Errorf("[NewIndexer] DocumentToFields method not provided") + } + + if conf.BatchSize == 0 { + conf.BatchSize = defaultBatchSize + } + + return &Indexer{ + client: client, + config: conf, + }, nil +} + +func (i *Indexer) Store(ctx context.Context, docs []*schema.Document, opts ...indexer.Option) (ids []string, err error) { + defer func() { + if err != nil { + callbacks.OnError(ctx, err) + } + }() + + ctx = callbacks.OnStart(ctx, &indexer.CallbackInput{Docs: docs}) + + options := indexer.GetCommonOptions(&indexer.Options{ + Embedding: i.config.Embedding, + }, opts...) + + if err = i.bulkAdd(ctx, docs, options); err != nil { + return nil, err + } + + ids = iter(docs, func(t *schema.Document) string { return t.ID }) + + callbacks.OnEnd(ctx, &indexer.CallbackOutput{IDs: ids}) + + return ids, nil +} + +func (i *Indexer) bulkAdd(ctx context.Context, docs []*schema.Document, options *indexer.Options) error { + emb := options.Embedding + bi, err := esutil.NewBulkIndexer(esutil.BulkIndexerConfig{ + Index: i.config.Index, + Client: i.client, + }) + if err != nil { + return err + } + + var ( + tuples []tuple + texts []string + ) + + embAndAdd := func() error { + var vectors [][]float64 + + if len(texts) > 0 { + if emb == nil { + return fmt.Errorf("[bulkAdd] embedding method not provided") + } + + vectors, err = emb.EmbedStrings(i.makeEmbeddingCtx(ctx, emb), texts) + if err != nil { + return fmt.Errorf("[bulkAdd] embedding failed, %w", err) + } + + if len(vectors) != len(texts) { + return fmt.Errorf("[bulkAdd] invalid vector length, expected=%d, got=%d", len(texts), len(vectors)) + } + } + + for _, t := range tuples { + fields := t.fields + for k, idx := range t.key2Idx { + fields[k] = vectors[idx] + } + + b, err := json.Marshal(fields) + if err != nil { + return fmt.Errorf("[bulkAdd] marshal bulk item failed, %w", err) + } + + if err = bi.Add(ctx, esutil.BulkIndexerItem{ + Index: i.config.Index, + Action: "index", + DocumentID: t.id, + Body: bytes.NewReader(b), + }); err != nil { + return err + } + } + + tuples = tuples[:0] + texts = texts[:0] + + return nil + } + + for idx := range docs { + doc := docs[idx] + fields, err := i.config.DocumentToFields(ctx, doc) + if err != nil { + return fmt.Errorf("[bulkAdd] FieldMapping failed, %w", err) + } + + rawFields := make(map[string]any) + embSize := 0 + for k, v := range fields { + rawFields[k] = v.Value + if v.EmbedKey != "" { + embSize++ + } + } + + if embSize > i.config.BatchSize { + return fmt.Errorf("[bulkAdd] needEmbeddingFields length over batch size, batch size=%d, got size=%d", + i.config.BatchSize, embSize) + } + + if len(texts)+embSize > i.config.BatchSize { + if err = embAndAdd(); err != nil { + return err + } + } + + key2Idx := make(map[string]int, embSize) + for k, v := range fields { + if v.EmbedKey != "" { + if v.EmbedKey == k { + return fmt.Errorf("[bulkAdd] duplicate key for value and vector, field=%s", k) + } + + var text string + if v.Stringify != nil { + text, err = v.Stringify(v.Value) + if err != nil { + return err + } + } else { + var ok bool + text, ok = v.Value.(string) + if !ok { + return fmt.Errorf("[bulkAdd] assert value as string failed, key=%s, emb_key=%s", k, v.EmbedKey) + } + } + + key2Idx[v.EmbedKey] = len(texts) + texts = append(texts, text) + } + } + + tuples = append(tuples, tuple{ + id: doc.ID, + fields: rawFields, + key2Idx: key2Idx, + }) + } + + if len(tuples) > 0 { + if err = embAndAdd(); err != nil { + return err + } + } + + return bi.Close(ctx) +} + +func (i *Indexer) makeEmbeddingCtx(ctx context.Context, emb embedding.Embedder) context.Context { + runInfo := &callbacks.RunInfo{ + Component: components.ComponentOfEmbedding, + } + + if embType, ok := components.GetType(emb); ok { + runInfo.Type = embType + } + + runInfo.Name = runInfo.Type + string(runInfo.Component) + + return callbacks.ReuseHandlers(ctx, runInfo) +} + +func (i *Indexer) GetType() string { + return typ +} + +func (i *Indexer) IsCallbacksEnabled() bool { + return true +} + +type tuple struct { + id string + fields map[string]any + key2Idx map[string]int +} diff --git a/components/indexer/es8/indexer_test.go b/components/indexer/es8/indexer_test.go new file mode 100644 index 0000000..59623e6 --- /dev/null +++ b/components/indexer/es8/indexer_test.go @@ -0,0 +1,241 @@ +/* + * Copyright 2024 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package es8 + +import ( + "context" + "encoding/json" + "fmt" + "io" + "testing" + + . "github.com/bytedance/mockey" + "github.com/cloudwego/eino/components/embedding" + "github.com/cloudwego/eino/components/indexer" + "github.com/cloudwego/eino/schema" + "github.com/elastic/go-elasticsearch/v8/esutil" + "github.com/smartystreets/goconvey/convey" +) + +func TestBulkAdd(t *testing.T) { + PatchConvey("test bulkAdd", t, func() { + ctx := context.Background() + extField := "extra_field" + + d1 := &schema.Document{ID: "123", Content: "asd", MetaData: map[string]any{extField: "ext_1"}} + d2 := &schema.Document{ID: "456", Content: "qwe", MetaData: map[string]any{extField: "ext_2"}} + docs := []*schema.Document{d1, d2} + bi, err := esutil.NewBulkIndexer(esutil.BulkIndexerConfig{}) + convey.So(err, convey.ShouldBeNil) + + PatchConvey("test NewBulkIndexer error", func() { + mockErr := fmt.Errorf("test err") + Mock(esutil.NewBulkIndexer).Return(nil, mockErr).Build() + i := &Indexer{ + config: &IndexerConfig{ + Index: "mock_index", + DocumentToFields: func(ctx context.Context, doc *schema.Document) (field2Value map[string]FieldValue, err error) { + return nil, nil + }, + }, + } + err := i.bulkAdd(ctx, docs, &indexer.Options{ + Embedding: &mockEmbedding{size: []int{1}, mockVector: []float64{2.1}}, + }) + convey.So(err, convey.ShouldBeError, mockErr) + }) + + PatchConvey("test FieldMapping error", func() { + mockErr := fmt.Errorf("test err") + Mock(esutil.NewBulkIndexer).Return(bi, nil).Build() + i := &Indexer{ + config: &IndexerConfig{ + Index: "mock_index", + DocumentToFields: func(ctx context.Context, doc *schema.Document) (field2Value map[string]FieldValue, err error) { + return nil, mockErr + }, + }, + } + err := i.bulkAdd(ctx, docs, &indexer.Options{ + Embedding: &mockEmbedding{size: []int{1}, mockVector: []float64{2.1}}, + }) + convey.So(err, convey.ShouldBeError, fmt.Errorf("[bulkAdd] FieldMapping failed, %w", mockErr)) + }) + + PatchConvey("test len(needEmbeddingFields) > i.config.BatchSize", func() { + Mock(esutil.NewBulkIndexer).Return(bi, nil).Build() + i := &Indexer{ + config: &IndexerConfig{ + Index: "mock_index", + BatchSize: 1, + DocumentToFields: func(ctx context.Context, doc *schema.Document) (field2Value map[string]FieldValue, err error) { + return map[string]FieldValue{ + "k1": {Value: "v1", EmbedKey: "k"}, + "k2": {Value: "v2", EmbedKey: "kk"}, + }, nil + }, + }, + } + err := i.bulkAdd(ctx, docs, &indexer.Options{ + Embedding: &mockEmbedding{size: []int{1}, mockVector: []float64{2.1}}, + }) + convey.So(err, convey.ShouldBeError, fmt.Errorf("[bulkAdd] needEmbeddingFields length over batch size, batch size=%d, got size=%d", i.config.BatchSize, 2)) + }) + + PatchConvey("test embedding not provided", func() { + Mock(esutil.NewBulkIndexer).Return(bi, nil).Build() + i := &Indexer{ + config: &IndexerConfig{ + Index: "mock_index", + BatchSize: 2, + DocumentToFields: func(ctx context.Context, doc *schema.Document) (field2Value map[string]FieldValue, err error) { + return map[string]FieldValue{ + "k0": {Value: "v0"}, + "k1": {Value: "v1", EmbedKey: "vk1"}, + "k2": {Value: 222, EmbedKey: "vk2", Stringify: func(val any) (string, error) { + return "222", nil + }}, + "k3": {Value: 123}, + }, nil + }, + }, + } + err := i.bulkAdd(ctx, docs, &indexer.Options{ + Embedding: nil, + }) + convey.So(err, convey.ShouldBeError, fmt.Errorf("[bulkAdd] embedding method not provided")) + }) + + PatchConvey("test embed failed", func() { + mockErr := fmt.Errorf("test err") + Mock(esutil.NewBulkIndexer).Return(bi, nil).Build() + i := &Indexer{ + config: &IndexerConfig{ + Index: "mock_index", + BatchSize: 2, + DocumentToFields: func(ctx context.Context, doc *schema.Document) (field2Value map[string]FieldValue, err error) { + return map[string]FieldValue{ + "k0": {Value: "v0"}, + "k1": {Value: "v1", EmbedKey: "vk1"}, + "k2": {Value: 222, EmbedKey: "vk2", Stringify: func(val any) (string, error) { + return "222", nil + }}, + "k3": {Value: 123}, + }, nil + }, + }, + } + err := i.bulkAdd(ctx, docs, &indexer.Options{ + Embedding: &mockEmbedding{err: mockErr}, + }) + convey.So(err, convey.ShouldBeError, fmt.Errorf("[bulkAdd] embedding failed, %w", mockErr)) + }) + + PatchConvey("test len(vectors) != len(texts)", func() { + Mock(esutil.NewBulkIndexer).Return(bi, nil).Build() + i := &Indexer{ + config: &IndexerConfig{ + Index: "mock_index", + BatchSize: 2, + DocumentToFields: func(ctx context.Context, doc *schema.Document) (field2Value map[string]FieldValue, err error) { + return map[string]FieldValue{ + "k0": {Value: "v0"}, + "k1": {Value: "v1", EmbedKey: "vk1"}, + "k2": {Value: 222, EmbedKey: "vk2", Stringify: func(val any) (string, error) { + return "222", nil + }}, + "k3": {Value: 123}, + }, nil + }, + }, + } + err := i.bulkAdd(ctx, docs, &indexer.Options{ + Embedding: &mockEmbedding{size: []int{1}, mockVector: []float64{2.1}}, + }) + convey.So(err, convey.ShouldBeError, fmt.Errorf("[bulkAdd] invalid vector length, expected=%d, got=%d", 2, 1)) + }) + + PatchConvey("test success", func() { + var mps []esutil.BulkIndexerItem + Mock(esutil.NewBulkIndexer).Return(bi, nil).Build() + Mock(GetMethod(bi, "Add")).To(func(ctx context.Context, item esutil.BulkIndexerItem) error { + mps = append(mps, item) + return nil + }).Build() + Mock(GetMethod(bi, "Close")).Return(nil).Build() + + i := &Indexer{ + config: &IndexerConfig{ + Index: "mock_index", + BatchSize: 2, + DocumentToFields: func(ctx context.Context, doc *schema.Document) (field2Value map[string]FieldValue, err error) { + return map[string]FieldValue{ + "k0": {Value: doc.Content}, + "k1": {Value: "v1", EmbedKey: "vk1"}, + "k2": {Value: 222, EmbedKey: "vk2", Stringify: func(val any) (string, error) { return "222", nil }}, + "k3": {Value: 123}, + }, nil + }, + }, + } + err := i.bulkAdd(ctx, docs, &indexer.Options{ + Embedding: &mockEmbedding{size: []int{2, 2}, mockVector: []float64{2.1}}, + }) + convey.So(err, convey.ShouldBeNil) + convey.So(len(mps), convey.ShouldEqual, 2) + for j, doc := range docs { + item := mps[j] + convey.So(item.DocumentID, convey.ShouldEqual, doc.ID) + b, err := io.ReadAll(item.Body) + convey.So(err, convey.ShouldBeNil) + var mp map[string]interface{} + convey.So(json.Unmarshal(b, &mp), convey.ShouldBeNil) + convey.So(mp["k0"], convey.ShouldEqual, doc.Content) + convey.So(mp["k1"], convey.ShouldEqual, "v1") + convey.So(mp["k2"], convey.ShouldEqual, 222) + convey.So(mp["k3"], convey.ShouldEqual, 123) + convey.So(mp["vk1"], convey.ShouldEqual, []any{2.1}) + convey.So(mp["vk2"], convey.ShouldEqual, []any{2.1}) + } + }) + }) +} + +type mockEmbedding struct { + err error + call int + size []int + mockVector []float64 +} + +func (m *mockEmbedding) EmbedStrings(ctx context.Context, texts []string, opts ...embedding.Option) ([][]float64, error) { + if m.err != nil { + return nil, m.err + } + + if m.call >= len(m.size) { + return nil, fmt.Errorf("call limit error") + } + + resp := make([][]float64, m.size[m.call]) + m.call++ + for i := range resp { + resp[i] = m.mockVector + } + + return resp, nil +} diff --git a/components/indexer/es8/utils.go b/components/indexer/es8/utils.go new file mode 100644 index 0000000..937e3b5 --- /dev/null +++ b/components/indexer/es8/utils.go @@ -0,0 +1,30 @@ +/* + * Copyright 2024 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package es8 + +func GetType() string { + return typ +} + +func iter[T, D any](src []T, fn func(T) D) []D { + resp := make([]D, len(src)) + for i := range src { + resp[i] = fn(src[i]) + } + + return resp +} diff --git a/components/retriever/es8/consts.go b/components/retriever/es8/consts.go new file mode 100644 index 0000000..7f5da11 --- /dev/null +++ b/components/retriever/es8/consts.go @@ -0,0 +1,23 @@ +/* + * Copyright 2024 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package es8 + +const typ = "ElasticSearch8" + +func GetType() string { + return typ +} diff --git a/components/retriever/es8/go.mod b/components/retriever/es8/go.mod new file mode 100644 index 0000000..d62c84e --- /dev/null +++ b/components/retriever/es8/go.mod @@ -0,0 +1,55 @@ +module github.com/cloudwego/eino-ext/components/retriever/es8 + +go 1.22 + +require ( + github.com/bytedance/mockey v1.2.13 + github.com/cloudwego/eino v0.3.6 + github.com/elastic/go-elasticsearch/v8 v8.16.0 + github.com/smartystreets/goconvey v1.8.1 + github.com/stretchr/testify v1.9.0 +) + +require ( + github.com/bytedance/sonic v1.12.2 // indirect + github.com/bytedance/sonic/loader v0.2.0 // indirect + github.com/cloudwego/base64x v0.1.4 // indirect + github.com/cloudwego/iasm v0.2.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/elastic/elastic-transport-go/v8 v8.6.0 // indirect + github.com/getkin/kin-openapi v0.118.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-openapi/jsonpointer v0.19.5 // indirect + github.com/go-openapi/swag v0.19.5 // indirect + github.com/goph/emperror v0.17.2 // indirect + github.com/gopherjs/gopherjs v1.17.2 // indirect + github.com/invopop/yaml v0.1.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/jtolds/gls v4.20.0+incompatible // indirect + github.com/klauspost/cpuid/v2 v2.0.9 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect + github.com/nikolalohinski/gonja v1.5.3 // indirect + github.com/pelletier/go-toml/v2 v2.0.9 // indirect + github.com/perimeterx/marshmallow v1.1.4 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/sirupsen/logrus v1.9.3 // indirect + github.com/slongfield/pyfmt v0.0.0-20220222012616-ea85ff4c361f // indirect + github.com/smarty/assertions v1.15.0 // indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/yargevad/filepathx v1.0.0 // indirect + go.opentelemetry.io/otel v1.28.0 // indirect + go.opentelemetry.io/otel/metric v1.28.0 // indirect + go.opentelemetry.io/otel/trace v1.28.0 // indirect + golang.org/x/arch v0.11.0 // indirect + golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect + golang.org/x/sys v0.26.0 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/components/retriever/es8/go.sum b/components/retriever/es8/go.sum new file mode 100644 index 0000000..7b9198f --- /dev/null +++ b/components/retriever/es8/go.sum @@ -0,0 +1,181 @@ +github.com/airbrake/gobrake v3.6.1+incompatible/go.mod h1:wM4gu3Cn0W0K7GUuVWnlXZU11AGBXMILnrdOU8Kn00o= +github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA= +github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= +github.com/bugsnag/bugsnag-go v1.4.0/go.mod h1:2oa8nejYd4cQ/b0hMIopN0lCRxU0bueqREvZLWFrtK8= +github.com/bugsnag/panicwrap v1.2.0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE= +github.com/bytedance/mockey v1.2.13 h1:jokWZAm/pUEbD939Rhznz615MKUCZNuvCFQlJ2+ntoo= +github.com/bytedance/mockey v1.2.13/go.mod h1:1BPHF9sol5R1ud/+0VEHGQq/+i2lN+GTsr3O2Q9IENY= +github.com/bytedance/sonic v1.12.2 h1:oaMFuRTpMHYLpCntGca65YWt5ny+wAceDERTkT2L9lg= +github.com/bytedance/sonic v1.12.2/go.mod h1:B8Gt/XvtZ3Fqj+iSKMypzymZxw/FVwgIGKzMzT9r/rk= +github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= +github.com/bytedance/sonic/loader v0.2.0 h1:zNprn+lsIP06C/IqCHs3gPQIvnvpKbbxyXQP1iU4kWM= +github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= +github.com/certifi/gocertifi v0.0.0-20190105021004-abcd57078448/go.mod h1:GJKEexRPVJrBSOjoqN5VNOIKJ5Q3RViH6eu3puDRwx4= +github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y= +github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= +github.com/cloudwego/eino v0.3.5 h1:9PkAOX/phFifrGXkfl4L9rdecxOQJBJY1FtZqF4bz3c= +github.com/cloudwego/eino v0.3.5/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo= +github.com/cloudwego/eino v0.3.6 h1:3yfdKKxMVWefdOyGXHuqUMM5cc9iioijj2mpPsDZKIg= +github.com/cloudwego/eino v0.3.6/go.mod h1:+kmJimGEcKuSI6OKhet7kBedkm1WUZS3H1QRazxgWUo= +github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg= +github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/elastic/elastic-transport-go/v8 v8.6.0 h1:Y2S/FBjx1LlCv5m6pWAF2kDJAHoSjSRSJCApolgfthA= +github.com/elastic/elastic-transport-go/v8 v8.6.0/go.mod h1:YLHer5cj0csTzNFXoNQ8qhtGY1GTvSqPnKWKaqQE3Hk= +github.com/elastic/go-elasticsearch/v8 v8.16.0 h1:f7bR+iBz8GTAVhwyFO3hm4ixsz2eMaEy0QroYnXV3jE= +github.com/elastic/go-elasticsearch/v8 v8.16.0/go.mod h1:lGMlgKIbYoRvay3xWBeKahAiJOgmFDsjZC39nmO3H64= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/getkin/kin-openapi v0.118.0 h1:z43njxPmJ7TaPpMSCQb7PN0dEYno4tyBPQcrFdHoLuM= +github.com/getkin/kin-openapi v0.118.0/go.mod h1:l5e9PaFUo9fyLJCPGQeXI2ML8c3P8BHOEV2VaAVf/pc= +github.com/getsentry/raven-go v0.2.0/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ= +github.com/go-check/check v0.0.0-20180628173108-788fd7840127 h1:0gkP6mzaMqkmpcJYCFOLkIBwI7xFExG03bbkOkCvUPI= +github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY= +github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= +github.com/go-openapi/swag v0.19.5 h1:lTz6Ys4CmqqCQmZPBlbQENR1/GucA2bzYTE12Pw4tFY= +github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= +github.com/go-test/deep v1.0.8 h1:TDsG77qcSprGbC6vTN8OuXp5g+J+b5Pcguhf7Zt61VM= +github.com/go-test/deep v1.0.8/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= +github.com/gofrs/uuid v3.2.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/goph/emperror v0.17.2 h1:yLapQcmEsO0ipe9p5TaN22djm3OFV/TfM/fcYP0/J18= +github.com/goph/emperror v0.17.2/go.mod h1:+ZbQ+fUNO/6FNiUo0ujtMjhgad9Xa6fQL9KhH4LNHic= +github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g= +github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k= +github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/invopop/yaml v0.1.0 h1:YW3WGUoJEXYfzWBjn00zIlrw7brGVD0fUKRYDPAPhrc= +github.com/invopop/yaml v0.1.0/go.mod h1:2XuRLgs/ouIrW3XNzuNj7J3Nvu/Dig5MXvbCEdiBN3Q= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= +github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= +github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8= +github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mattn/go-colorable v0.1.2 h1:/bC9yWikZXAL9uJdulbSfyVNIR3n3trXl+v8+1sx8mU= +github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-isatty v0.0.8 h1:HLtExJ+uU2HOZ+wI0Tt5DtUDrx8yhUqDcp7fYERX4CE= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b h1:j7+1HpAFS1zy5+Q4qx1fWh90gTKwiN4QCGoY9TWyyO4= +github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= +github.com/nikolalohinski/gonja v1.5.3 h1:GsA+EEaZDZPGJ8JtpeGN78jidhOlxeJROpqMT9fTj9c= +github.com/nikolalohinski/gonja v1.5.3/go.mod h1:RmjwxNiXAEqcq1HeK5SSMmqFJvKOfTfXhkJv6YBtPa4= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= +github.com/pelletier/go-toml/v2 v2.0.9 h1:uH2qQXheeefCCkuBBSLi7jCiSmj3VRh2+Goq2N7Xxu0= +github.com/pelletier/go-toml/v2 v2.0.9/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= +github.com/perimeterx/marshmallow v1.1.4 h1:pZLDH9RjlLGGorbXhcaQLhfuV0pFMNfPO55FuFkxqLw= +github.com/perimeterx/marshmallow v1.1.4/go.mod h1:dsXbUu8CRzfYP5a87xpp0xq9S3u0Vchtcl8we9tYaXw= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rollbar/rollbar-go v1.0.2/go.mod h1:AcFs5f0I+c71bpHlXNNDbOWJiKwjFDtISeXco0L5PKQ= +github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/slongfield/pyfmt v0.0.0-20220222012616-ea85ff4c361f h1:Z2cODYsUxQPofhpYRMQVwWz4yUVpHF+vPi+eUdruUYI= +github.com/slongfield/pyfmt v0.0.0-20220222012616-ea85ff4c361f/go.mod h1:JqzWyvTuI2X4+9wOHmKSQCYxybB/8j6Ko43qVmXDuZg= +github.com/smarty/assertions v1.15.0 h1:cR//PqUBUiQRakZWqBiFFQ9wb8emQGDb0HeGdqGByCY= +github.com/smarty/assertions v1.15.0/go.mod h1:yABtdzeQs6l1brC900WlRNwj6ZR55d7B+E8C6HtKdec= +github.com/smartystreets/goconvey v1.8.1 h1:qGjIddxOk4grTu9JPOU31tVfq3cNdBlNa5sSznIX1xY= +github.com/smartystreets/goconvey v1.8.1/go.mod h1:+/u4qLyY6x1jReYOp7GOM2FSt8aP9CzCZL03bI28W60= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= +github.com/ugorji/go v1.2.7 h1:qYhyWUUd6WbiM+C6JZAUkIJt/1WrjzNHY9+KCIjVqTo= +github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= +github.com/ugorji/go/codec v1.2.7 h1:YPXUKf7fYbp/y8xloBqZOw2qaVggbfwMlI8WM3wZUJ0= +github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY= +github.com/x-cray/logrus-prefixed-formatter v0.5.2 h1:00txxvfBM9muc0jiLIEAkAcIMJzfthRT6usrui8uGmg= +github.com/x-cray/logrus-prefixed-formatter v0.5.2/go.mod h1:2duySbKsL6M18s5GU7VPsoEPHyzalCE06qoARUCeBBE= +github.com/yargevad/filepathx v1.0.0 h1:SYcT+N3tYGi+NvazubCNlvgIPbzAk7i7y2dwg3I5FYc= +github.com/yargevad/filepathx v1.0.0/go.mod h1:BprfX/gpYNJHJfc35GjRRpVcwWXS89gGulUIU5tK3tA= +go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= +go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= +go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= +go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s= +go.opentelemetry.io/otel/sdk v1.21.0 h1:FTt8qirL1EysG6sTQRZ5TokkU8d0ugCj8htOgThZXQ8= +go.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E= +go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= +go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= +go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU= +go.uber.org/mock v0.4.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= +golang.org/x/arch v0.11.0 h1:KXV8WWKCXm6tRpLirl2szsO5j/oOODwZf4hATmGVNs4= +golang.org/x/arch v0.11.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= +golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.11.0 h1:6Ewdq3tDic1mg5xRO4milcWCfMVQhI4NkqWWvqejpuA= +golang.org/x/crypto v0.11.0/go.mod h1:xgJhtzW8F9jGdVFWZESrid1U1bjeNy4zgy5cRr/CIio= +golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 h1:MGwJjxBy0HJshjDNfLsYO8xppfqWlA5ZT9OhtUUhTNw= +golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.10.0 h1:3R7pNqamzBraeqj/Tj8qt1aQ2HpmlC+Cx/qL/7hn4/c= +golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= diff --git a/components/retriever/es8/option.go b/components/retriever/es8/option.go new file mode 100644 index 0000000..2dd3f21 --- /dev/null +++ b/components/retriever/es8/option.go @@ -0,0 +1,27 @@ +/* + * Copyright 2024 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package es8 + +import ( + "github.com/elastic/go-elasticsearch/v8/typedapi/types" +) + +// ESImplOptions es specified options +// Use retriever.GetImplSpecificOptions[ESImplOptions] to get ESImplOptions from options. +type ESImplOptions struct { + Filters []types.Query `json:"filters,omitempty"` +} diff --git a/components/retriever/es8/retriever.go b/components/retriever/es8/retriever.go new file mode 100644 index 0000000..b26c519 --- /dev/null +++ b/components/retriever/es8/retriever.go @@ -0,0 +1,150 @@ +/* + * Copyright 2024 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package es8 + +import ( + "context" + "fmt" + + "github.com/elastic/go-elasticsearch/v8" + "github.com/elastic/go-elasticsearch/v8/typedapi/core/search" + "github.com/elastic/go-elasticsearch/v8/typedapi/types" + + "github.com/cloudwego/eino/callbacks" + "github.com/cloudwego/eino/components/embedding" + "github.com/cloudwego/eino/components/retriever" + "github.com/cloudwego/eino/schema" +) + +type RetrieverConfig struct { + ESConfig elasticsearch.Config `json:"es_config"` + + Index string `json:"index"` + TopK int `json:"top_k"` + ScoreThreshold *float64 `json:"score_threshold"` + + // SearchMode retrieve strategy, see prepared impls in search_mode package: + // use search_mode.SearchModeExactMatch with string query + // use search_mode.SearchModeApproximate with search_mode.ApproximateQuery + // use search_mode.SearchModeDenseVectorSimilarity with search_mode.DenseVectorSimilarityQuery + // use search_mode.SearchModeSparseVectorTextExpansion with search_mode.SparseVectorTextExpansionQuery + // use search_mode.SearchModeRawStringRequest with json search request + SearchMode SearchMode `json:"search_mode"` + // ResultParser parse document from es search hits. + // If ResultParser not provided, defaultResultParser will be used as default + ResultParser func(ctx context.Context, hit types.Hit) (doc *schema.Document, err error) + // Embedding vectorization method, must provide when SearchMode needed + Embedding embedding.Embedder +} + +type SearchMode interface { + // BuildRequest generate search request from config, query and options. + // Additionally, some specified options (like filters for query) will be provided in options, + // and use retriever.GetImplSpecificOptions[options.ESImplOptions] to get it. + BuildRequest(ctx context.Context, conf *RetrieverConfig, query string, opts ...retriever.Option) (*search.Request, error) +} + +type Retriever struct { + client *elasticsearch.TypedClient + config *RetrieverConfig +} + +func NewRetriever(_ context.Context, conf *RetrieverConfig) (*Retriever, error) { + if conf.SearchMode == nil { + return nil, fmt.Errorf("[NewRetriever] search mode not provided") + } + + if conf.ResultParser == nil { + return nil, fmt.Errorf("[NewRetriever] result parser not provided") + } + + client, err := elasticsearch.NewTypedClient(conf.ESConfig) + if err != nil { + return nil, fmt.Errorf("[NewRetriever] new es client failed, %w", err) + } + + return &Retriever{ + client: client, + config: conf, + }, nil +} + +func (r *Retriever) Retrieve(ctx context.Context, query string, opts ...retriever.Option) (docs []*schema.Document, err error) { + defer func() { + if err != nil { + callbacks.OnError(ctx, err) + } + }() + + options := retriever.GetCommonOptions(&retriever.Options{ + Index: &r.config.Index, + TopK: &r.config.TopK, + ScoreThreshold: r.config.ScoreThreshold, + Embedding: r.config.Embedding, + }, opts...) + + ctx = callbacks.OnStart(ctx, &retriever.CallbackInput{ + Query: query, + TopK: *options.TopK, + ScoreThreshold: options.ScoreThreshold, + }) + + req, err := r.config.SearchMode.BuildRequest(ctx, r.config, query, opts...) + if err != nil { + return nil, err + } + + resp, err := r.client.Search(). + Index(r.config.Index). + Request(req). + Do(ctx) + if err != nil { + return nil, err + } + + docs, err = r.parseSearchResult(ctx, resp) + if err != nil { + return nil, err + } + + callbacks.OnEnd(ctx, &retriever.CallbackOutput{Docs: docs}) + + return docs, nil +} + +func (r *Retriever) parseSearchResult(ctx context.Context, resp *search.Response) (docs []*schema.Document, err error) { + docs = make([]*schema.Document, 0, len(resp.Hits.Hits)) + + for _, hit := range resp.Hits.Hits { + doc, err := r.config.ResultParser(ctx, hit) + if err != nil { + return nil, err + } + + docs = append(docs, doc) + } + + return docs, nil +} + +func (r *Retriever) GetType() string { + return typ +} + +func (r *Retriever) IsCallbacksEnabled() bool { + return true +} diff --git a/components/retriever/es8/retriever_test.go b/components/retriever/es8/retriever_test.go new file mode 100644 index 0000000..38f9164 --- /dev/null +++ b/components/retriever/es8/retriever_test.go @@ -0,0 +1,99 @@ +/* + * Copyright 2024 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package es8 + +import ( + "context" + "encoding/json" + "fmt" + "testing" + + "github.com/bytedance/mockey" + "github.com/cloudwego/eino/components/retriever" + "github.com/cloudwego/eino/schema" + "github.com/elastic/go-elasticsearch/v8" + "github.com/elastic/go-elasticsearch/v8/typedapi/core/search" + "github.com/elastic/go-elasticsearch/v8/typedapi/types" + "github.com/stretchr/testify/assert" +) + +func TestNewRetriever(t *testing.T) { + ctx := context.Background() + + t.Run("retrieve_documents", func(t *testing.T) { + r, err := NewRetriever(ctx, &RetrieverConfig{ + ESConfig: elasticsearch.Config{}, + Index: "eino_ut", + TopK: 10, + ResultParser: func(ctx context.Context, hit types.Hit) (doc *schema.Document, err error) { + var mp map[string]any + if err := json.Unmarshal(hit.Source_, &mp); err != nil { + return nil, err + } + + var id string + if hit.Id_ != nil { + id = *hit.Id_ + } + + content, ok := mp["eino_doc_content"].(string) + if !ok { + return nil, fmt.Errorf("content not found") + } + + return &schema.Document{ + ID: id, + Content: content, + MetaData: nil, + }, nil + }, + SearchMode: &mockSearchMode{}, + }) + assert.NoError(t, err) + + defer mockey.Mock(mockey.GetMethod(r.client.Search(), "Index")). + Return(r.client.Search()).Build().Patch().UnPatch() + + defer mockey.Mock(mockey.GetMethod(r.client.Search(), "Request")). + Return(r.client.Search()).Build().Patch().UnPatch() + + defer mockey.Mock(mockey.GetMethod(r.client.Search(), "Do")).Return(&search.Response{ + Hits: types.HitsMetadata{ + Hits: []types.Hit{ + { + Source_: json.RawMessage([]byte(`{ + "eino_doc_content": "i'm fine, thank you" +}`)), + }, + }, + }, + }, nil).Build().Patch().UnPatch() + + docs, err := r.Retrieve(ctx, "how are you") + assert.NoError(t, err) + + assert.Len(t, docs, 1) + assert.Equal(t, "i'm fine, thank you", docs[0].Content) + }) + +} + +type mockSearchMode struct{} + +func (m *mockSearchMode) BuildRequest(ctx context.Context, conf *RetrieverConfig, query string, opts ...retriever.Option) (*search.Request, error) { + return &search.Request{}, nil +} diff --git a/components/retriever/es8/search_mode/approximate.go b/components/retriever/es8/search_mode/approximate.go new file mode 100644 index 0000000..87e6e31 --- /dev/null +++ b/components/retriever/es8/search_mode/approximate.go @@ -0,0 +1,145 @@ +/* + * Copyright 2025 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package search_mode + +import ( + "context" + "fmt" + + "github.com/cloudwego/eino-ext/components/retriever/es8" + "github.com/cloudwego/eino/components/retriever" + "github.com/elastic/go-elasticsearch/v8/typedapi/core/search" + "github.com/elastic/go-elasticsearch/v8/typedapi/types" +) + +// SearchModeApproximate retrieve with multiple approximate strategy (filter+knn+rrf) +// knn: https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html +// rrf: https://www.elastic.co/guide/en/elasticsearch/reference/current/rrf.html +func SearchModeApproximate(config *ApproximateConfig) es8.SearchMode { + return &approximate{config} +} + +type ApproximateConfig struct { + // QueryFieldName the name of query field, required when using Hybrid + QueryFieldName string + // VectorFieldName the name of the vector field to search against, required + VectorFieldName string + // Hybrid if true, add filters and rff to knn query + Hybrid bool + // RRF (Reciprocal Rank Fusion) is a method for combining multiple result sets, is used to + // even the score from the knn query and text query + RRF bool + // RRFRankConstant determines how much influence documents in + // individual result sets per query have over the final ranked result set + RRFRankConstant *int64 + // RRFWindowSize determines the size ptrWithoutZero the individual result sets per query + RRFWindowSize *int64 + // QueryVectorBuilderModelID the query vector builder model id + // see: https://www.elastic.co/guide/en/machine-learning/8.16/ml-nlp-text-emb-vector-search-example.html + QueryVectorBuilderModelID *string + // Boost Floating point number used to decrease or increase the relevance scores ptrWithoutZero the query. + // Boost values are relative to the default value ptrWithoutZero 1.0. + // A boost value between 0 and 1.0 decreases the relevance score. + // A value greater than 1.0 increases the relevance score. + Boost *float32 + // K The final number ptrWithoutZero nearest neighbors to return as top hits + K *int + // NumCandidates The number ptrWithoutZero nearest neighbor candidates to consider per shard + NumCandidates *int + // Similarity The minimum similarity for a vector to be considered a match + Similarity *float32 +} + +type approximate struct { + config *ApproximateConfig +} + +func (a *approximate) BuildRequest(ctx context.Context, conf *es8.RetrieverConfig, query string, opts ...retriever.Option) (*search.Request, error) { + + co := retriever.GetCommonOptions(&retriever.Options{ + Index: ptrWithoutZero(conf.Index), + TopK: ptrWithoutZero(conf.TopK), + ScoreThreshold: conf.ScoreThreshold, + Embedding: conf.Embedding, + }, opts...) + + io := retriever.GetImplSpecificOptions[es8.ESImplOptions](nil, opts...) + + knn := types.KnnSearch{ + Boost: a.config.Boost, + Field: a.config.VectorFieldName, + Filter: io.Filters, + K: a.config.K, + NumCandidates: a.config.NumCandidates, + QueryVector: nil, + QueryVectorBuilder: nil, + Similarity: a.config.Similarity, + } + + if a.config.QueryVectorBuilderModelID != nil { + knn.QueryVectorBuilder = &types.QueryVectorBuilder{TextEmbedding: &types.TextEmbedding{ + ModelId: *a.config.QueryVectorBuilderModelID, + ModelText: query, + }} + } else { + emb := co.Embedding + if emb == nil { + return nil, fmt.Errorf("[BuildRequest][SearchModeApproximate] embedding not provided") + } + + vector, err := emb.EmbedStrings(makeEmbeddingCtx(ctx, emb), []string{query}) + if err != nil { + return nil, fmt.Errorf("[BuildRequest][SearchModeApproximate] embedding failed, %w", err) + } + + if len(vector) != 1 { + return nil, fmt.Errorf("[BuildRequest][SearchModeApproximate] vector len error, expected=1, got=%d", len(vector)) + } + + knn.QueryVector = f64To32(vector[0]) + } + + req := &search.Request{Knn: []types.KnnSearch{knn}, Size: co.TopK} + + if a.config.Hybrid { + req.Query = &types.Query{ + Bool: &types.BoolQuery{ + Filter: io.Filters, + Must: []types.Query{ + { + Match: map[string]types.MatchQuery{ + a.config.QueryFieldName: {Query: query}, + }, + }, + }, + }, + } + + if a.config.RRF { + req.Rank = &types.RankContainer{Rrf: &types.RrfRank{ + RankConstant: a.config.RRFRankConstant, + RankWindowSize: a.config.RRFWindowSize, + }} + } + } + + if co.ScoreThreshold != nil { + req.MinScore = (*types.Float64)(ptrWithoutZero(*co.ScoreThreshold)) + } + + return req, nil +} diff --git a/components/retriever/es8/search_mode/approximate_test.go b/components/retriever/es8/search_mode/approximate_test.go new file mode 100644 index 0000000..897dd94 --- /dev/null +++ b/components/retriever/es8/search_mode/approximate_test.go @@ -0,0 +1,144 @@ +/* + * Copyright 2025 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package search_mode + +import ( + "context" + "encoding/json" + "testing" + + . "github.com/bytedance/mockey" + "github.com/cloudwego/eino-ext/components/retriever/es8" + "github.com/cloudwego/eino/components/embedding" + "github.com/cloudwego/eino/components/retriever" + "github.com/elastic/go-elasticsearch/v8/typedapi/types" + "github.com/smartystreets/goconvey/convey" +) + +func TestSearchModeApproximate(t *testing.T) { + PatchConvey("test SearchModeApproximate", t, func() { + PatchConvey("test BuildRequest", func() { + ctx := context.Background() + queryFieldName := "eino_doc_content" + vectorFieldName := "vector_eino_doc_content" + query := "content" + + PatchConvey("test QueryVectorBuilderModelID", func() { + a := &approximate{config: &ApproximateConfig{ + QueryFieldName: queryFieldName, + VectorFieldName: vectorFieldName, + Hybrid: false, + RRF: false, + RRFRankConstant: nil, + RRFWindowSize: nil, + QueryVectorBuilderModelID: ptrWithoutZero("mock_model"), + Boost: ptrWithoutZero(float32(1.0)), + K: ptrWithoutZero(10), + NumCandidates: ptrWithoutZero(100), + Similarity: ptrWithoutZero(float32(0.5)), + }} + + conf := &es8.RetrieverConfig{} + req, err := a.BuildRequest(ctx, conf, query, + retriever.WithEmbedding(nil), + retriever.WrapImplSpecificOptFn[es8.ESImplOptions](func(o *es8.ESImplOptions) { + o.Filters = []types.Query{ + {Match: map[string]types.MatchQuery{"label": {Query: "good"}}}, + } + })) + convey.So(err, convey.ShouldBeNil) + b, err := json.Marshal(req) + convey.So(err, convey.ShouldBeNil) + convey.So(string(b), convey.ShouldEqual, `{"knn":[{"boost":1,"field":"vector_eino_doc_content","filter":[{"match":{"label":{"query":"good"}}}],"k":10,"num_candidates":100,"query_vector_builder":{"text_embedding":{"model_id":"mock_model","model_text":"content"}},"similarity":0.5}]}`) + }) + + PatchConvey("test embedding", func() { + a := &approximate{config: &ApproximateConfig{ + QueryFieldName: queryFieldName, + VectorFieldName: vectorFieldName, + Hybrid: false, + RRF: false, + RRFRankConstant: nil, + RRFWindowSize: nil, + QueryVectorBuilderModelID: nil, + Boost: ptrWithoutZero(float32(1.0)), + K: ptrWithoutZero(10), + NumCandidates: ptrWithoutZero(100), + Similarity: ptrWithoutZero(float32(0.5)), + }} + + conf := &es8.RetrieverConfig{} + req, err := a.BuildRequest(ctx, conf, query, + retriever.WithEmbedding(&mockEmbedding{size: 1, mockVector: []float64{1.1, 1.2}}), + retriever.WrapImplSpecificOptFn[es8.ESImplOptions](func(o *es8.ESImplOptions) { + o.Filters = []types.Query{ + {Match: map[string]types.MatchQuery{"label": {Query: "good"}}}, + } + })) + convey.So(err, convey.ShouldBeNil) + b, err := json.Marshal(req) + convey.So(err, convey.ShouldBeNil) + convey.So(string(b), convey.ShouldEqual, `{"knn":[{"boost":1,"field":"vector_eino_doc_content","filter":[{"match":{"label":{"query":"good"}}}],"k":10,"num_candidates":100,"query_vector":[1.1,1.2],"similarity":0.5}]}`) + }) + + PatchConvey("test hybrid with rrf", func() { + a := &approximate{config: &ApproximateConfig{ + QueryFieldName: queryFieldName, + VectorFieldName: vectorFieldName, + Hybrid: true, + RRF: true, + RRFRankConstant: ptrWithoutZero(int64(10)), + RRFWindowSize: ptrWithoutZero(int64(5)), + QueryVectorBuilderModelID: nil, + Boost: ptrWithoutZero(float32(1.0)), + K: ptrWithoutZero(10), + NumCandidates: ptrWithoutZero(100), + Similarity: ptrWithoutZero(float32(0.5)), + }} + + conf := &es8.RetrieverConfig{} + req, err := a.BuildRequest(ctx, conf, query, + retriever.WithEmbedding(&mockEmbedding{size: 1, mockVector: []float64{1.1, 1.2}}), + retriever.WithTopK(10), + retriever.WithScoreThreshold(1.1), + retriever.WrapImplSpecificOptFn[es8.ESImplOptions](func(o *es8.ESImplOptions) { + o.Filters = []types.Query{ + {Match: map[string]types.MatchQuery{"label": {Query: "good"}}}, + } + })) + convey.So(err, convey.ShouldBeNil) + b, err := json.Marshal(req) + convey.So(err, convey.ShouldBeNil) + convey.So(string(b), convey.ShouldEqual, `{"knn":[{"boost":1,"field":"vector_eino_doc_content","filter":[{"match":{"label":{"query":"good"}}}],"k":10,"num_candidates":100,"query_vector":[1.1,1.2],"similarity":0.5}],"min_score":1.1,"query":{"bool":{"filter":[{"match":{"label":{"query":"good"}}}],"must":[{"match":{"eino_doc_content":{"query":"content"}}}]}},"rank":{"rrf":{"rank_constant":10,"rank_window_size":5}},"size":10}`) + }) + }) + }) +} + +type mockEmbedding struct { + size int + mockVector []float64 +} + +func (m mockEmbedding) EmbedStrings(ctx context.Context, texts []string, opts ...embedding.Option) ([][]float64, error) { + resp := make([][]float64, m.size) + for i := range resp { + resp[i] = m.mockVector + } + + return resp, nil +} diff --git a/components/retriever/es8/search_mode/dense_vector_similarity.go b/components/retriever/es8/search_mode/dense_vector_similarity.go new file mode 100644 index 0000000..cd4df4d --- /dev/null +++ b/components/retriever/es8/search_mode/dense_vector_similarity.go @@ -0,0 +1,115 @@ +/* + * Copyright 2025 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package search_mode + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/cloudwego/eino-ext/components/retriever/es8" + "github.com/cloudwego/eino/components/retriever" + "github.com/elastic/go-elasticsearch/v8/typedapi/core/search" + "github.com/elastic/go-elasticsearch/v8/typedapi/types" +) + +// SearchModeDenseVectorSimilarity calculate embedding similarity between dense_vector field and query +// see: https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-script-score-query.html#vector-functions +func SearchModeDenseVectorSimilarity(typ DenseVectorSimilarityType, vectorFieldName string) es8.SearchMode { + return &denseVectorSimilarity{fmt.Sprintf(denseVectorScriptMap[typ], vectorFieldName)} +} + +type denseVectorSimilarity struct { + script string +} + +func (d *denseVectorSimilarity) BuildRequest(ctx context.Context, conf *es8.RetrieverConfig, query string, + opts ...retriever.Option) (*search.Request, error) { + + co := retriever.GetCommonOptions(&retriever.Options{ + Index: ptrWithoutZero(conf.Index), + TopK: ptrWithoutZero(conf.TopK), + ScoreThreshold: conf.ScoreThreshold, + Embedding: conf.Embedding, + }, opts...) + + io := retriever.GetImplSpecificOptions[es8.ESImplOptions](nil, opts...) + + emb := co.Embedding + if emb == nil { + return nil, fmt.Errorf("[BuildRequest][SearchModeDenseVectorSimilarity] embedding not provided") + } + + vector, err := emb.EmbedStrings(makeEmbeddingCtx(ctx, emb), []string{query}) + if err != nil { + return nil, fmt.Errorf("[BuildRequest][SearchModeDenseVectorSimilarity] embedding failed, %w", err) + } + + if len(vector) != 1 { + return nil, fmt.Errorf("[BuildRequest][SearchModeDenseVectorSimilarity] vector size invalid, expect=1, got=%d", len(vector)) + } + + vb, err := json.Marshal(vector[0]) + if err != nil { + return nil, fmt.Errorf("[BuildRequest][SearchModeDenseVectorSimilarity] marshal vector to bytes failed, %w", err) + } + + q := &types.Query{ + ScriptScore: &types.ScriptScoreQuery{ + Script: types.Script{ + Source: ptrWithoutZero(d.script), + Params: map[string]json.RawMessage{"embedding": vb}, + }, + }, + } + + if len(io.Filters) > 0 { + q.ScriptScore.Query = &types.Query{ + Bool: &types.BoolQuery{Filter: io.Filters}, + } + } else { + q.ScriptScore.Query = &types.Query{ + MatchAll: &types.MatchAllQuery{}, + } + } + + req := &search.Request{Query: q, Size: co.TopK} + if co.ScoreThreshold != nil { + req.MinScore = (*types.Float64)(ptrWithoutZero(*co.ScoreThreshold)) + } + + return req, nil +} + +type DenseVectorSimilarityType string + +const ( + DenseVectorSimilarityTypeCosineSimilarity DenseVectorSimilarityType = "cosineSimilarity" + DenseVectorSimilarityTypeDotProduct DenseVectorSimilarityType = "dotProduct" + DenseVectorSimilarityTypeL1Norm DenseVectorSimilarityType = "l1norm" + DenseVectorSimilarityTypeL2Norm DenseVectorSimilarityType = "l2norm" +) + +var denseVectorScriptMap = map[DenseVectorSimilarityType]string{ + DenseVectorSimilarityTypeCosineSimilarity: `cosineSimilarity(params.embedding, '%s') + 1.0`, + DenseVectorSimilarityTypeDotProduct: ` + double value = dotProduct(params.query_vector, '%s'); + return sigmoid(1, Math.E, -value); + `, + DenseVectorSimilarityTypeL1Norm: `1 / (1 + l1norm(params.embedding, '%s'))`, + DenseVectorSimilarityTypeL2Norm: `1 / (1 + l2norm(params.embedding, '%s'))`, +} diff --git a/components/retriever/es8/search_mode/dense_vector_similarity_test.go b/components/retriever/es8/search_mode/dense_vector_similarity_test.go new file mode 100644 index 0000000..1a7f548 --- /dev/null +++ b/components/retriever/es8/search_mode/dense_vector_similarity_test.go @@ -0,0 +1,84 @@ +/* + * Copyright 2025 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package search_mode + +import ( + "context" + "encoding/json" + "fmt" + "testing" + + . "github.com/bytedance/mockey" + "github.com/cloudwego/eino-ext/components/retriever/es8" + "github.com/cloudwego/eino/components/retriever" + "github.com/elastic/go-elasticsearch/v8/typedapi/types" + "github.com/smartystreets/goconvey/convey" +) + +func TestSearchModeDenseVectorSimilarity(t *testing.T) { + PatchConvey("test SearchModeDenseVectorSimilarity", t, func() { + PatchConvey("test BuildRequest", func() { + ctx := context.Background() + vectorFieldName := "vector_eino_doc_content" + d := SearchModeDenseVectorSimilarity(DenseVectorSimilarityTypeCosineSimilarity, vectorFieldName) + query := "content" + + PatchConvey("test embedding not provided", func() { + conf := &es8.RetrieverConfig{} + req, err := d.BuildRequest(ctx, conf, query, retriever.WithEmbedding(nil)) + convey.So(err, convey.ShouldBeError, "[BuildRequest][SearchModeDenseVectorSimilarity] embedding not provided") + convey.So(req, convey.ShouldBeNil) + }) + + PatchConvey("test vector size invalid", func() { + conf := &es8.RetrieverConfig{} + req, err := d.BuildRequest(ctx, conf, query, retriever.WithEmbedding(mockEmbedding{size: 2, mockVector: []float64{1.1, 1.2}})) + convey.So(err, convey.ShouldBeError, "[BuildRequest][SearchModeDenseVectorSimilarity] vector size invalid, expect=1, got=2") + convey.So(req, convey.ShouldBeNil) + }) + + PatchConvey("test success", func() { + typ2Exp := map[DenseVectorSimilarityType]string{ + DenseVectorSimilarityTypeCosineSimilarity: `{"min_score":1.1,"query":{"script_score":{"query":{"bool":{"filter":[{"match":{"label":{"query":"good"}}}]}},"script":{"params":{"embedding":[1.1,1.2]},"source":"cosineSimilarity(params.embedding, 'vector_eino_doc_content') + 1.0"}}},"size":10}`, + DenseVectorSimilarityTypeDotProduct: `{"min_score":1.1,"query":{"script_score":{"query":{"bool":{"filter":[{"match":{"label":{"query":"good"}}}]}},"script":{"params":{"embedding":[1.1,1.2]},"source":"\n double value = dotProduct(params.query_vector, 'vector_eino_doc_content');\n return sigmoid(1, Math.E, -value);\n "}}},"size":10}`, + DenseVectorSimilarityTypeL1Norm: `{"min_score":1.1,"query":{"script_score":{"query":{"bool":{"filter":[{"match":{"label":{"query":"good"}}}]}},"script":{"params":{"embedding":[1.1,1.2]},"source":"1 / (1 + l1norm(params.embedding, 'vector_eino_doc_content'))"}}},"size":10}`, + DenseVectorSimilarityTypeL2Norm: `{"min_score":1.1,"query":{"script_score":{"query":{"bool":{"filter":[{"match":{"label":{"query":"good"}}}]}},"script":{"params":{"embedding":[1.1,1.2]},"source":"1 / (1 + l2norm(params.embedding, 'vector_eino_doc_content'))"}}},"size":10}`, + } + + for typ, exp := range typ2Exp { + similarity := SearchModeDenseVectorSimilarity(typ, vectorFieldName) + + conf := &es8.RetrieverConfig{} + req, err := similarity.BuildRequest(ctx, conf, query, retriever.WithEmbedding(&mockEmbedding{size: 1, mockVector: []float64{1.1, 1.2}}), + retriever.WithTopK(10), + retriever.WithScoreThreshold(1.1), + retriever.WrapImplSpecificOptFn[es8.ESImplOptions](func(o *es8.ESImplOptions) { + o.Filters = []types.Query{ + {Match: map[string]types.MatchQuery{"label": {Query: "good"}}}, + } + })) + + convey.So(err, convey.ShouldBeNil) + b, err := json.Marshal(req) + convey.So(err, convey.ShouldBeNil) + fmt.Println(string(b)) + convey.So(string(b), convey.ShouldEqual, exp) + } + }) + }) + }) +} diff --git a/components/retriever/es8/search_mode/exact_match.go b/components/retriever/es8/search_mode/exact_match.go new file mode 100644 index 0000000..0282eef --- /dev/null +++ b/components/retriever/es8/search_mode/exact_match.go @@ -0,0 +1,58 @@ +/* + * Copyright 2025 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package search_mode + +import ( + "context" + + "github.com/cloudwego/eino-ext/components/retriever/es8" + "github.com/cloudwego/eino/components/retriever" + "github.com/elastic/go-elasticsearch/v8/typedapi/core/search" + "github.com/elastic/go-elasticsearch/v8/typedapi/types" +) + +func SearchModeExactMatch(queryFieldName string) es8.SearchMode { + return &exactMatch{queryFieldName} +} + +type exactMatch struct { + name string +} + +func (e exactMatch) BuildRequest(ctx context.Context, conf *es8.RetrieverConfig, query string, + opts ...retriever.Option) (*search.Request, error) { + + options := retriever.GetCommonOptions(&retriever.Options{ + Index: ptrWithoutZero(conf.Index), + TopK: ptrWithoutZero(conf.TopK), + ScoreThreshold: conf.ScoreThreshold, + Embedding: conf.Embedding, + }, opts...) + + q := &types.Query{ + Match: map[string]types.MatchQuery{ + e.name: {Query: query}, + }, + } + + req := &search.Request{Query: q, Size: options.TopK} + if options.ScoreThreshold != nil { + req.MinScore = (*types.Float64)(ptrWithoutZero(*options.ScoreThreshold)) + } + + return req, nil +} diff --git a/components/retriever/es8/search_mode/exact_match_test.go b/components/retriever/es8/search_mode/exact_match_test.go new file mode 100644 index 0000000..fc90250 --- /dev/null +++ b/components/retriever/es8/search_mode/exact_match_test.go @@ -0,0 +1,41 @@ +/* + * Copyright 2025 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package search_mode + +import ( + "context" + "encoding/json" + "testing" + + . "github.com/bytedance/mockey" + "github.com/cloudwego/eino-ext/components/retriever/es8" + "github.com/smartystreets/goconvey/convey" +) + +func TestSearchModeExactMatch(t *testing.T) { + PatchConvey("test SearchModeExactMatch", t, func() { + ctx := context.Background() + conf := &es8.RetrieverConfig{} + searchMode := SearchModeExactMatch("test_field") + req, err := searchMode.BuildRequest(ctx, conf, "test_query") + convey.So(err, convey.ShouldBeNil) + b, err := json.Marshal(req) + convey.So(err, convey.ShouldBeNil) + convey.So(string(b), convey.ShouldEqual, `{"query":{"match":{"test_field":{"query":"test_query"}}}}`) + }) + +} diff --git a/components/retriever/es8/search_mode/raw_string.go b/components/retriever/es8/search_mode/raw_string.go new file mode 100644 index 0000000..01eccd9 --- /dev/null +++ b/components/retriever/es8/search_mode/raw_string.go @@ -0,0 +1,42 @@ +/* + * Copyright 2025 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package search_mode + +import ( + "context" + + "github.com/cloudwego/eino-ext/components/retriever/es8" + "github.com/cloudwego/eino/components/retriever" + "github.com/elastic/go-elasticsearch/v8/typedapi/core/search" +) + +func SearchModeRawStringRequest() es8.SearchMode { + return &rawString{} +} + +type rawString struct{} + +func (r rawString) BuildRequest(ctx context.Context, conf *es8.RetrieverConfig, query string, + opts ...retriever.Option) (*search.Request, error) { + + req, err := search.NewRequest().FromJSON(query) + if err != nil { + return nil, err + } + + return req, nil +} diff --git a/components/retriever/es8/search_mode/raw_string_test.go b/components/retriever/es8/search_mode/raw_string_test.go new file mode 100644 index 0000000..75d2619 --- /dev/null +++ b/components/retriever/es8/search_mode/raw_string_test.go @@ -0,0 +1,48 @@ +/* + * Copyright 2025 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package search_mode + +import ( + "context" + "testing" + + . "github.com/bytedance/mockey" + "github.com/cloudwego/eino-ext/components/retriever/es8" + "github.com/smartystreets/goconvey/convey" +) + +func TestSearchModeRawStringRequest(t *testing.T) { + PatchConvey("test SearchModeRawStringRequest", t, func() { + ctx := context.Background() + conf := &es8.RetrieverConfig{} + searchMode := SearchModeRawStringRequest() + + PatchConvey("test from json error", func() { + r, err := searchMode.BuildRequest(ctx, conf, "test_query") + convey.So(err, convey.ShouldNotBeNil) + convey.So(r, convey.ShouldBeNil) + }) + + PatchConvey("test success", func() { + q := `{"query":{"match":{"test_field":{"query":"test_query"}}}}` + r, err := searchMode.BuildRequest(ctx, conf, q) + convey.So(err, convey.ShouldBeNil) + convey.So(r, convey.ShouldNotBeNil) + convey.So(r.Query.Match["test_field"].Query, convey.ShouldEqual, "test_query") + }) + }) +} diff --git a/components/retriever/es8/search_mode/sparse_vector_text_expansion.go b/components/retriever/es8/search_mode/sparse_vector_text_expansion.go new file mode 100644 index 0000000..0b9a999 --- /dev/null +++ b/components/retriever/es8/search_mode/sparse_vector_text_expansion.go @@ -0,0 +1,74 @@ +/* + * Copyright 2025 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package search_mode + +import ( + "context" + "fmt" + + "github.com/cloudwego/eino-ext/components/retriever/es8" + "github.com/cloudwego/eino/components/retriever" + "github.com/elastic/go-elasticsearch/v8/typedapi/core/search" + "github.com/elastic/go-elasticsearch/v8/typedapi/types" +) + +// SearchModeSparseVectorTextExpansion convert the query text into a list ptrWithoutZero token-weight pairs, +// which are then used in a query against a sparse vector +// see: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-text-expansion-query.html +func SearchModeSparseVectorTextExpansion(modelID, vectorFieldName string) es8.SearchMode { + return &sparseVectorTextExpansion{modelID, vectorFieldName} +} + +type sparseVectorTextExpansion struct { + modelID string + vectorFieldName string +} + +func (s sparseVectorTextExpansion) BuildRequest(ctx context.Context, conf *es8.RetrieverConfig, query string, + opts ...retriever.Option) (*search.Request, error) { + + co := retriever.GetCommonOptions(&retriever.Options{ + Index: ptrWithoutZero(conf.Index), + TopK: ptrWithoutZero(conf.TopK), + ScoreThreshold: conf.ScoreThreshold, + Embedding: conf.Embedding, + }, opts...) + + io := retriever.GetImplSpecificOptions[es8.ESImplOptions](nil, opts...) + + name := fmt.Sprintf("%s.tokens", s.vectorFieldName) + teq := types.TextExpansionQuery{ + ModelId: s.modelID, + ModelText: query, + } + + q := &types.Query{ + Bool: &types.BoolQuery{ + Must: []types.Query{ + {TextExpansion: map[string]types.TextExpansionQuery{name: teq}}, + }, + Filter: io.Filters, + }, + } + + req := &search.Request{Query: q, Size: co.TopK} + if co.ScoreThreshold != nil { + req.MinScore = (*types.Float64)(ptrWithoutZero(*co.ScoreThreshold)) + } + + return req, nil +} diff --git a/components/retriever/es8/search_mode/sparse_vector_text_expansion_test.go b/components/retriever/es8/search_mode/sparse_vector_text_expansion_test.go new file mode 100644 index 0000000..019d0ce --- /dev/null +++ b/components/retriever/es8/search_mode/sparse_vector_text_expansion_test.go @@ -0,0 +1,55 @@ +/* + * Copyright 2025 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package search_mode + +import ( + "context" + "encoding/json" + "testing" + + . "github.com/bytedance/mockey" + "github.com/cloudwego/eino-ext/components/retriever/es8" + "github.com/cloudwego/eino/components/retriever" + "github.com/elastic/go-elasticsearch/v8/typedapi/types" + "github.com/smartystreets/goconvey/convey" +) + +func TestSearchModeSparseVectorTextExpansion(t *testing.T) { + PatchConvey("test SearchModeSparseVectorTextExpansion", t, func() { + PatchConvey("test BuildRequest", func() { + ctx := context.Background() + vectorFieldName := "vector_eino_doc_content" + s := SearchModeSparseVectorTextExpansion("mock_model_id", vectorFieldName) + + conf := &es8.RetrieverConfig{} + req, err := s.BuildRequest(ctx, conf, "content", + retriever.WithTopK(10), + retriever.WithScoreThreshold(1.1), + retriever.WrapImplSpecificOptFn[es8.ESImplOptions](func(o *es8.ESImplOptions) { + o.Filters = []types.Query{ + {Match: map[string]types.MatchQuery{"label": {Query: "good"}}}, + } + })) + + convey.So(err, convey.ShouldBeNil) + convey.So(req, convey.ShouldNotBeNil) + b, err := json.Marshal(req) + convey.So(err, convey.ShouldBeNil) + convey.So(string(b), convey.ShouldEqual, `{"min_score":1.1,"query":{"bool":{"filter":[{"match":{"label":{"query":"good"}}}],"must":[{"text_expansion":{"vector_eino_doc_content.tokens":{"model_id":"mock_model_id","model_text":"content"}}}]}},"size":10}`) + }) + }) +} diff --git a/components/retriever/es8/search_mode/utils.go b/components/retriever/es8/search_mode/utils.go new file mode 100644 index 0000000..ebddb7e --- /dev/null +++ b/components/retriever/es8/search_mode/utils.go @@ -0,0 +1,56 @@ +/* + * Copyright 2025 CloudWeGo Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package search_mode + +import ( + "context" + + "github.com/cloudwego/eino/callbacks" + "github.com/cloudwego/eino/components" + "github.com/cloudwego/eino/components/embedding" +) + +func makeEmbeddingCtx(ctx context.Context, emb embedding.Embedder) context.Context { + runInfo := &callbacks.RunInfo{ + Component: components.ComponentOfEmbedding, + } + + if embType, ok := components.GetType(emb); ok { + runInfo.Type = embType + } + + runInfo.Name = runInfo.Type + string(runInfo.Component) + + return callbacks.ReuseHandlers(ctx, runInfo) +} + +func f64To32(f64 []float64) []float32 { + f32 := make([]float32, len(f64)) + for i, f := range f64 { + f32[i] = float32(f) + } + + return f32 +} + +func ptrWithoutZero[T string | int64 | int | float64 | float32](v T) *T { + var zero T + if zero == v { + return nil + } + return &v +}