From 6485fb4bb6515ac95d4bac1c66aa9387942595d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksa=20Siri=C5=A1ki?= <31509435+aleksasiriski@users.noreply.github.com> Date: Fri, 15 Nov 2024 03:06:53 +0100 Subject: [PATCH 01/11] feat!: remove categories from config and instead pass them as base64 encoded json param --- docs/example_category.json | 54 ++++++ src/config/defaults.go | 63 ++----- src/config/defaults_cat_general.go | 44 ----- src/config/defaults_cat_images.go | 32 ---- src/config/defaults_cat_science.go | 30 ---- src/config/defaults_cat_suggestions.go | 32 ---- src/config/defaults_cat_thorough.go | 52 ------ src/config/defaults_ranking.go | 63 ------- src/config/load.go | 123 ++++--------- src/config/structs_category.go | 91 ---------- src/config/structs_config.go | 16 +- src/config/structs_engines.go | 20 +++ .../{route_proxy.go => route_image_proxy.go} | 2 +- ...route_search.go => route_search_images.go} | 44 ++--- ...estions.go => route_search_suggestions.go} | 26 ++- src/router/routes/route_search_web.go | 168 ++++++++++++++++++ src/router/routes/setup.go | 87 ++++----- src/search/category/convert.go | 83 +++++++++ src/search/category/disable.go | 27 +++ src/search/category/json.go | 41 +++++ src/search/category/name.go | 40 ----- src/search/category/type.go | 43 +++++ src/search/engines/bing/imagesearch_test.go | 4 +- src/search/engines/bing/search_test.go | 4 +- src/search/engines/brave/search_test.go | 4 +- src/search/engines/duckduckgo/search_test.go | 4 +- src/search/engines/duckduckgo/suggest_test.go | 3 +- src/search/engines/etools/search_test.go | 4 +- src/search/engines/google/imagesearch_test.go | 4 +- src/search/engines/google/search_test.go | 4 +- src/search/engines/google/suggest_test.go | 3 +- .../engines/googlescholar/search_test.go | 4 +- src/search/engines/mojeek/search_test.go | 4 +- src/search/engines/presearch/search_test.go | 4 +- src/search/engines/qwant/search_test.go | 4 +- src/search/engines/startpage/search_test.go | 4 +- src/search/engines/swisscows/search_test.go | 4 +- src/search/engines/yahoo/search_test.go | 4 +- src/search/engines/yep/search_test.go | 2 +- src/search/init.go | 13 +- src/search/result/rank/results.go | 4 +- src/search/result/rank/score.go | 8 +- src/search/result/rank/suggestions.go | 4 +- src/search/{imagesearch.go => s_images.go} | 7 +- src/search/{suggest.go => s_suggestions.go} | 7 +- src/search/{search.go => s_web.go} | 7 +- src/search/scraper/collector.go | 19 -- src/search/scraper/enginebase.go | 24 +-- src/search/scraper/interfaces.go | 12 +- src/search/searchtype/name.go | 32 ++++ 50 files changed, 661 insertions(+), 721 deletions(-) create mode 100644 docs/example_category.json delete mode 100644 src/config/defaults_cat_general.go delete mode 100644 src/config/defaults_cat_images.go delete mode 100644 src/config/defaults_cat_science.go delete mode 100644 src/config/defaults_cat_suggestions.go delete mode 100644 src/config/defaults_cat_thorough.go delete mode 100644 src/config/defaults_ranking.go delete mode 100644 src/config/structs_category.go create mode 100644 src/config/structs_engines.go rename src/router/routes/{route_proxy.go => route_image_proxy.go} (97%) rename src/router/routes/{route_search.go => route_search_images.go} (86%) rename src/router/routes/{route_suggestions.go => route_search_suggestions.go} (77%) create mode 100644 src/router/routes/route_search_web.go create mode 100644 src/search/category/convert.go create mode 100644 src/search/category/disable.go create mode 100644 src/search/category/json.go 
delete mode 100644 src/search/category/name.go create mode 100644 src/search/category/type.go rename src/search/{imagesearch.go => s_images.go} (95%) rename src/search/{suggest.go => s_suggestions.go} (92%) rename src/search/{search.go => s_web.go} (93%) create mode 100644 src/search/searchtype/name.go diff --git a/docs/example_category.json b/docs/example_category.json new file mode 100644 index 00000000..68034abf --- /dev/null +++ b/docs/example_category.json @@ -0,0 +1,54 @@ +{ + "engines": { + "google": { + "enabled": true, + "required": false, + "requiredbyorigin": true, + "preferred": false, + "preferredbyorigin": false + }, + "bing": { + "enabled": true, + "required": false, + "requiredbyorigin": true, + "preferred": false, + "preferredbyorigin": false + }, + "brave": { + "enabled": true, + "required": false, + "requiredbyorigin": false, + "preferred": true, + "preferredbyorigin": false + } + }, + "ranking": { + "rexp": 0.5, + "a": 1, + "b": 0, + "c": 1, + "d": 0, + "tra": 1, + "trb": 0, + "trc": 1, + "trd": 0, + "engines": { + "google": { + "mul": 1, + "const": 0 + }, + "bing": { + "mul": 1, + "const": 0 + }, + "brave": { + "mul": 1, + "const": 0 + } + } + }, + "timings": { + "preferredtimeout": "500", + "hardtimeout": "1500" + } +} diff --git a/src/config/defaults.go b/src/config/defaults.go index febd7b3c..2699a83c 100644 --- a/src/config/defaults.go +++ b/src/config/defaults.go @@ -3,7 +3,8 @@ package config import ( "time" - "github.com/hearchco/agent/src/search/category" + exchengines "github.com/hearchco/agent/src/exchange/engines" + "github.com/hearchco/agent/src/search/engines" "github.com/hearchco/agent/src/utils/moretime" ) @@ -31,57 +32,21 @@ func New() Config { Timeout: 3 * time.Second, }, }, - Categories: map[category.Name]Category{ - category.SUGGESTIONS: { - Engines: suggestionsEngines, - RequiredEngines: suggestionsRequiredEngines, - RequiredByOriginEngines: suggestionsRequiredByOriginEngines, - PreferredEngines: suggestionsPreferredEngines, - PreferredByOriginEngines: suggestionsPreferredByOriginEngines, - Ranking: suggestionsRanking(), - Timings: suggestionsTimings, - }, - category.GENERAL: { - Engines: generalEngines, - RequiredEngines: generalRequiredEngines, - RequiredByOriginEngines: generalRequiredByOriginEngines, - PreferredEngines: generalPreferredEngines, - PreferredByOriginEngines: generalPreferredByOriginEngines, - Ranking: generalRanking(), - Timings: generalTimings, - }, - category.IMAGES: { - Engines: imagesEngines, - RequiredEngines: imagesRequiredEngines, - RequiredByOriginEngines: imagesRequiredByOriginEngines, - PreferredEngines: imagesPreferredEngines, - PreferredByOriginEngines: imagesPreferredByOriginEngines, - Ranking: imagesRanking(), - Timings: imagesTimings, - }, - category.SCIENCE: { - Engines: scienceEngines, - RequiredEngines: scienceRequiredEngines, - RequiredByOriginEngines: scienceRequiredByOriginEngines, - PreferredEngines: sciencePreferredEngines, - PreferredByOriginEngines: sciencePreferredByOriginEngines, - Ranking: scienceRanking(), - Timings: scienceTimings, - }, - category.THOROUGH: { - Engines: thoroughEngines, - RequiredEngines: thoroughRequiredEngines, - RequiredByOriginEngines: thoroughRequiredByOriginEngines, - PreferredEngines: thoroughPreferredEngines, - PreferredByOriginEngines: thoroughPreferredByOriginEngines, - Ranking: thoroughRanking(), - Timings: thoroughTimings, - }, + Engines: EngineConfig{ + NoWeb: []engines.Name{}, + NoImages: []engines.Name{}, + NoSuggestions: []engines.Name{}, }, Exchange: Exchange{
BaseCurrency: "EUR", - Engines: exchangeEngines, - Timings: exchangeTimings, + Engines: []exchengines.Name{ + exchengines.CURRENCYAPI, + exchengines.EXCHANGERATEAPI, + exchengines.FRANKFURTER, + }, + Timings: ExchangeTimings{ + HardTimeout: 1 * time.Second, + }, }, } } diff --git a/src/config/defaults_cat_general.go b/src/config/defaults_cat_general.go deleted file mode 100644 index fa0041bc..00000000 --- a/src/config/defaults_cat_general.go +++ /dev/null @@ -1,44 +0,0 @@ -package config - -import ( - "time" - - "github.com/hearchco/agent/src/search/engines" -) - -var generalEngines = []engines.Name{ - engines.BING, - engines.BRAVE, - engines.DUCKDUCKGO, - engines.ETOOLS, - engines.GOOGLE, - engines.MOJEEK, - engines.PRESEARCH, - engines.QWANT, - engines.STARTPAGE, - engines.SWISSCOWS, - engines.YAHOO, - // engines.YEP, -} - -var generalRequiredEngines = []engines.Name{} - -var generalRequiredByOriginEngines = []engines.Name{ - engines.BING, - engines.GOOGLE, -} - -var generalPreferredEngines = []engines.Name{} - -var generalPreferredByOriginEngines = []engines.Name{ - engines.BRAVE, -} - -func generalRanking() CategoryRanking { - return ReqPrefOthRanking(generalRequiredEngines, generalPreferredEngines, generalEngines) -} - -var generalTimings = CategoryTimings{ - PreferredTimeout: 500 * time.Millisecond, - HardTimeout: 1500 * time.Millisecond, -} diff --git a/src/config/defaults_cat_images.go b/src/config/defaults_cat_images.go deleted file mode 100644 index 1ab8b856..00000000 --- a/src/config/defaults_cat_images.go +++ /dev/null @@ -1,32 +0,0 @@ -package config - -import ( - "time" - - "github.com/hearchco/agent/src/search/engines" -) - -var imagesEngines = []engines.Name{ - engines.BING, - engines.GOOGLE, -} - -var imagesRequiredEngines = []engines.Name{} - -var imagesRequiredByOriginEngines = []engines.Name{ - engines.BING, - engines.GOOGLE, -} - -var imagesPreferredEngines = []engines.Name{} - -var imagesPreferredByOriginEngines = []engines.Name{} - -func imagesRanking() CategoryRanking { - return ReqPrefOthRanking(imagesRequiredEngines, imagesPreferredEngines, imagesEngines) -} - -var imagesTimings = CategoryTimings{ - PreferredTimeout: 500 * time.Millisecond, - HardTimeout: 1500 * time.Millisecond, -} diff --git a/src/config/defaults_cat_science.go b/src/config/defaults_cat_science.go deleted file mode 100644 index f4ab453e..00000000 --- a/src/config/defaults_cat_science.go +++ /dev/null @@ -1,30 +0,0 @@ -package config - -import ( - "time" - - "github.com/hearchco/agent/src/search/engines" -) - -var scienceEngines = []engines.Name{ - engines.GOOGLESCHOLAR, -} - -var scienceRequiredEngines = []engines.Name{ - engines.GOOGLESCHOLAR, -} - -var scienceRequiredByOriginEngines = []engines.Name{} - -var sciencePreferredEngines = []engines.Name{} - -var sciencePreferredByOriginEngines = []engines.Name{} - -func scienceRanking() CategoryRanking { - return ReqPrefOthRanking(scienceRequiredEngines, sciencePreferredEngines, scienceEngines) -} - -var scienceTimings = CategoryTimings{ - PreferredTimeout: 700 * time.Millisecond, - HardTimeout: 3 * time.Second, -} diff --git a/src/config/defaults_cat_suggestions.go b/src/config/defaults_cat_suggestions.go deleted file mode 100644 index 99151956..00000000 --- a/src/config/defaults_cat_suggestions.go +++ /dev/null @@ -1,32 +0,0 @@ -package config - -import ( - "time" - - "github.com/hearchco/agent/src/search/engines" -) - -var suggestionsEngines = []engines.Name{ - engines.DUCKDUCKGO, - engines.GOOGLE, -} - -var suggestionsRequiredEngines = 
[]engines.Name{ - engines.DUCKDUCKGO, - engines.GOOGLE, -} - -var suggestionsRequiredByOriginEngines = []engines.Name{} - -var suggestionsPreferredEngines = []engines.Name{} - -var suggestionsPreferredByOriginEngines = []engines.Name{} - -func suggestionsRanking() CategoryRanking { - return ReqPrefOthRanking(suggestionsRequiredEngines, suggestionsPreferredEngines, suggestionsEngines) -} - -var suggestionsTimings = CategoryTimings{ - PreferredTimeout: 300 * time.Millisecond, - HardTimeout: 500 * time.Millisecond, -} diff --git a/src/config/defaults_cat_thorough.go b/src/config/defaults_cat_thorough.go deleted file mode 100644 index 17bcc77a..00000000 --- a/src/config/defaults_cat_thorough.go +++ /dev/null @@ -1,52 +0,0 @@ -package config - -import ( - "time" - - "github.com/hearchco/agent/src/search/engines" -) - -var thoroughEngines = []engines.Name{ - engines.BING, - engines.BRAVE, - engines.DUCKDUCKGO, - engines.ETOOLS, - engines.GOOGLE, - engines.MOJEEK, - engines.PRESEARCH, - engines.QWANT, - engines.STARTPAGE, - engines.SWISSCOWS, - engines.YAHOO, - // engines.YEP, -} - -var thoroughRequiredEngines = []engines.Name{ - engines.BING, - engines.BRAVE, - engines.DUCKDUCKGO, - engines.ETOOLS, - engines.GOOGLE, - engines.MOJEEK, - engines.PRESEARCH, - engines.QWANT, - engines.STARTPAGE, - engines.SWISSCOWS, - engines.YAHOO, - // engines.YEP, -} - -var thoroughRequiredByOriginEngines = []engines.Name{} - -var thoroughPreferredEngines = []engines.Name{} - -var thoroughPreferredByOriginEngines = []engines.Name{} - -func thoroughRanking() CategoryRanking { - return EmptyRanking(thoroughEngines) -} - -var thoroughTimings = CategoryTimings{ - PreferredTimeout: 3 * time.Second, - HardTimeout: 5 * time.Second, -} diff --git a/src/config/defaults_ranking.go b/src/config/defaults_ranking.go deleted file mode 100644 index 34236abe..00000000 --- a/src/config/defaults_ranking.go +++ /dev/null @@ -1,63 +0,0 @@ -package config - -import ( - "github.com/hearchco/agent/src/search/engines" -) - -func initCategoryRanking() CategoryRanking { - return CategoryRanking{ - REXP: 0.5, - A: 1, - B: 0, - C: 1, - D: 0, - TRA: 1, - TRB: 0, - TRC: 1, - TRD: 0, - Engines: map[string]CategoryEngineRanking{}, - } -} - -func EmptyRanking(engs []engines.Name) CategoryRanking { - rnk := initCategoryRanking() - - for _, eng := range engs { - rnk.Engines[eng.String()] = CategoryEngineRanking{ - Mul: 1, - Const: 0, - } - } - - return rnk -} - -func ReqPrefOthRanking(req []engines.Name, pref []engines.Name, oth []engines.Name) CategoryRanking { - rnk := initCategoryRanking() - - // First set the least important engines - for _, eng := range oth { - rnk.Engines[eng.String()] = CategoryEngineRanking{ - Mul: 1, - Const: 0, - } - } - - // Afterwards overwrite with the preferred engines - for _, eng := range pref { - rnk.Engines[eng.String()] = CategoryEngineRanking{ - Mul: 1.25, - Const: 0, - } - } - - // Finally overwrite with the required engines - for _, eng := range req { - rnk.Engines[eng.String()] = CategoryEngineRanking{ - Mul: 1.5, - Const: 0, - } - } - - return rnk -} diff --git a/src/config/load.go b/src/config/load.go index 60220411..6c572e5a 100644 --- a/src/config/load.go +++ b/src/config/load.go @@ -2,7 +2,6 @@ package config import ( "os" - "slices" "strings" "github.com/knadh/koanf/parsers/yaml" @@ -14,7 +13,6 @@ import ( "github.com/hearchco/agent/src/exchange/currency" exchengines "github.com/hearchco/agent/src/exchange/engines" - "github.com/hearchco/agent/src/search/category" 
"github.com/hearchco/agent/src/search/engines" "github.com/hearchco/agent/src/utils/moretime" ) @@ -92,8 +90,8 @@ func (c Config) getReader() ReaderConfig { Timeout: moretime.ConvertToFancyTime(c.Server.ImageProxy.Timeout), }, }, - // Initialize the categories map. - RCategories: map[category.Name]ReaderCategory{}, + // Initialize the engines config map. + REngines: map[string]ReaderEngineConfig{}, // Exchange config. RExchange: ReaderExchange{ BaseCurrency: c.Exchange.BaseCurrency.String(), @@ -104,35 +102,21 @@ func (c Config) getReader() ReaderConfig { }, } - // Set the categories map config. - for catName, catConf := range c.Categories { - // Timings config. - timingsConf := ReaderCategoryTimings{ - PreferredTimeout: moretime.ConvertToFancyTime(catConf.Timings.PreferredTimeout), - HardTimeout: moretime.ConvertToFancyTime(catConf.Timings.HardTimeout), - Delay: moretime.ConvertToFancyTime(catConf.Timings.Delay), - RandomDelay: moretime.ConvertToFancyTime(catConf.Timings.RandomDelay), - Parallelism: catConf.Timings.Parallelism, - } - - // Set the category config. - rc.RCategories[catName] = ReaderCategory{ - // Initialize the engines map. - REngines: map[string]ReaderCategoryEngine{}, - Ranking: catConf.Ranking, - RTimings: timingsConf, - } - - // Set the engines map config. - for _, eng := range catConf.Engines { - rc.RCategories[catName].REngines[eng.ToLower()] = ReaderCategoryEngine{ - Enabled: true, - Required: slices.Contains(catConf.RequiredEngines, eng), - RequiredByOrigin: slices.Contains(catConf.RequiredByOriginEngines, eng), - Preferred: slices.Contains(catConf.PreferredEngines, eng), - PreferredByOrigin: slices.Contains(catConf.PreferredByOriginEngines, eng), - } - } + // Set the engines config map. + for _, engName := range c.Engines.NoWeb { + eng := rc.REngines[engName.String()] + eng.NoWeb = true + rc.REngines[engName.String()] = eng + } + for _, engName := range c.Engines.NoImages { + eng := rc.REngines[engName.String()] + eng.NoImages = true + rc.REngines[engName.String()] = eng + } + for _, engName := range c.Engines.NoSuggestions { + eng := rc.REngines[engName.String()] + eng.NoSuggestions = true + rc.REngines[engName.String()] = eng } // Set the exchange engines. @@ -172,8 +156,12 @@ func (c *Config) fromReader(rc ReaderConfig) { Timeout: moretime.ConvertFromFancyTime(rc.Server.ImageProxy.Timeout), }, }, - // Initialize the categories map. - Categories: map[category.Name]Category{}, + // Initialize the disabled engines slices. + Engines: EngineConfig{ + NoWeb: make([]engines.Name, 0), + NoImages: make([]engines.Name, 0), + NoSuggestions: make([]engines.Name, 0), + }, // Exchange config. Exchange: Exchange{ BaseCurrency: currency.ConvertBase(rc.RExchange.BaseCurrency), @@ -184,59 +172,24 @@ func (c *Config) fromReader(rc ReaderConfig) { }, } - // Set the categories map config. - for catName, catRConf := range rc.RCategories { - // Initialize the engines slices. - engEnabled := make([]engines.Name, 0) - engRequired := make([]engines.Name, 0) - engRequiredByOrigin := make([]engines.Name, 0) - engPreferred := make([]engines.Name, 0) - engPreferredByOrigin := make([]engines.Name, 0) - - // Set the engines slices according to the reader config. - for engS, engRConf := range catRConf.REngines { - engName, err := engines.NameString(engS) - if err != nil { - log.Panic(). - Caller(). - Err(err). 
- Msg("Failed converting string to engine name") - // ^PANIC - } - - if engRConf.Enabled { - engEnabled = append(engEnabled, engName) - - if engRConf.Required { - engRequired = append(engRequired, engName) - } else if engRConf.RequiredByOrigin { - engRequiredByOrigin = append(engRequiredByOrigin, engName) - } else if engRConf.Preferred { - engPreferred = append(engPreferred, engName) - } else if engRConf.PreferredByOrigin { - engPreferredByOrigin = append(engPreferredByOrigin, engName) - } - } + // Set the disabled engines slices. + for engNameS, engConf := range rc.REngines { + engName, err := engines.NameString(engNameS) + if err != nil { + log.Panic(). + Err(err). + Str("name", engNameS). + Msg("Couldn't convert engine name string to type") } - // Timings config. - timingsConf := CategoryTimings{ - PreferredTimeout: moretime.ConvertFromFancyTime(catRConf.RTimings.PreferredTimeout), - HardTimeout: moretime.ConvertFromFancyTime(catRConf.RTimings.HardTimeout), - Delay: moretime.ConvertFromFancyTime(catRConf.RTimings.Delay), - RandomDelay: moretime.ConvertFromFancyTime(catRConf.RTimings.RandomDelay), - Parallelism: catRConf.RTimings.Parallelism, + if engConf.NoWeb { + nc.Engines.NoWeb = append(nc.Engines.NoWeb, engName) } - - // Set the category config. - nc.Categories[catName] = Category{ - Engines: engEnabled, - RequiredEngines: engRequired, - RequiredByOriginEngines: engRequiredByOrigin, - PreferredEngines: engPreferred, - PreferredByOriginEngines: engPreferredByOrigin, - Ranking: catRConf.Ranking, - Timings: timingsConf, + if engConf.NoImages { + nc.Engines.NoImages = append(nc.Engines.NoImages, engName) + } + if engConf.NoSuggestions { + nc.Engines.NoSuggestions = append(nc.Engines.NoSuggestions, engName) } } diff --git a/src/config/structs_category.go b/src/config/structs_category.go deleted file mode 100644 index ba1c6d4b..00000000 --- a/src/config/structs_category.go +++ /dev/null @@ -1,91 +0,0 @@ -package config - -import ( - "time" - - "github.com/hearchco/agent/src/search/engines" -) - -// ReaderCategory is format in which the config is read from the config file and environment variables. -type ReaderCategory struct { - REngines map[string]ReaderCategoryEngine `koanf:"engines"` - Ranking CategoryRanking `koanf:"ranking"` - RTimings ReaderCategoryTimings `koanf:"timings"` -} -type Category struct { - Engines []engines.Name - RequiredEngines []engines.Name - RequiredByOriginEngines []engines.Name - PreferredEngines []engines.Name - PreferredByOriginEngines []engines.Name - Ranking CategoryRanking - Timings CategoryTimings -} - -// ReaderEngine is format in which the config is read from the config file and environment variables. -type ReaderCategoryEngine struct { - // If false, the engine will not be used and other options will be ignored. - Enabled bool `koanf:"enabled"` - // If true, the engine will be awaited unless the hard timeout is reached. - Required bool `koanf:"required"` - // If true, the fastest engine that has this engine in "Origins" will be awaited unless the hard timeout is reached. - // This means that we want to get results from this engine or any engine that has this engine in "Origins", whichever responds the fastest. - RequiredByOrigin bool `koanf:"requiredbyorigin"` - // If true, the engine will be awaited unless the preferred timeout is reached. - Preferred bool `koanf:"preferred"` - // If true, the fastest engine that has this engine in "Origins" will be awaited unless the preferred timeout is reached. 
- // This means that we want to get results from this engine or any engine that has this engine in "Origins", whichever responds the fastest. - PreferredByOrigin bool `koanf:"preferredbyorigin"` -} - -type CategoryRanking struct { - REXP float64 `koanf:"rexp"` - A float64 `koanf:"a"` - B float64 `koanf:"b"` - C float64 `koanf:"c"` - D float64 `koanf:"d"` - TRA float64 `koanf:"tra"` - TRB float64 `koanf:"trb"` - TRC float64 `koanf:"trc"` - TRD float64 `koanf:"trd"` - Engines map[string]CategoryEngineRanking `koanf:"engines"` -} - -type CategoryEngineRanking struct { - Mul float64 `koanf:"mul"` - Const float64 `koanf:"const"` -} - -// ReaderTimings is format in which the config is read from the config file and environment variables. -// In format. -// Example: 1s, 1m, 1h, 1d, 1w, 1M, 1y. -// If unit is not specified, it is assumed to be milliseconds. -// Delegates Delay, RandomDelay, Parallelism to colly.Collector.Limit(). -type ReaderCategoryTimings struct { - // Maximum amount of time to wait for the PreferredEngines (or ByOrigin) to respond. - // If the search is still waiting for the RequiredEngines (or ByOrigin) after this time, the search will continue. - PreferredTimeout string `koanf:"preferredtimeout"` - // Hard timeout after which the search is forcefully stopped (even if the engines didn't respond). - HardTimeout string `koanf:"hardtimeout"` - // Colly delay. - Delay string `koanf:"delay"` - // Colly random delay. - RandomDelay string `koanf:"randomdelay"` - // Colly parallelism. - Parallelism int `koanf:"parallelism"` -} - -// Delegates Delay, RandomDelay, Parallelism to colly.Collector.Limit(). -type CategoryTimings struct { - // Maximum amount of time to wait for the PreferredEngines (or ByOrigin) to respond. - // If the search is still waiting for the RequiredEngines (or ByOrigin) after this time, the search will continue. - PreferredTimeout time.Duration - // Hard timeout after which the search is forcefully stopped (even if the engines didn't respond). - HardTimeout time.Duration - // Colly delay. - Delay time.Duration - // Colly random delay. - RandomDelay time.Duration - // Colly parallelism. - Parallelism int -} diff --git a/src/config/structs_config.go b/src/config/structs_config.go index 21f511c9..8f45d17d 100644 --- a/src/config/structs_config.go +++ b/src/config/structs_config.go @@ -1,17 +1,13 @@ package config -import ( - "github.com/hearchco/agent/src/search/category" -) - // ReaderConfig is format in which the config is read from the config file and environment variables. type ReaderConfig struct { - Server ReaderServer `koanf:"server"` - RCategories map[category.Name]ReaderCategory `koanf:"categories"` - RExchange ReaderExchange `koanf:"exchange"` + Server ReaderServer `koanf:"server"` + REngines map[string]ReaderEngineConfig `koanf:"engines"` + RExchange ReaderExchange `koanf:"exchange"` } type Config struct { - Server Server - Categories map[category.Name]Category - Exchange Exchange + Server Server + Engines EngineConfig + Exchange Exchange } diff --git a/src/config/structs_engines.go b/src/config/structs_engines.go new file mode 100644 index 00000000..70a79be0 --- /dev/null +++ b/src/config/structs_engines.go @@ -0,0 +1,20 @@ +package config + +import ( + "github.com/hearchco/agent/src/search/engines" +) + +// ReaderEngineConfig is format in which the config is read from the config file and environment variables. +// Used to disable certain search types for an engine. By default, all types are enabled. 
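+// Entries are keyed by the engine's name under the "engines" config key; during loading, getReader and fromReader convert these flags to and from the EngineConfig slices below.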
+type ReaderEngineConfig struct { + NoWeb bool // Whether this engine is disallowed to do web searches. + NoImages bool // Whether this engine is disallowed to do image searches. + NoSuggestions bool // Whether this engine is disallowed to do suggestion searches. +} + +// Slices of disabled engines for each search type, by default these are empty. +type EngineConfig struct { + NoWeb []engines.Name + NoImages []engines.Name + NoSuggestions []engines.Name +} diff --git a/src/router/routes/route_proxy.go b/src/router/routes/route_image_proxy.go similarity index 97% rename from src/router/routes/route_proxy.go rename to src/router/routes/route_image_proxy.go index e353d85d..994848ca 100644 --- a/src/router/routes/route_proxy.go +++ b/src/router/routes/route_image_proxy.go @@ -13,7 +13,7 @@ import ( "github.com/hearchco/agent/src/utils/anonymize" ) -func routeProxy(w http.ResponseWriter, r *http.Request, secret string, timeout time.Duration) error { +func routeImageProxy(w http.ResponseWriter, r *http.Request, secret string, timeout time.Duration) error { // Parse the form. err := r.ParseForm() if err != nil { diff --git a/src/router/routes/route_search.go b/src/router/routes/route_search_images.go similarity index 86% rename from src/router/routes/route_search.go rename to src/router/routes/route_search_images.go index 4f8566ad..7aeb4715 100644 --- a/src/router/routes/route_search.go +++ b/src/router/routes/route_search_images.go @@ -8,16 +8,16 @@ import ( "strings" "time" - "github.com/hearchco/agent/src/config" "github.com/hearchco/agent/src/search" "github.com/hearchco/agent/src/search/category" + "github.com/hearchco/agent/src/search/engines" "github.com/hearchco/agent/src/search/engines/options" "github.com/hearchco/agent/src/search/result" "github.com/hearchco/agent/src/search/result/rank" "github.com/hearchco/agent/src/utils/gotypelimits" ) -func routeSearch(w http.ResponseWriter, r *http.Request, ver string, catsConf map[category.Name]config.Category, secret string) error { +func routeSearchImages(w http.ResponseWriter, r *http.Request, ver string, disabledEngines []engines.Name, secret string) error { // Capture start time. startTime := time.Now() @@ -44,16 +44,6 @@ func routeSearch(w http.ResponseWriter, r *http.Request, ver string, catsConf ma }) } - categoryS := getParamOrDefault(r.Form, "category", category.GENERAL.String()) - categoryName, err := category.FromString(categoryS) - if err != nil { - // User error. - return writeResponseJSON(w, http.StatusBadRequest, ErrorResponse{ - Message: "invalid category value", - Value: fmt.Sprintf("%v", categoryName), - }) - } - pagesMaxS := getParamOrDefault(r.Form, "pages", "1") pagesMax, err := strconv.Atoi(pagesMaxS) if err != nil { @@ -114,6 +104,25 @@ func routeSearch(w http.ResponseWriter, r *http.Request, ver string, catsConf ma }) } + categoryS := getParamOrDefault(r.Form, "category") + if categoryS == "" { + // User error. + return writeResponseJSON(w, http.StatusBadRequest, ErrorResponse{ + Message: "category cannot be empty or whitespace", + Value: "empty category", + }) + } + + catConf, err := category.Base64ToCategoryType(categoryS) + if err != nil { + // User error. + return writeResponseJSON(w, http.StatusBadRequest, ErrorResponse{ + Message: "invalid category value", + Value: fmt.Sprintf("%v", err), + }) + } + catConf.DisableEngines(disabledEngines) + // All of these have default values set and validated. 
opts := options.Options{ Pages: options.Pages{ @@ -125,14 +134,7 @@ func routeSearch(w http.ResponseWriter, r *http.Request, ver string, catsConf ma } // Search for results. - var scrapedRes []result.Result - switch categoryName { - case category.IMAGES: - scrapedRes, err = search.ImageSearch(query, opts, catsConf[categoryName]) - default: - scrapedRes, err = search.Search(query, categoryName, opts, catsConf[categoryName]) - } - + scrapedRes, err := search.Images(query, opts, catConf) if err != nil { // Server error. werr := writeResponseJSON(w, http.StatusInternalServerError, ErrorResponse{ @@ -147,7 +149,7 @@ func routeSearch(w http.ResponseWriter, r *http.Request, ver string, catsConf ma // Rank the results. var rankedRes rank.Results = slices.Clone(scrapedRes) - rankedRes.Rank(catsConf[categoryName].Ranking) + rankedRes.Rank(catConf.Ranking) // Convert the results to include the hashes (output format). outpusRes := result.ConvertToOutput(rankedRes, secret) diff --git a/src/router/routes/route_suggestions.go b/src/router/routes/route_search_suggestions.go similarity index 77% rename from src/router/routes/route_suggestions.go rename to src/router/routes/route_search_suggestions.go index fd874ab4..9dd43ba5 100644 --- a/src/router/routes/route_suggestions.go +++ b/src/router/routes/route_search_suggestions.go @@ -7,14 +7,15 @@ import ( "strings" "time" - "github.com/hearchco/agent/src/config" "github.com/hearchco/agent/src/search" + "github.com/hearchco/agent/src/search/category" + "github.com/hearchco/agent/src/search/engines" "github.com/hearchco/agent/src/search/engines/options" "github.com/hearchco/agent/src/search/result" "github.com/hearchco/agent/src/search/result/rank" ) -func routeSuggest(w http.ResponseWriter, r *http.Request, ver string, catConf config.Category) error { +func routeSearchSuggestions(w http.ResponseWriter, r *http.Request, ver string, disabledEngines []engines.Name) error { // Capture start time. startTime := time.Now() @@ -60,6 +61,25 @@ func routeSuggest(w http.ResponseWriter, r *http.Request, ver string, catConf co }) } + categoryS := getParamOrDefault(r.Form, "category") + if categoryS == "" { + // User error. + return writeResponseJSON(w, http.StatusBadRequest, ErrorResponse{ + Message: "category cannot be empty or whitespace", + Value: "empty category", + }) + } + + catConf, err := category.Base64ToCategoryType(categoryS) + if err != nil { + // User error. + return writeResponseJSON(w, http.StatusBadRequest, ErrorResponse{ + Message: "invalid category value", + Value: fmt.Sprintf("%v", err), + }) + } + catConf.DisableEngines(disabledEngines) + // All of these have default values set and validated. opts := options.Options{ Pages: options.Pages{ @@ -71,7 +91,7 @@ func routeSuggest(w http.ResponseWriter, r *http.Request, ver string, catConf co } // Search for suggestions. - scrapedSugs, err := search.Suggest(query, opts, catConf) + scrapedSugs, err := search.Suggestions(query, opts, catConf) if err != nil { // Server error. 
werr := writeResponseJSON(w, http.StatusInternalServerError, ErrorResponse{ diff --git a/src/router/routes/route_search_web.go b/src/router/routes/route_search_web.go new file mode 100644 index 00000000..3db29a7d --- /dev/null +++ b/src/router/routes/route_search_web.go @@ -0,0 +1,168 @@ +package routes + +import ( + "fmt" + "net/http" + "slices" + "strconv" + "strings" + "time" + + "github.com/hearchco/agent/src/search" + "github.com/hearchco/agent/src/search/category" + "github.com/hearchco/agent/src/search/engines" + "github.com/hearchco/agent/src/search/engines/options" + "github.com/hearchco/agent/src/search/result" + "github.com/hearchco/agent/src/search/result/rank" + "github.com/hearchco/agent/src/utils/gotypelimits" +) + +func routeSearchWeb(w http.ResponseWriter, r *http.Request, ver string, disabledEngines []engines.Name, secret string) error { + // Capture start time. + startTime := time.Now() + + // Parse form data (including query params). + if err := r.ParseForm(); err != nil { + // Server error. + werr := writeResponseJSON(w, http.StatusInternalServerError, ErrorResponse{ + Message: "failed to parse form", + Value: fmt.Sprintf("%v", err), + }) + if werr != nil { + return fmt.Errorf("%w: %w", werr, err) + } + return err + } + + // Query is required. + query := strings.TrimSpace(getParamOrDefault(r.Form, "q")) + if query == "" { + // User error. + return writeResponseJSON(w, http.StatusBadRequest, ErrorResponse{ + Message: "query cannot be empty or whitespace", + Value: "empty query", + }) + } + + pagesMaxS := getParamOrDefault(r.Form, "pages", "1") + pagesMax, err := strconv.Atoi(pagesMaxS) + if err != nil { + // User error. + return writeResponseJSON(w, http.StatusUnprocessableEntity, ErrorResponse{ + Message: "cannot convert pages value to int", + Value: fmt.Sprintf("%v", err), + }) + } + // TODO: Make upper limit configurable. + pagesMaxUpperLimit := 10 + if pagesMax < 1 || pagesMax > pagesMaxUpperLimit { + // User error. + return writeResponseJSON(w, http.StatusBadRequest, ErrorResponse{ + Message: fmt.Sprintf("pages value must be at least 1 and at most %v", pagesMaxUpperLimit), + Value: "out of range", + }) + } + + pagesStartS := getParamOrDefault(r.Form, "start", "1") + pagesStart, err := strconv.Atoi(pagesStartS) + if err != nil { + // User error. + return writeResponseJSON(w, http.StatusUnprocessableEntity, ErrorResponse{ + Message: "cannot convert start value to int", + Value: fmt.Sprintf("%v", err), + }) + } + // Make sure that pagesStart can be safely added to pagesMax. + if pagesStart < 1 || pagesStart > gotypelimits.MaxInt-pagesMaxUpperLimit { + // User error. + return writeResponseJSON(w, http.StatusBadRequest, ErrorResponse{ + Message: fmt.Sprintf("start value must be at least 1 and at most %v", gotypelimits.MaxInt-pagesMaxUpperLimit), + Value: "out of range", + }) + } else { + // Since it's >=1, we decrement it to match the 0-based index. + pagesStart -= 1 + } + + localeS := getParamOrDefault(r.Form, "locale", options.LocaleDefault.String()) + locale, err := options.StringToLocale(localeS) + if err != nil { + // User error. + return writeResponseJSON(w, http.StatusBadRequest, ErrorResponse{ + Message: "invalid locale value", + Value: fmt.Sprintf("%v", err), + }) + } + + safeSearchS := getParamOrDefault(r.Form, "safesearch", "false") + safeSearch, err := strconv.ParseBool(safeSearchS) + if err != nil { + // User error. 
+ return writeResponseJSON(w, http.StatusUnprocessableEntity, ErrorResponse{ + Message: "cannot convert safesearch value to bool", + Value: fmt.Sprintf("%v", err), + }) + } + + categoryS := getParamOrDefault(r.Form, "category") + if categoryS == "" { + // User error. + return writeResponseJSON(w, http.StatusBadRequest, ErrorResponse{ + Message: "category cannot be empty or whitespace", + Value: "empty category", + }) + } + + catConf, err := category.Base64ToCategoryType(categoryS) + if err != nil { + // User error. + return writeResponseJSON(w, http.StatusBadRequest, ErrorResponse{ + Message: "invalid category value", + Value: fmt.Sprintf("%v", err), + }) + } + catConf.DisableEngines(disabledEngines) + + // All of these have default values set and validated. + opts := options.Options{ + Pages: options.Pages{ + Start: pagesStart, + Max: pagesMax, + }, + Locale: locale, + SafeSearch: safeSearch, + } + + // Search for results. + scrapedRes, err := search.Web(query, opts, catConf) + if err != nil { + // Server error. + werr := writeResponseJSON(w, http.StatusInternalServerError, ErrorResponse{ + Message: "failed to search", + Value: fmt.Sprintf("%v", err), + }) + if werr != nil { + return fmt.Errorf("%w: %w", werr, err) + } + return err + } + + // Rank the results. + var rankedRes rank.Results = slices.Clone(scrapedRes) + rankedRes.Rank(catConf.Ranking) + + // Convert the results to include the hashes (output format). + outpusRes := result.ConvertToOutput(rankedRes, secret) + + // Create the response. + res := ResultsResponse{ + responseBase{ + ver, + time.Since(startTime).Milliseconds(), + }, + outpusRes, + } + + // If writing response failes, return the error. + return writeResponseJSON(w, http.StatusOK, res) +} diff --git a/src/router/routes/setup.go b/src/router/routes/setup.go index 994bae45..65b8fb86 100644 --- a/src/router/routes/setup.go +++ b/src/router/routes/setup.go @@ -8,12 +8,22 @@ import ( "github.com/hearchco/agent/src/cache" "github.com/hearchco/agent/src/config" - "github.com/hearchco/agent/src/search/category" +) + +const ( + healthzRoute = "/healthz" + versionzRoute = "/versionz" + searchWebRoute = "/search/web" + searchImagesRoute = "/search/images" + searchSuggestionsRoute = "/search/suggestions" + exchangeRoute = "/exchange" + exchangeCurrenciesRoute = "/exchange/currencies" + imageProxyRoute = "/imageproxy" ) func Setup(mux *chi.Mux, ver string, db cache.DB, conf config.Config) { - // /healthz - mux.Get("/healthz", func(w http.ResponseWriter, r *http.Request) { + // Health check + mux.Get(healthzRoute, func(w http.ResponseWriter, r *http.Request) { err := writeResponse(w, http.StatusOK, "OK") if err != nil { log.Error(). @@ -24,8 +34,8 @@ func Setup(mux *chi.Mux, ver string, db cache.DB, conf config.Config) { } }) - // /versionz - mux.Get("/versionz", func(w http.ResponseWriter, r *http.Request) { + // Version + mux.Get(versionzRoute, func(w http.ResponseWriter, r *http.Request) { err := writeResponse(w, http.StatusOK, ver) if err != nil { log.Error(). @@ -36,19 +46,9 @@ func Setup(mux *chi.Mux, ver string, db cache.DB, conf config.Config) { } }) - // /search - mux.Get("/search", func(w http.ResponseWriter, r *http.Request) { - err := routeSearch(w, r, ver, conf.Categories, conf.Server.ImageProxy.SecretKey) - if err != nil { - log.Error(). - Err(err). - Str("path", r.URL.Path). - Str("method", r.Method). 
- Msg("Failed to send response") - } - }) - mux.Post("/search", func(w http.ResponseWriter, r *http.Request) { - err := routeSearch(w, r, ver, conf.Categories, conf.Server.ImageProxy.SecretKey) + // Web search + muxGetPost(mux, searchWebRoute, func(w http.ResponseWriter, r *http.Request) { + err := routeSearchWeb(w, r, ver, conf.Engines.NoWeb, conf.Server.ImageProxy.SecretKey) if err != nil { log.Error(). Err(err). @@ -58,19 +58,9 @@ func Setup(mux *chi.Mux, ver string, db cache.DB, conf config.Config) { } }) - // /suggestions - mux.Get("/suggestions", func(w http.ResponseWriter, r *http.Request) { - err := routeSuggest(w, r, ver, conf.Categories[category.SUGGESTIONS]) - if err != nil { - log.Error(). - Err(err). - Str("path", r.URL.Path). - Str("method", r.Method). - Msg("Failed to send response") - } - }) - mux.Post("/suggestions", func(w http.ResponseWriter, r *http.Request) { - err := routeSuggest(w, r, ver, conf.Categories[category.SUGGESTIONS]) + // Images search + muxGetPost(mux, searchImagesRoute, func(w http.ResponseWriter, r *http.Request) { + err := routeSearchImages(w, r, ver, conf.Engines.NoImages, conf.Server.ImageProxy.SecretKey) if err != nil { log.Error(). Err(err). @@ -80,9 +70,9 @@ func Setup(mux *chi.Mux, ver string, db cache.DB, conf config.Config) { } }) - // /exchange - mux.Get("/exchange", func(w http.ResponseWriter, r *http.Request) { - err := routeExchange(w, r, ver, conf.Exchange, db, conf.Server.Cache.TTL.Currencies) + // Suggestions search + muxGetPost(mux, searchSuggestionsRoute, func(w http.ResponseWriter, r *http.Request) { + err := routeSearchSuggestions(w, r, ver, conf.Engines.NoSuggestions) if err != nil { log.Error(). Err(err). @@ -91,7 +81,9 @@ func Setup(mux *chi.Mux, ver string, db cache.DB, conf config.Config) { Msg("Failed to send response") } }) - mux.Post("/exchange", func(w http.ResponseWriter, r *http.Request) { + + // Exchange + muxGetPost(mux, exchangeRoute, func(w http.ResponseWriter, r *http.Request) { err := routeExchange(w, r, ver, conf.Exchange, db, conf.Server.Cache.TTL.Currencies) if err != nil { log.Error(). @@ -102,18 +94,8 @@ func Setup(mux *chi.Mux, ver string, db cache.DB, conf config.Config) { } }) - // /currencies - mux.Get("/currencies", func(w http.ResponseWriter, r *http.Request) { - err := routeCurrencies(w, ver, conf.Exchange, db, conf.Server.Cache.TTL.Currencies) - if err != nil { - log.Error(). - Err(err). - Str("path", r.URL.Path). - Str("method", r.Method). - Msg("Failed to send response") - } - }) - mux.Post("/currencies", func(w http.ResponseWriter, r *http.Request) { + // Exchange currencies + muxGetPost(mux, exchangeCurrenciesRoute, func(w http.ResponseWriter, r *http.Request) { err := routeCurrencies(w, ver, conf.Exchange, db, conf.Server.Cache.TTL.Currencies) if err != nil { log.Error(). @@ -124,9 +106,9 @@ func Setup(mux *chi.Mux, ver string, db cache.DB, conf config.Config) { } }) - // /proxy - mux.Get("/proxy", func(w http.ResponseWriter, r *http.Request) { - err := routeProxy(w, r, conf.Server.ImageProxy.SecretKey, conf.Server.ImageProxy.Timeout) + // Image proxy + mux.Get(imageProxyRoute, func(w http.ResponseWriter, r *http.Request) { + err := routeImageProxy(w, r, conf.Server.ImageProxy.SecretKey, conf.Server.ImageProxy.Timeout) if err != nil { log.Error(). Err(err). 
@@ -136,3 +118,8 @@ func Setup(mux *chi.Mux, ver string, db cache.DB, conf config.Config) { } }) } + +func muxGetPost(mux *chi.Mux, pattern string, handler http.HandlerFunc) { + mux.Get(pattern, handler) + mux.Post(pattern, handler) +} diff --git a/src/search/category/convert.go b/src/search/category/convert.go new file mode 100644 index 00000000..00027b12 --- /dev/null +++ b/src/search/category/convert.go @@ -0,0 +1,83 @@ +package category + +import ( + "encoding/base64" + "encoding/json" + "fmt" + + "github.com/hearchco/agent/src/search/engines" + "github.com/hearchco/agent/src/utils/moretime" +) + +func Base64ToCategoryType(b64 string) (Category, error) { + cj, err := Base64ToCategoryJSON(b64) + if err != nil { + return Category{}, fmt.Errorf("failed to convert base64 to category JSON: %w", err) + } + + return cj.ToCategoryType() +} + +func Base64ToCategoryJSON(b64 string) (CategoryJSON, error) { + s, err := base64.URLEncoding.DecodeString(b64) + if err != nil { + return CategoryJSON{}, fmt.Errorf("failed to decode base64: %w (%v)", err, b64) + } + + var cj CategoryJSON + if err := json.Unmarshal(s, &cj); err != nil { + return CategoryJSON{}, fmt.Errorf("failed to unmarshal category JSON: %w (%v)", err, string(s)) + } + + return cj, nil +} + +// Converts the category JSON into a more program friendly category type. +// Returns an error if any issues occur during the conversion. +func (cj CategoryJSON) ToCategoryType() (Category, error) { + // Initialize the engines slices. + engEnabled := make([]engines.Name, 0) + engRequired := make([]engines.Name, 0) + engRequiredByOrigin := make([]engines.Name, 0) + engPreferred := make([]engines.Name, 0) + engPreferredByOrigin := make([]engines.Name, 0) + + // Set the engines slices according to the provided JSON. + for nameS, conf := range cj.Engines { + name, err := engines.NameString(nameS) + if err != nil { + return Category{}, fmt.Errorf("failed converting string to engine name: %w", err) + } + + if conf.Enabled { + engEnabled = append(engEnabled, name) + + if conf.Required { + engRequired = append(engRequired, name) + } else if conf.RequiredByOrigin { + engRequiredByOrigin = append(engRequiredByOrigin, name) + } else if conf.Preferred { + engPreferred = append(engPreferred, name) + } else if conf.PreferredByOrigin { + engPreferredByOrigin = append(engPreferredByOrigin, name) + } + } + } + + // Timings config. + timings := Timings{ + PreferredTimeout: moretime.ConvertFromFancyTime(cj.Timings.PreferredTimeout), + HardTimeout: moretime.ConvertFromFancyTime(cj.Timings.HardTimeout), + } + + // Set the category config. + return Category{ + Engines: engEnabled, + RequiredEngines: engRequired, + RequiredByOriginEngines: engRequiredByOrigin, + PreferredEngines: engPreferred, + PreferredByOriginEngines: engPreferredByOrigin, + Ranking: cj.Ranking, // Stays the same. + Timings: timings, + }, nil +} diff --git a/src/search/category/disable.go b/src/search/category/disable.go new file mode 100644 index 00000000..9b8d58a2 --- /dev/null +++ b/src/search/category/disable.go @@ -0,0 +1,27 @@ +package category + +import ( + "slices" + + "github.com/hearchco/agent/src/search/engines" +) + +// Remove the specified engines from the Category. +// Passed as pointer to modify the original. 
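+// For example (illustrative): calling DisableEngines with []engines.Name{engines.BING} removes BING from Engines, RequiredEngines, RequiredByOriginEngines, PreferredEngines and PreferredByOriginEngines.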
+func (c *Category) DisableEngines(disabledEngines []engines.Name) { + c.Engines = slices.DeleteFunc(c.Engines, func(e engines.Name) bool { + return slices.Contains(disabledEngines, e) + }) + c.RequiredEngines = slices.DeleteFunc(c.RequiredEngines, func(e engines.Name) bool { + return slices.Contains(disabledEngines, e) + }) + c.RequiredByOriginEngines = slices.DeleteFunc(c.RequiredByOriginEngines, func(e engines.Name) bool { + return slices.Contains(disabledEngines, e) + }) + c.PreferredEngines = slices.DeleteFunc(c.PreferredEngines, func(e engines.Name) bool { + return slices.Contains(disabledEngines, e) + }) + c.PreferredByOriginEngines = slices.DeleteFunc(c.PreferredByOriginEngines, func(e engines.Name) bool { + return slices.Contains(disabledEngines, e) + }) +} diff --git a/src/search/category/json.go b/src/search/category/json.go new file mode 100644 index 00000000..87d3e7c1 --- /dev/null +++ b/src/search/category/json.go @@ -0,0 +1,41 @@ +package category + +// CategoryJSON is format in which the config is passed from the user. +type CategoryJSON struct { + Engines map[string]EngineJSON `koanf:"engines"` + Ranking Ranking `koanf:"ranking"` + Timings TimingsJSON `koanf:"timings"` +} + +// EngineJSON is format in which the config is passed from the user. +type EngineJSON struct { + // If false, the engine will not be used and other options will be ignored. + // This adds the engine to engines slice during conversion. + Enabled bool `koanf:"enabled"` + // If true, the engine will be awaited unless the hard timeout is reached. + // This adds the engine to required engines slice during conversion. + Required bool `koanf:"required"` + // If true, the fastest engine that has this engine in "Origins" will be awaited unless the hard timeout is reached. + // This means that we want to get results from this engine or any engine that has this engine in "Origins", whichever responds the fastest. + // This adds the engine to required engines by origin slice during conversion. + RequiredByOrigin bool `koanf:"requiredbyorigin"` + // If true, the engine will be awaited unless the preferred timeout is reached. + // This adds the engine to preferred engines slice during conversion. + Preferred bool `koanf:"preferred"` + // If true, the fastest engine that has this engine in "Origins" will be awaited unless the preferred timeout is reached. + // This means that we want to get results from this engine or any engine that has this engine in "Origins", whichever responds the fastest. + // This adds the engine to preferred by origin slice during conversion. + PreferredByOrigin bool `koanf:"preferredbyorigin"` +} + +// TimingsJSON is format in which the config is passed from the user. +// In format. +// Example: 1s, 1m, 1h, 1d, 1w, 1M, 1y. +// If unit is not specified, it is assumed to be milliseconds. +type TimingsJSON struct { + // Maximum amount of time to wait for the PreferredEngines (or ByOrigin) to respond. + // If the search is still waiting for the RequiredEngines (or ByOrigin) after this time, the search will continue. + PreferredTimeout string `koanf:"preferredtimeout"` + // Hard timeout after which the search is forcefully stopped (even if the engines didn't respond). 
+ HardTimeout string `koanf:"hardtimeout"` +} diff --git a/src/search/category/name.go b/src/search/category/name.go deleted file mode 100644 index f5f9aac0..00000000 --- a/src/search/category/name.go +++ /dev/null @@ -1,40 +0,0 @@ -package category - -import ( - "fmt" -) - -type Name string - -const ( - UNDEFINED Name = "undefined" - SUGGESTIONS Name = "suggestions" - GENERAL Name = "general" - IMAGES Name = "images" - SCIENCE Name = "science" - THOROUGH Name = "thorough" -) - -func (cat Name) String() string { - return string(cat) -} - -// Converts a string to a category name if it exists. -// If the string is empty, then GENERAL is returned. -// Otherwise returns UNDEFINED. -func FromString(cat string) (Name, error) { - switch cat { - case "", GENERAL.String(): - return GENERAL, nil - case IMAGES.String(): - return IMAGES, nil - case SCIENCE.String(): - return SCIENCE, nil - case THOROUGH.String(): - return THOROUGH, nil - case SUGGESTIONS.String(): - return UNDEFINED, fmt.Errorf("category %q is not allowed", cat) - default: - return UNDEFINED, fmt.Errorf("category %q is not defined", cat) - } -} diff --git a/src/search/category/type.go b/src/search/category/type.go new file mode 100644 index 00000000..89eb0d8b --- /dev/null +++ b/src/search/category/type.go @@ -0,0 +1,43 @@ +package category + +import ( + "time" + + "github.com/hearchco/agent/src/search/engines" +) + +type Category struct { + Engines []engines.Name + RequiredEngines []engines.Name + RequiredByOriginEngines []engines.Name + PreferredEngines []engines.Name + PreferredByOriginEngines []engines.Name + Ranking Ranking + Timings Timings +} + +type Ranking struct { + REXP float64 `koanf:"rexp"` + A float64 `koanf:"a"` + B float64 `koanf:"b"` + C float64 `koanf:"c"` + D float64 `koanf:"d"` + TRA float64 `koanf:"tra"` + TRB float64 `koanf:"trb"` + TRC float64 `koanf:"trc"` + TRD float64 `koanf:"trd"` + Engines map[string]EngineRanking `koanf:"engines"` +} + +type EngineRanking struct { + Mul float64 `koanf:"mul"` + Const float64 `koanf:"const"` +} + +type Timings struct { + // Maximum amount of time to wait for the PreferredEngines (or ByOrigin) to respond. + // If the search is still waiting for the RequiredEngines (or ByOrigin) after this time, the search will continue. + PreferredTimeout time.Duration + // Hard timeout after which the search is forcefully stopped (even if the engines didn't respond). + HardTimeout time.Duration +} diff --git a/src/search/engines/bing/imagesearch_test.go b/src/search/engines/bing/imagesearch_test.go index 694fb97d..510dd2eb 100644 --- a/src/search/engines/bing/imagesearch_test.go +++ b/src/search/engines/bing/imagesearch_test.go @@ -4,13 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestImageSearch(t *testing.T) { // Testing options. - conf := _engines_test.NewConfig(seName) opt := _engines_test.NewOpts() // Test cases. 
@@ -32,7 +30,7 @@ func TestImageSearch(t *testing.T) { }} se := New() - se.InitSearcher(context.Background(), conf.Categories[category.GENERAL].Timings) + se.InitSearcher(context.Background()) _engines_test.CheckImageSearch(t, se, tchar[:], tccr[:], tcrr[:]) } diff --git a/src/search/engines/bing/search_test.go b/src/search/engines/bing/search_test.go index 334fec41..6bd12788 100644 --- a/src/search/engines/bing/search_test.go +++ b/src/search/engines/bing/search_test.go @@ -4,13 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { // Testing options. - conf := _engines_test.NewConfig(seName) opt := _engines_test.NewOpts() // Test cases. @@ -32,7 +30,7 @@ func TestSearch(t *testing.T) { }} se := New() - se.InitSearcher(context.Background(), conf.Categories[category.GENERAL].Timings) + se.InitSearcher(context.Background()) _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/brave/search_test.go b/src/search/engines/brave/search_test.go index 76932aa3..b8e37d2d 100644 --- a/src/search/engines/brave/search_test.go +++ b/src/search/engines/brave/search_test.go @@ -4,13 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { // Testing options. - conf := _engines_test.NewConfig(seName) opt := _engines_test.NewOpts() // Test cases. @@ -32,7 +30,7 @@ func TestSearch(t *testing.T) { }} se := New() - se.InitSearcher(context.Background(), conf.Categories[category.GENERAL].Timings) + se.InitSearcher(context.Background()) _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/duckduckgo/search_test.go b/src/search/engines/duckduckgo/search_test.go index 1589b179..2d4c9c9e 100644 --- a/src/search/engines/duckduckgo/search_test.go +++ b/src/search/engines/duckduckgo/search_test.go @@ -4,13 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { // Testing options. - conf := _engines_test.NewConfig(seName) opt := _engines_test.NewOpts() // Test cases. 
@@ -32,7 +30,7 @@ func TestSearch(t *testing.T) { }} se := New() - se.InitSearcher(context.Background(), conf.Categories[category.GENERAL].Timings) + se.InitSearcher(context.Background()) _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/duckduckgo/suggest_test.go b/src/search/engines/duckduckgo/suggest_test.go index b719c489..7ac16efa 100644 --- a/src/search/engines/duckduckgo/suggest_test.go +++ b/src/search/engines/duckduckgo/suggest_test.go @@ -4,12 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/config" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestSuggest(t *testing.T) { se := New() - se.InitSuggester(context.Background(), config.CategoryTimings{}) + se.InitSuggester(context.Background()) _engines_test.CheckSuggest(t, se, "test") } diff --git a/src/search/engines/etools/search_test.go b/src/search/engines/etools/search_test.go index a29faa94..64f3dc75 100644 --- a/src/search/engines/etools/search_test.go +++ b/src/search/engines/etools/search_test.go @@ -4,13 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { // Testing options. - conf := _engines_test.NewConfig(seName) opt := _engines_test.NewOpts() // Test cases. @@ -32,7 +30,7 @@ func TestSearch(t *testing.T) { }} se := New() - se.InitSearcher(context.Background(), conf.Categories[category.GENERAL].Timings) + se.InitSearcher(context.Background()) _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/google/imagesearch_test.go b/src/search/engines/google/imagesearch_test.go index cb8b92f3..44926678 100644 --- a/src/search/engines/google/imagesearch_test.go +++ b/src/search/engines/google/imagesearch_test.go @@ -4,13 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestImageSearch(t *testing.T) { // Testing options. - conf := _engines_test.NewConfig(seName) opt := _engines_test.NewOpts() // Test cases. @@ -32,7 +30,7 @@ func TestImageSearch(t *testing.T) { }} se := New() - se.InitSearcher(context.Background(), conf.Categories[category.GENERAL].Timings) + se.InitSearcher(context.Background()) _engines_test.CheckImageSearch(t, se, tchar[:], tccr[:], tcrr[:]) } diff --git a/src/search/engines/google/search_test.go b/src/search/engines/google/search_test.go index 6063df38..634b6ae2 100644 --- a/src/search/engines/google/search_test.go +++ b/src/search/engines/google/search_test.go @@ -4,13 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { // Testing options. - conf := _engines_test.NewConfig(seName) opt := _engines_test.NewOpts() // Test cases. 
@@ -32,7 +30,7 @@ func TestSearch(t *testing.T) { }} se := New() - se.InitSearcher(context.Background(), conf.Categories[category.GENERAL].Timings) + se.InitSearcher(context.Background()) _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/google/suggest_test.go b/src/search/engines/google/suggest_test.go index feaeafc7..32e79c60 100644 --- a/src/search/engines/google/suggest_test.go +++ b/src/search/engines/google/suggest_test.go @@ -4,12 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/config" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestSuggest(t *testing.T) { se := New() - se.InitSuggester(context.Background(), config.CategoryTimings{}) + se.InitSuggester(context.Background()) _engines_test.CheckSuggest(t, se, "test") } diff --git a/src/search/engines/googlescholar/search_test.go b/src/search/engines/googlescholar/search_test.go index 440cec32..7292aed8 100644 --- a/src/search/engines/googlescholar/search_test.go +++ b/src/search/engines/googlescholar/search_test.go @@ -4,13 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { // Testing options. - conf := _engines_test.NewConfig(seName) opt := _engines_test.NewOpts() // Test cases. @@ -32,7 +30,7 @@ func TestSearch(t *testing.T) { }} se := New() - se.InitSearcher(context.Background(), conf.Categories[category.GENERAL].Timings) + se.InitSearcher(context.Background()) _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/mojeek/search_test.go b/src/search/engines/mojeek/search_test.go index 3adaddab..85c82892 100644 --- a/src/search/engines/mojeek/search_test.go +++ b/src/search/engines/mojeek/search_test.go @@ -4,13 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { // Testing options. - conf := _engines_test.NewConfig(seName) opt := _engines_test.NewOpts() // Test cases. @@ -32,7 +30,7 @@ func TestSearch(t *testing.T) { }} se := New() - se.InitSearcher(context.Background(), conf.Categories[category.GENERAL].Timings) + se.InitSearcher(context.Background()) _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/presearch/search_test.go b/src/search/engines/presearch/search_test.go index 47cd7b42..72ecc8e4 100644 --- a/src/search/engines/presearch/search_test.go +++ b/src/search/engines/presearch/search_test.go @@ -4,13 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { // Testing options. - conf := _engines_test.NewConfig(seName) opt := _engines_test.NewOpts() // Test cases. 
@@ -32,7 +30,7 @@ func TestSearch(t *testing.T) { }} se := New() - se.InitSearcher(context.Background(), conf.Categories[category.GENERAL].Timings) + se.InitSearcher(context.Background()) _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/qwant/search_test.go b/src/search/engines/qwant/search_test.go index cc8c87db..ac584cc4 100644 --- a/src/search/engines/qwant/search_test.go +++ b/src/search/engines/qwant/search_test.go @@ -4,13 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { // Testing options. - conf := _engines_test.NewConfig(seName) opt := _engines_test.NewOpts() // Test cases. @@ -32,7 +30,7 @@ func TestSearch(t *testing.T) { }} se := New() - se.InitSearcher(context.Background(), conf.Categories[category.GENERAL].Timings) + se.InitSearcher(context.Background()) _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/startpage/search_test.go b/src/search/engines/startpage/search_test.go index 9deab1f3..e42bff54 100644 --- a/src/search/engines/startpage/search_test.go +++ b/src/search/engines/startpage/search_test.go @@ -4,13 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { // Testing options. - conf := _engines_test.NewConfig(seName) opt := _engines_test.NewOpts() // Test cases. @@ -32,7 +30,7 @@ func TestSearch(t *testing.T) { }} se := New() - se.InitSearcher(context.Background(), conf.Categories[category.GENERAL].Timings) + se.InitSearcher(context.Background()) _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/swisscows/search_test.go b/src/search/engines/swisscows/search_test.go index 4a57ce9f..f5dd6b39 100644 --- a/src/search/engines/swisscows/search_test.go +++ b/src/search/engines/swisscows/search_test.go @@ -4,13 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { // Testing options. - conf := _engines_test.NewConfig(seName) opt := _engines_test.NewOpts() // Test cases. @@ -32,7 +30,7 @@ func TestSearch(t *testing.T) { }} se := New() - se.InitSearcher(context.Background(), conf.Categories[category.GENERAL].Timings) + se.InitSearcher(context.Background()) _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/yahoo/search_test.go b/src/search/engines/yahoo/search_test.go index 5d11f0cd..43777976 100644 --- a/src/search/engines/yahoo/search_test.go +++ b/src/search/engines/yahoo/search_test.go @@ -4,13 +4,11 @@ import ( "context" "testing" - "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { // Testing options. - conf := _engines_test.NewConfig(seName) opt := _engines_test.NewOpts() // Test cases. 
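For reference, every engine web-search test in this patch now follows the same shape. A minimal sketch, assuming it sits inside one of the engine packages (which provide New()); the query and test-case values below are placeholders, not taken from any real test file.

package yahoo // hypothetical placement; the same shape applies to every engine above

import (
	"context"
	"testing"

	"github.com/hearchco/agent/src/search/engines/_engines_test"
)

func TestSearch(t *testing.T) {
	// Testing options (no per-category config or timings are needed any more).
	opt := _engines_test.NewOpts()

	// Placeholder test cases; real tests fill these with engine-specific queries.
	tchar := []_engines_test.TestCaseHasAnyResults{{Query: "ping", Options: opt}}
	tccr := []_engines_test.TestCaseContainsResults{}
	tcrr := []_engines_test.TestCaseRankedResults{}

	se := New()
	se.InitSearcher(context.Background())
	_engines_test.CheckSearch(t, se, tchar, tccr, tcrr)
}
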
@@ -32,7 +30,7 @@ func TestSearch(t *testing.T) { }} se := New() - se.InitSearcher(context.Background(), conf.Categories[category.GENERAL].Timings) + se.InitSearcher(context.Background()) _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/yep/search_test.go b/src/search/engines/yep/search_test.go index 165f1c2d..a9f830b5 100644 --- a/src/search/engines/yep/search_test.go +++ b/src/search/engines/yep/search_test.go @@ -32,7 +32,7 @@ package yep // }} // se := New() -// se.Init(context.Background(), conf.Categories[category.GENERAL].Timings) +// se.Init(context.Background()) // _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) // } diff --git a/src/search/init.go b/src/search/init.go index 414a296b..c866e481 100644 --- a/src/search/init.go +++ b/src/search/init.go @@ -3,34 +3,33 @@ package search import ( "context" - "github.com/hearchco/agent/src/config" "github.com/hearchco/agent/src/search/engines" "github.com/hearchco/agent/src/search/scraper" ) // Searchers. -func initializeSearchers(ctx context.Context, engs []engines.Name, timings config.CategoryTimings) []scraper.Searcher { +func initializeSearchers(ctx context.Context, engs []engines.Name) []scraper.Searcher { searchers := searcherArray() for _, engName := range engs { - searchers[engName].InitSearcher(ctx, timings) + searchers[engName].InitSearcher(ctx) } return searchers[:] } // Image searchers. -func initializeImageSearchers(ctx context.Context, engs []engines.Name, timings config.CategoryTimings) []scraper.ImageSearcher { +func initializeImageSearchers(ctx context.Context, engs []engines.Name) []scraper.ImageSearcher { searchers := imageSearcherArray() for _, engName := range engs { - searchers[engName].InitSearcher(ctx, timings) + searchers[engName].InitSearcher(ctx) } return searchers[:] } // Suggesters. -func initializeSuggesters(ctx context.Context, engs []engines.Name, timings config.CategoryTimings) []scraper.Suggester { +func initializeSuggesters(ctx context.Context, engs []engines.Name) []scraper.Suggester { suggesters := suggesterArray() for _, engName := range engs { - suggesters[engName].InitSuggester(ctx, timings) + suggesters[engName].InitSuggester(ctx) } return suggesters[:] } diff --git a/src/search/result/rank/results.go b/src/search/result/rank/results.go index 7bcec95a..9882d1a6 100644 --- a/src/search/result/rank/results.go +++ b/src/search/result/rank/results.go @@ -3,14 +3,14 @@ package rank import ( "sort" - "github.com/hearchco/agent/src/config" + "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/result" ) type Results []result.Result // Calculates the Score, sorts by it and then populates the Rank field of every result. -func (r Results) Rank(rconf config.CategoryRanking) { +func (r Results) Rank(rconf category.Ranking) { // Fill Rank field for every EngineRank. r.fillEngineRankRank() diff --git a/src/search/result/rank/score.go b/src/search/result/rank/score.go index 20193901..f37bbe6c 100644 --- a/src/search/result/rank/score.go +++ b/src/search/result/rank/score.go @@ -3,18 +3,18 @@ package rank import ( "math" - "github.com/hearchco/agent/src/config" + "github.com/hearchco/agent/src/search/category" ) // Calculates and sets scores for all results. -func (r Results) calculateScores(rconf config.CategoryRanking) { +func (r Results) calculateScores(rconf category.Ranking) { for _, res := range r { res.SetScore(calculateScore(res, rconf)) } } // Calculates and sets scores for all results. 
-func (s Suggestions) calculateScores(rconf config.CategoryRanking) { +func (s Suggestions) calculateScores(rconf category.Ranking) { for i := range s { sug := &s[i] sug.SetScore(calculateScore(sug, rconf)) @@ -22,7 +22,7 @@ func (s Suggestions) calculateScores(rconf config.CategoryRanking) { } // Calculates the score for one result. -func calculateScore[T ranker](val scoreEngineRanker[T], rconf config.CategoryRanking) float64 { +func calculateScore[T ranker](val scoreEngineRanker[T], rconf category.Ranking) float64 { var retRankScore float64 = 0 for _, er := range val.EngineRanks() { eng := rconf.Engines[er.SearchEngine().String()] diff --git a/src/search/result/rank/suggestions.go b/src/search/result/rank/suggestions.go index ebc212be..fe9a8745 100644 --- a/src/search/result/rank/suggestions.go +++ b/src/search/result/rank/suggestions.go @@ -3,14 +3,14 @@ package rank import ( "sort" - "github.com/hearchco/agent/src/config" + "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/result" ) type Suggestions []result.Suggestion // Calculates the Score, sorts by it and then populates the Rank field of every result. -func (s Suggestions) Rank(rconf config.CategoryRanking) { +func (s Suggestions) Rank(rconf category.Ranking) { // Calculate and set scores. s.calculateScores(rconf) diff --git a/src/search/imagesearch.go b/src/search/s_images.go similarity index 95% rename from src/search/imagesearch.go rename to src/search/s_images.go index 3b1ea661..ad572669 100644 --- a/src/search/imagesearch.go +++ b/src/search/s_images.go @@ -8,13 +8,14 @@ import ( "github.com/rs/zerolog/log" - "github.com/hearchco/agent/src/config" + "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/options" "github.com/hearchco/agent/src/search/result" "github.com/hearchco/agent/src/utils/anonymize" ) -func ImageSearch(query string, opts options.Options, catConf config.Category) ([]result.Result, error) { +// Searches for images using the provided category config. +func Images(query string, opts options.Options, catConf category.Category) ([]result.Result, error) { // Capture start time. startTime := time.Now() @@ -53,7 +54,7 @@ func ImageSearch(query string, opts options.Options, catConf config.Category) ([ }() // Initialize each engine. - searchers := initializeImageSearchers(searchCtx, catConf.Engines, catConf.Timings) + searchers := initializeImageSearchers(searchCtx, catConf.Engines) // Create a map for the results with RWMutex. // TODO: Make title and desc length configurable. diff --git a/src/search/suggest.go b/src/search/s_suggestions.go similarity index 92% rename from src/search/suggest.go rename to src/search/s_suggestions.go index bb7ea07c..0c6ab6fa 100644 --- a/src/search/suggest.go +++ b/src/search/s_suggestions.go @@ -8,13 +8,14 @@ import ( "github.com/rs/zerolog/log" - "github.com/hearchco/agent/src/config" + "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/options" "github.com/hearchco/agent/src/search/result" "github.com/hearchco/agent/src/utils/anonymize" ) -func Suggest(query string, opts options.Options, catConf config.Category) ([]result.Suggestion, error) { +// Searches for suggestions using the provided category config. +func Suggestions(query string, opts options.Options, catConf category.Category) ([]result.Suggestion, error) { // Capture start time. 
startTime := time.Now() @@ -43,7 +44,7 @@ func Suggest(query string, opts options.Options, catConf config.Category) ([]res }() // Initialize each engine. - suggesters := initializeSuggesters(suggestCtx, catConf.Engines, catConf.Timings) + suggesters := initializeSuggesters(suggestCtx, catConf.Engines) // Create a map for the suggestions with RWMutex. concMap := result.NewSuggestionMap(len(catConf.Engines)) diff --git a/src/search/search.go b/src/search/s_web.go similarity index 93% rename from src/search/search.go rename to src/search/s_web.go index 58089772..716dce13 100644 --- a/src/search/search.go +++ b/src/search/s_web.go @@ -8,14 +8,14 @@ import ( "github.com/rs/zerolog/log" - "github.com/hearchco/agent/src/config" "github.com/hearchco/agent/src/search/category" "github.com/hearchco/agent/src/search/engines/options" "github.com/hearchco/agent/src/search/result" "github.com/hearchco/agent/src/utils/anonymize" ) -func Search(query string, category category.Name, opts options.Options, catConf config.Category) ([]result.Result, error) { +// Searches for web using the provided category config. +func Web(query string, opts options.Options, catConf category.Category) ([]result.Result, error) { // Capture start time. startTime := time.Now() @@ -24,7 +24,6 @@ func Search(query string, category category.Name, opts options.Options, catConf } log.Debug(). - Str("category", category.String()). Str("query", anonymize.String(query)). Int("pages_start", opts.Pages.Start). Int("pages_max", opts.Pages.Max). @@ -55,7 +54,7 @@ func Search(query string, category category.Name, opts options.Options, catConf }() // Initialize each engine. - searchers := initializeSearchers(searchCtx, catConf.Engines, catConf.Timings) + searchers := initializeSearchers(searchCtx, catConf.Engines) // Create a map for the results with RWMutex. // TODO: Make title and desc length configurable. diff --git a/src/search/scraper/collector.go b/src/search/scraper/collector.go index bedcc73d..bf30bd0b 100644 --- a/src/search/scraper/collector.go +++ b/src/search/scraper/collector.go @@ -3,7 +3,6 @@ package scraper import ( "bytes" "context" - "fmt" "io" "strings" @@ -12,7 +11,6 @@ import ( "github.com/klauspost/compress/zstd" "github.com/rs/zerolog/log" - "github.com/hearchco/agent/src/config" "github.com/hearchco/agent/src/search/useragent" ) @@ -52,23 +50,6 @@ func (e *EngineBase) initCollector(ctx context.Context, acceptS string) { ) } -func (e *EngineBase) initLimitRule(timings config.CategoryTimings) { - limitRule := colly.LimitRule{ - DomainGlob: "*", - Delay: timings.Delay, - RandomDelay: timings.RandomDelay, - Parallelism: timings.Parallelism, - } - if err := e.collector.Limit(&limitRule); err != nil { - log.Panic(). - Caller(). - Err(err). - Str("limitRule", fmt.Sprintf("%v", limitRule)). - Msg("Failed adding new limit rule") - // ^PANIC - } -} - func (e *EngineBase) initCollectorOnRequest(ctx context.Context) { e.collector.OnRequest(func(r *colly.Request) { if err := ctx.Err(); err != nil { diff --git a/src/search/scraper/enginebase.go b/src/search/scraper/enginebase.go index 81eb8573..a3323538 100644 --- a/src/search/scraper/enginebase.go +++ b/src/search/scraper/enginebase.go @@ -5,7 +5,6 @@ import ( "github.com/gocolly/colly/v2" - "github.com/hearchco/agent/src/config" "github.com/hearchco/agent/src/search/engines" ) @@ -14,7 +13,6 @@ type EngineBase struct { Name engines.Name Origins []engines.Name collector *colly.Collector - timings config.CategoryTimings } // Used to get the name of the search engine. 
@@ -28,32 +26,20 @@ func (e EngineBase) GetOrigins() []engines.Name { } // Used to initialize the EngineBase collector. -func (e *EngineBase) Init(ctx context.Context, timings config.CategoryTimings) { - e.timings = timings - e.initLimitRule(timings) +func (e *EngineBase) Init(ctx context.Context) { e.initCollectorOnRequest(ctx) e.initCollectorOnResponse() e.initCollectorOnError() } // Used to initialize the EngineBase collector for searching. -func (e *EngineBase) InitSearcher(ctx context.Context, timings config.CategoryTimings) { +func (e *EngineBase) InitSearcher(ctx context.Context) { e.initCollectorSearcher(ctx) - e.Init(ctx, timings) + e.Init(ctx) } // Used to initialize the EngineBase collector for suggesting. -func (e *EngineBase) InitSuggester(ctx context.Context, timings config.CategoryTimings) { +func (e *EngineBase) InitSuggester(ctx context.Context) { e.initCollectorSuggester(ctx) - e.Init(ctx, timings) -} - -// Used to allow re-running the Search method. -func (e *EngineBase) ReInitSearcher(ctx context.Context) { - e.InitSearcher(ctx, e.timings) -} - -// Used to allow re-running the Suggest method. -func (e *EngineBase) ReInitSuggester(ctx context.Context) { - e.InitSuggester(ctx, e.timings) + e.Init(ctx) } diff --git a/src/search/scraper/interfaces.go b/src/search/scraper/interfaces.go index 4759abfe..a0dfc3d0 100644 --- a/src/search/scraper/interfaces.go +++ b/src/search/scraper/interfaces.go @@ -3,7 +3,6 @@ package scraper import ( "context" - "github.com/hearchco/agent/src/config" "github.com/hearchco/agent/src/search/engines" "github.com/hearchco/agent/src/search/engines/options" "github.com/hearchco/agent/src/search/result" @@ -13,15 +12,14 @@ import ( type Enginer interface { GetName() engines.Name GetOrigins() []engines.Name - Init(context.Context, config.CategoryTimings) + Init(context.Context) } // Interface that each search engine must implement to support searching general results. type Searcher interface { Enginer - InitSearcher(context.Context, config.CategoryTimings) - ReInitSearcher(context.Context) + InitSearcher(context.Context) Search(string, options.Options, chan result.ResultScraped) ([]error, bool) } @@ -29,8 +27,7 @@ type Searcher interface { type ImageSearcher interface { Enginer - InitSearcher(context.Context, config.CategoryTimings) - ReInitSearcher(context.Context) + InitSearcher(context.Context) ImageSearch(string, options.Options, chan result.ResultScraped) ([]error, bool) } @@ -38,7 +35,6 @@ type ImageSearcher interface { type Suggester interface { Enginer - InitSuggester(context.Context, config.CategoryTimings) - ReInitSuggester(context.Context) + InitSuggester(context.Context) Suggest(string, options.Options, chan result.SuggestionScraped) ([]error, bool) } diff --git a/src/search/searchtype/name.go b/src/search/searchtype/name.go new file mode 100644 index 00000000..a8fc4f04 --- /dev/null +++ b/src/search/searchtype/name.go @@ -0,0 +1,32 @@ +package searchtype + +import ( + "fmt" +) + +type Name string + +const ( + WEB Name = "web" + IMAGES Name = "images" + SUGGESTIONS Name = "suggestions" +) + +func (st Name) String() string { + return string(st) +} + +// Converts a string to a search type name if it exists. +// Otherwise returns an error. 
+func FromString(st string) (Name, error) { + switch st { + case WEB.String(): + return WEB, nil + case IMAGES.String(): + return IMAGES, nil + case SUGGESTIONS.String(): + return SUGGESTIONS, nil + default: + return "", fmt.Errorf("search type %q is not defined", st) + } +} From 3778169af5292ced2c8dd6d0dd2b54819cd2b9db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksa=20Siri=C5=A1ki?= <31509435+aleksasiriski@users.noreply.github.com> Date: Fri, 15 Nov 2024 13:10:15 +0100 Subject: [PATCH 02/11] refactor: rename all instances of Search(ers) to WebSearch(ers) --- generate/enginer/enginer.go | 6 ++-- .../bing/{imagesearch.go => s_images.go} | 0 .../{imagesearch_test.go => s_images_test.go} | 0 .../engines/bing/{search.go => s_web.go} | 2 +- .../bing/{search_test.go => s_web_test.go} | 2 +- .../engines/brave/{search.go => s_web.go} | 2 +- .../brave/{search_test.go => s_web_test.go} | 2 +- .../{suggest.go => s_suggestions.go} | 0 ...{suggest_test.go => s_suggestions_test.go} | 0 .../duckduckgo/{search.go => s_web.go} | 2 +- .../{search_test.go => s_web_test.go} | 2 +- .../engines/etools/{search.go => s_web.go} | 2 +- .../etools/{search_test.go => s_web_test.go} | 2 +- .../google/{imagesearch.go => s_images.go} | 0 .../{imagesearch_test.go => s_images_test.go} | 0 .../google/{suggest.go => s_suggestions.go} | 0 ...{suggest_test.go => s_suggestions_test.go} | 0 .../engines/google/{search.go => s_web.go} | 2 +- .../google/{search_test.go => s_web_test.go} | 2 +- .../googlescholar/{search.go => s_web.go} | 2 +- .../{search_test.go => s_web_test.go} | 2 +- .../engines/mojeek/{search.go => s_web.go} | 2 +- .../mojeek/{search_test.go => s_web_test.go} | 2 +- src/search/engines/name.go | 28 +++++++++---------- .../engines/presearch/{search.go => s_web.go} | 2 +- .../{search_test.go => s_web_test.go} | 2 +- .../engines/qwant/{search.go => s_web.go} | 2 +- .../qwant/{search_test.go => s_web_test.go} | 2 +- .../engines/startpage/{search.go => s_web.go} | 2 +- .../{search_test.go => s_web_test.go} | 2 +- .../engines/swisscows/{search.go => s_web.go} | 2 +- .../{search_test.go => s_web_test.go} | 2 +- .../engines/yahoo/{search.go => s_web.go} | 2 +- .../yahoo/{search_test.go => s_web_test.go} | 2 +- .../engines/yep/{search.go => s_web.go} | 2 +- .../yep/{search_test.go => s_web_test.go} | 2 +- src/search/init.go | 10 +++---- src/search/run_engines.go | 10 +++---- src/search/run_origins.go | 8 +++--- src/search/s_web.go | 2 +- src/search/scraper/enginebase.go | 4 +-- src/search/scraper/interfaces.go | 6 ++-- 42 files changed, 63 insertions(+), 63 deletions(-) rename src/search/engines/bing/{imagesearch.go => s_images.go} (100%) rename src/search/engines/bing/{imagesearch_test.go => s_images_test.go} (100%) rename src/search/engines/bing/{search.go => s_web.go} (96%) rename src/search/engines/bing/{search_test.go => s_web_test.go} (94%) rename src/search/engines/brave/{search.go => s_web.go} (96%) rename src/search/engines/brave/{search_test.go => s_web_test.go} (94%) rename src/search/engines/duckduckgo/{suggest.go => s_suggestions.go} (100%) rename src/search/engines/duckduckgo/{suggest_test.go => s_suggestions_test.go} (100%) rename src/search/engines/duckduckgo/{search.go => s_web.go} (96%) rename src/search/engines/duckduckgo/{search_test.go => s_web_test.go} (94%) rename src/search/engines/etools/{search.go => s_web.go} (97%) rename src/search/engines/etools/{search_test.go => s_web_test.go} (94%) rename src/search/engines/google/{imagesearch.go => s_images.go} (100%) rename 
src/search/engines/google/{imagesearch_test.go => s_images_test.go} (100%) rename src/search/engines/google/{suggest.go => s_suggestions.go} (100%) rename src/search/engines/google/{suggest_test.go => s_suggestions_test.go} (100%) rename src/search/engines/google/{search.go => s_web.go} (95%) rename src/search/engines/google/{search_test.go => s_web_test.go} (94%) rename src/search/engines/googlescholar/{search.go => s_web.go} (96%) rename src/search/engines/googlescholar/{search_test.go => s_web_test.go} (95%) rename src/search/engines/mojeek/{search.go => s_web.go} (95%) rename src/search/engines/mojeek/{search_test.go => s_web_test.go} (94%) rename src/search/engines/presearch/{search.go => s_web.go} (96%) rename src/search/engines/presearch/{search_test.go => s_web_test.go} (94%) rename src/search/engines/qwant/{search.go => s_web.go} (95%) rename src/search/engines/qwant/{search_test.go => s_web_test.go} (94%) rename src/search/engines/startpage/{search.go => s_web.go} (95%) rename src/search/engines/startpage/{search_test.go => s_web_test.go} (94%) rename src/search/engines/swisscows/{search.go => s_web.go} (96%) rename src/search/engines/swisscows/{search_test.go => s_web_test.go} (94%) rename src/search/engines/yahoo/{search.go => s_web.go} (96%) rename src/search/engines/yahoo/{search_test.go => s_web_test.go} (94%) rename src/search/engines/yep/{search.go => s_web.go} (96%) rename src/search/engines/yep/{search_test.go => s_web_test.go} (95%) diff --git a/generate/enginer/enginer.go b/generate/enginer/enginer.go index 2824d8c1..1960fa4a 100644 --- a/generate/enginer/enginer.go +++ b/generate/enginer/enginer.go @@ -25,8 +25,8 @@ var ( interfacesImport = flag.String("interfacesimport", "github.com/hearchco/agent/src/search/scraper", "source of the interface import, which is prefixed to interfaces; default github.com/hearchco/agent/src/search/scraper") interfacesPackage = flag.String("interfacespackage", "scraper", "name of the package for the interfaces; default scraper") interfaceEnginer = flag.String("interfaceenginer", "Enginer", "name of the nginer interface; default scraper.Enginer") - interfaceSearcher = flag.String("interfacesearcher", "Searcher", "name of the searcher interface; default scraper.Searcher") - interfaceImageSearcher = flag.String("interfaceimagesearcher", "ImageSearcher", "name of the searcher interface; default scraper.ImageSearcher") + interfaceWebSearcher = flag.String("interfacewebsearcher", "WebSearcher", "name of the web searcher interface; default scraper.WebSearcher") + interfaceImageSearcher = flag.String("interfaceimagesearcher", "ImageSearcher", "name of the image searcher interface; default scraper.ImageSearcher") interfaceSuggester = flag.String("interfacesuggester", "Suggester", "name of the suggester interface; default scraper.Suggester") enginesImport = flag.String("enginesimport", "github.com/hearchco/agent/src/search/engines", "source of the engines import, which is prefixed to imports for engines; default github.com/hearchco/agent/src/search/engines") ) @@ -201,7 +201,7 @@ func (g *Generator) generate(typeName string) { g.printEnginerLen(values) g.printInterfaces(values, *interfaceEnginer) - g.printInterfaces(values, *interfaceSearcher) + g.printInterfaces(values, *interfaceWebSearcher) g.printInterfaces(values, *interfaceImageSearcher) g.printInterfaces(values, *interfaceSuggester) } diff --git a/src/search/engines/bing/imagesearch.go b/src/search/engines/bing/s_images.go similarity index 100% rename from 
src/search/engines/bing/imagesearch.go rename to src/search/engines/bing/s_images.go diff --git a/src/search/engines/bing/imagesearch_test.go b/src/search/engines/bing/s_images_test.go similarity index 100% rename from src/search/engines/bing/imagesearch_test.go rename to src/search/engines/bing/s_images_test.go diff --git a/src/search/engines/bing/search.go b/src/search/engines/bing/s_web.go similarity index 96% rename from src/search/engines/bing/search.go rename to src/search/engines/bing/s_web.go index 8ba7cfe6..ba0c7491 100644 --- a/src/search/engines/bing/search.go +++ b/src/search/engines/bing/s_web.go @@ -17,7 +17,7 @@ import ( "github.com/hearchco/agent/src/utils/moreurls" ) -func (se Engine) Search(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { +func (se Engine) WebSearch(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { foundResults := atomic.Bool{} retErrors := make([]error, 0, opts.Pages.Max) pageRankCounter := scraper.NewPageRankCounter(opts.Pages.Max) diff --git a/src/search/engines/bing/search_test.go b/src/search/engines/bing/s_web_test.go similarity index 94% rename from src/search/engines/bing/search_test.go rename to src/search/engines/bing/s_web_test.go index 6bd12788..c701300a 100644 --- a/src/search/engines/bing/search_test.go +++ b/src/search/engines/bing/s_web_test.go @@ -7,7 +7,7 @@ import ( "github.com/hearchco/agent/src/search/engines/_engines_test" ) -func TestSearch(t *testing.T) { +func TestWebSearch(t *testing.T) { // Testing options. opt := _engines_test.NewOpts() diff --git a/src/search/engines/brave/search.go b/src/search/engines/brave/s_web.go similarity index 96% rename from src/search/engines/brave/search.go rename to src/search/engines/brave/s_web.go index 87d2261f..0da2214b 100644 --- a/src/search/engines/brave/search.go +++ b/src/search/engines/brave/s_web.go @@ -16,7 +16,7 @@ import ( "github.com/hearchco/agent/src/utils/moreurls" ) -func (se Engine) Search(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { +func (se Engine) WebSearch(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { foundResults := atomic.Bool{} retErrors := make([]error, 0, opts.Pages.Max) pageRankCounter := scraper.NewPageRankCounter(opts.Pages.Max) diff --git a/src/search/engines/brave/search_test.go b/src/search/engines/brave/s_web_test.go similarity index 94% rename from src/search/engines/brave/search_test.go rename to src/search/engines/brave/s_web_test.go index b8e37d2d..d3910ebc 100644 --- a/src/search/engines/brave/search_test.go +++ b/src/search/engines/brave/s_web_test.go @@ -7,7 +7,7 @@ import ( "github.com/hearchco/agent/src/search/engines/_engines_test" ) -func TestSearch(t *testing.T) { +func TestWebSearch(t *testing.T) { // Testing options. 
opt := _engines_test.NewOpts() diff --git a/src/search/engines/duckduckgo/suggest.go b/src/search/engines/duckduckgo/s_suggestions.go similarity index 100% rename from src/search/engines/duckduckgo/suggest.go rename to src/search/engines/duckduckgo/s_suggestions.go diff --git a/src/search/engines/duckduckgo/suggest_test.go b/src/search/engines/duckduckgo/s_suggestions_test.go similarity index 100% rename from src/search/engines/duckduckgo/suggest_test.go rename to src/search/engines/duckduckgo/s_suggestions_test.go diff --git a/src/search/engines/duckduckgo/search.go b/src/search/engines/duckduckgo/s_web.go similarity index 96% rename from src/search/engines/duckduckgo/search.go rename to src/search/engines/duckduckgo/s_web.go index a263878c..8a174e14 100644 --- a/src/search/engines/duckduckgo/search.go +++ b/src/search/engines/duckduckgo/s_web.go @@ -17,7 +17,7 @@ import ( "github.com/hearchco/agent/src/utils/moreurls" ) -func (se Engine) Search(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { +func (se Engine) WebSearch(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { foundResults := atomic.Bool{} retErrors := make([]error, 0, opts.Pages.Max) diff --git a/src/search/engines/duckduckgo/search_test.go b/src/search/engines/duckduckgo/s_web_test.go similarity index 94% rename from src/search/engines/duckduckgo/search_test.go rename to src/search/engines/duckduckgo/s_web_test.go index 2d4c9c9e..3955c83a 100644 --- a/src/search/engines/duckduckgo/search_test.go +++ b/src/search/engines/duckduckgo/s_web_test.go @@ -7,7 +7,7 @@ import ( "github.com/hearchco/agent/src/search/engines/_engines_test" ) -func TestSearch(t *testing.T) { +func TestWebSearch(t *testing.T) { // Testing options. opt := _engines_test.NewOpts() diff --git a/src/search/engines/etools/search.go b/src/search/engines/etools/s_web.go similarity index 97% rename from src/search/engines/etools/search.go rename to src/search/engines/etools/s_web.go index 6b8fdbc3..4583da24 100644 --- a/src/search/engines/etools/search.go +++ b/src/search/engines/etools/s_web.go @@ -17,7 +17,7 @@ import ( "github.com/hearchco/agent/src/utils/moreurls" ) -func (se Engine) Search(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { +func (se Engine) WebSearch(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { foundResults := atomic.Bool{} retErrors := make([]error, 0, opts.Pages.Max) pageRankCounter := scraper.NewPageRankCounter(opts.Pages.Max) diff --git a/src/search/engines/etools/search_test.go b/src/search/engines/etools/s_web_test.go similarity index 94% rename from src/search/engines/etools/search_test.go rename to src/search/engines/etools/s_web_test.go index 64f3dc75..4b9b8514 100644 --- a/src/search/engines/etools/search_test.go +++ b/src/search/engines/etools/s_web_test.go @@ -7,7 +7,7 @@ import ( "github.com/hearchco/agent/src/search/engines/_engines_test" ) -func TestSearch(t *testing.T) { +func TestWebSearch(t *testing.T) { // Testing options. 
opt := _engines_test.NewOpts() diff --git a/src/search/engines/google/imagesearch.go b/src/search/engines/google/s_images.go similarity index 100% rename from src/search/engines/google/imagesearch.go rename to src/search/engines/google/s_images.go diff --git a/src/search/engines/google/imagesearch_test.go b/src/search/engines/google/s_images_test.go similarity index 100% rename from src/search/engines/google/imagesearch_test.go rename to src/search/engines/google/s_images_test.go diff --git a/src/search/engines/google/suggest.go b/src/search/engines/google/s_suggestions.go similarity index 100% rename from src/search/engines/google/suggest.go rename to src/search/engines/google/s_suggestions.go diff --git a/src/search/engines/google/suggest_test.go b/src/search/engines/google/s_suggestions_test.go similarity index 100% rename from src/search/engines/google/suggest_test.go rename to src/search/engines/google/s_suggestions_test.go diff --git a/src/search/engines/google/search.go b/src/search/engines/google/s_web.go similarity index 95% rename from src/search/engines/google/search.go rename to src/search/engines/google/s_web.go index edc1716d..e753a905 100644 --- a/src/search/engines/google/search.go +++ b/src/search/engines/google/s_web.go @@ -16,7 +16,7 @@ import ( "github.com/hearchco/agent/src/utils/moreurls" ) -func (se Engine) Search(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { +func (se Engine) WebSearch(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { foundResults := atomic.Bool{} retErrors := make([]error, 0, opts.Pages.Max) pageRankCounter := scraper.NewPageRankCounter(opts.Pages.Max) diff --git a/src/search/engines/google/search_test.go b/src/search/engines/google/s_web_test.go similarity index 94% rename from src/search/engines/google/search_test.go rename to src/search/engines/google/s_web_test.go index 634b6ae2..14d1c928 100644 --- a/src/search/engines/google/search_test.go +++ b/src/search/engines/google/s_web_test.go @@ -7,7 +7,7 @@ import ( "github.com/hearchco/agent/src/search/engines/_engines_test" ) -func TestSearch(t *testing.T) { +func TestWebSearch(t *testing.T) { // Testing options. 
opt := _engines_test.NewOpts() diff --git a/src/search/engines/googlescholar/search.go b/src/search/engines/googlescholar/s_web.go similarity index 96% rename from src/search/engines/googlescholar/search.go rename to src/search/engines/googlescholar/s_web.go index b31732ca..19c3624f 100644 --- a/src/search/engines/googlescholar/search.go +++ b/src/search/engines/googlescholar/s_web.go @@ -16,7 +16,7 @@ import ( "github.com/hearchco/agent/src/utils/moreurls" ) -func (se Engine) Search(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { +func (se Engine) WebSearch(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { foundResults := atomic.Bool{} retErrors := make([]error, 0, opts.Pages.Max) pageRankCounter := scraper.NewPageRankCounter(opts.Pages.Max) diff --git a/src/search/engines/googlescholar/search_test.go b/src/search/engines/googlescholar/s_web_test.go similarity index 95% rename from src/search/engines/googlescholar/search_test.go rename to src/search/engines/googlescholar/s_web_test.go index 7292aed8..095583fc 100644 --- a/src/search/engines/googlescholar/search_test.go +++ b/src/search/engines/googlescholar/s_web_test.go @@ -7,7 +7,7 @@ import ( "github.com/hearchco/agent/src/search/engines/_engines_test" ) -func TestSearch(t *testing.T) { +func TestWebSearch(t *testing.T) { // Testing options. opt := _engines_test.NewOpts() diff --git a/src/search/engines/mojeek/search.go b/src/search/engines/mojeek/s_web.go similarity index 95% rename from src/search/engines/mojeek/search.go rename to src/search/engines/mojeek/s_web.go index c33b6d20..17c0d210 100644 --- a/src/search/engines/mojeek/search.go +++ b/src/search/engines/mojeek/s_web.go @@ -16,7 +16,7 @@ import ( "github.com/hearchco/agent/src/utils/moreurls" ) -func (se Engine) Search(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { +func (se Engine) WebSearch(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { foundResults := atomic.Bool{} retErrors := make([]error, 0, opts.Pages.Max) pageRankCounter := scraper.NewPageRankCounter(opts.Pages.Max) diff --git a/src/search/engines/mojeek/search_test.go b/src/search/engines/mojeek/s_web_test.go similarity index 94% rename from src/search/engines/mojeek/search_test.go rename to src/search/engines/mojeek/s_web_test.go index 85c82892..a60d3bd0 100644 --- a/src/search/engines/mojeek/search_test.go +++ b/src/search/engines/mojeek/s_web_test.go @@ -7,7 +7,7 @@ import ( "github.com/hearchco/agent/src/search/engines/_engines_test" ) -func TestSearch(t *testing.T) { +func TestWebSearch(t *testing.T) { // Testing options. 
opt := _engines_test.NewOpts() diff --git a/src/search/engines/name.go b/src/search/engines/name.go index e49075f2..0bfe301e 100644 --- a/src/search/engines/name.go +++ b/src/search/engines/name.go @@ -4,23 +4,23 @@ import "strings" type Name int -//go:generate enumer -type=Name -json -text -sql +//go:generate enumer -type=Name -json -text //go:generate go run github.com/hearchco/agent/generate/enginer -type=Name -packagename search -output ../engine_enginer.go const ( UNDEFINED Name = iota - BING // enginer,searcher,imagesearcher - BRAVE // enginer,searcher - DUCKDUCKGO // enginer,searcher,suggester - ETOOLS // enginer,searcher - GOOGLE // enginer,searcher,imagesearcher,suggester - GOOGLESCHOLAR // enginer,searcher - MOJEEK // enginer,searcher - PRESEARCH // enginer,searcher - QWANT // enginer,searcher - STARTPAGE // enginer,searcher - SWISSCOWS // enginer,searcher - YAHOO // enginer,searcher - YEP + BING // enginer,websearcher,imagesearcher + BRAVE // enginer,websearcher + DUCKDUCKGO // enginer,websearcher,suggester + ETOOLS // enginer,websearcher + GOOGLE // enginer,websearcher,imagesearcher,suggester + GOOGLESCHOLAR // enginer,websearcher + MOJEEK // enginer,websearcher + PRESEARCH // enginer,websearcher + QWANT // enginer,websearcher + STARTPAGE // enginer,websearcher + SWISSCOWS // enginer,websearcher + YAHOO // enginer,websearcher + YEP // disabled ) // Returns engine names without UNDEFINED. diff --git a/src/search/engines/presearch/search.go b/src/search/engines/presearch/s_web.go similarity index 96% rename from src/search/engines/presearch/search.go rename to src/search/engines/presearch/s_web.go index 88ffe9b3..e5f2cead 100644 --- a/src/search/engines/presearch/search.go +++ b/src/search/engines/presearch/s_web.go @@ -17,7 +17,7 @@ import ( "github.com/hearchco/agent/src/utils/moreurls" ) -func (se Engine) Search(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { +func (se Engine) WebSearch(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { foundResults := atomic.Bool{} retErrors := make([]error, 0, opts.Pages.Max) diff --git a/src/search/engines/presearch/search_test.go b/src/search/engines/presearch/s_web_test.go similarity index 94% rename from src/search/engines/presearch/search_test.go rename to src/search/engines/presearch/s_web_test.go index 72ecc8e4..c398a68c 100644 --- a/src/search/engines/presearch/search_test.go +++ b/src/search/engines/presearch/s_web_test.go @@ -7,7 +7,7 @@ import ( "github.com/hearchco/agent/src/search/engines/_engines_test" ) -func TestSearch(t *testing.T) { +func TestWebSearch(t *testing.T) { // Testing options. 
opt := _engines_test.NewOpts() diff --git a/src/search/engines/qwant/search.go b/src/search/engines/qwant/s_web.go similarity index 95% rename from src/search/engines/qwant/search.go rename to src/search/engines/qwant/s_web.go index 2cf6f9d7..8ec93f9c 100644 --- a/src/search/engines/qwant/search.go +++ b/src/search/engines/qwant/s_web.go @@ -16,7 +16,7 @@ import ( "github.com/hearchco/agent/src/utils/moreurls" ) -func (se Engine) Search(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { +func (se Engine) WebSearch(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { foundResults := atomic.Bool{} retErrors := make([]error, 0, opts.Pages.Max) diff --git a/src/search/engines/qwant/search_test.go b/src/search/engines/qwant/s_web_test.go similarity index 94% rename from src/search/engines/qwant/search_test.go rename to src/search/engines/qwant/s_web_test.go index ac584cc4..372a270f 100644 --- a/src/search/engines/qwant/search_test.go +++ b/src/search/engines/qwant/s_web_test.go @@ -7,7 +7,7 @@ import ( "github.com/hearchco/agent/src/search/engines/_engines_test" ) -func TestSearch(t *testing.T) { +func TestWebSearch(t *testing.T) { // Testing options. opt := _engines_test.NewOpts() diff --git a/src/search/engines/startpage/search.go b/src/search/engines/startpage/s_web.go similarity index 95% rename from src/search/engines/startpage/search.go rename to src/search/engines/startpage/s_web.go index 42f9f0c7..c3c76136 100644 --- a/src/search/engines/startpage/search.go +++ b/src/search/engines/startpage/s_web.go @@ -17,7 +17,7 @@ import ( "github.com/hearchco/agent/src/utils/moreurls" ) -func (se Engine) Search(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { +func (se Engine) WebSearch(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { foundResults := atomic.Bool{} retErrors := make([]error, 0, opts.Pages.Max) pageRankCounter := scraper.NewPageRankCounter(opts.Pages.Max) diff --git a/src/search/engines/startpage/search_test.go b/src/search/engines/startpage/s_web_test.go similarity index 94% rename from src/search/engines/startpage/search_test.go rename to src/search/engines/startpage/s_web_test.go index e42bff54..d4d4c1c9 100644 --- a/src/search/engines/startpage/search_test.go +++ b/src/search/engines/startpage/s_web_test.go @@ -7,7 +7,7 @@ import ( "github.com/hearchco/agent/src/search/engines/_engines_test" ) -func TestSearch(t *testing.T) { +func TestWebSearch(t *testing.T) { // Testing options. 
opt := _engines_test.NewOpts() diff --git a/src/search/engines/swisscows/search.go b/src/search/engines/swisscows/s_web.go similarity index 96% rename from src/search/engines/swisscows/search.go rename to src/search/engines/swisscows/s_web.go index c4b40217..7e1e6145 100644 --- a/src/search/engines/swisscows/search.go +++ b/src/search/engines/swisscows/s_web.go @@ -16,7 +16,7 @@ import ( "github.com/hearchco/agent/src/utils/moreurls" ) -func (se Engine) Search(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { +func (se Engine) WebSearch(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { foundResults := atomic.Bool{} retErrors := make([]error, 0, opts.Pages.Max) diff --git a/src/search/engines/swisscows/search_test.go b/src/search/engines/swisscows/s_web_test.go similarity index 94% rename from src/search/engines/swisscows/search_test.go rename to src/search/engines/swisscows/s_web_test.go index f5dd6b39..5ffdfbb3 100644 --- a/src/search/engines/swisscows/search_test.go +++ b/src/search/engines/swisscows/s_web_test.go @@ -7,7 +7,7 @@ import ( "github.com/hearchco/agent/src/search/engines/_engines_test" ) -func TestSearch(t *testing.T) { +func TestWebSearch(t *testing.T) { // Testing options. opt := _engines_test.NewOpts() diff --git a/src/search/engines/yahoo/search.go b/src/search/engines/yahoo/s_web.go similarity index 96% rename from src/search/engines/yahoo/search.go rename to src/search/engines/yahoo/s_web.go index ae57572c..32e95ffd 100644 --- a/src/search/engines/yahoo/search.go +++ b/src/search/engines/yahoo/s_web.go @@ -17,7 +17,7 @@ import ( "github.com/hearchco/agent/src/utils/moreurls" ) -func (se Engine) Search(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { +func (se Engine) WebSearch(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { foundResults := atomic.Bool{} retErrors := make([]error, 0, opts.Pages.Max) pageRankCounter := scraper.NewPageRankCounter(opts.Pages.Max) diff --git a/src/search/engines/yahoo/search_test.go b/src/search/engines/yahoo/s_web_test.go similarity index 94% rename from src/search/engines/yahoo/search_test.go rename to src/search/engines/yahoo/s_web_test.go index 43777976..5339ab9f 100644 --- a/src/search/engines/yahoo/search_test.go +++ b/src/search/engines/yahoo/s_web_test.go @@ -7,7 +7,7 @@ import ( "github.com/hearchco/agent/src/search/engines/_engines_test" ) -func TestSearch(t *testing.T) { +func TestWebSearch(t *testing.T) { // Testing options. 
opt := _engines_test.NewOpts() diff --git a/src/search/engines/yep/search.go b/src/search/engines/yep/s_web.go similarity index 96% rename from src/search/engines/yep/search.go rename to src/search/engines/yep/s_web.go index f9bc5cfb..9b5077fa 100644 --- a/src/search/engines/yep/search.go +++ b/src/search/engines/yep/s_web.go @@ -18,7 +18,7 @@ package yep // "github.com/hearchco/agent/src/utils/morestrings" // ) -// func (se Engine) Search(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { +// func (se Engine) WebSearch(query string, opts options.Options, resChan chan result.ResultScraped) ([]error, bool) { // foundResults := atomic.Bool{} // retErrors := make([]error, 0, opts.Pages.Max) // pageRankCounter := scraper.NewPageRankCounter(opts.Pages.Max) diff --git a/src/search/engines/yep/search_test.go b/src/search/engines/yep/s_web_test.go similarity index 95% rename from src/search/engines/yep/search_test.go rename to src/search/engines/yep/s_web_test.go index a9f830b5..e32d3d45 100644 --- a/src/search/engines/yep/search_test.go +++ b/src/search/engines/yep/s_web_test.go @@ -8,7 +8,7 @@ package yep // "github.com/hearchco/agent/src/search/engines/_engines_test" // ) -// func TestSearch(t *testing.T) { +// func TestWebSearch(t *testing.T) { // // Testing options. // conf := _engines_test.NewConfig(seName) // opt := _engines_test.NewOpts() diff --git a/src/search/init.go b/src/search/init.go index c866e481..9c6b11d4 100644 --- a/src/search/init.go +++ b/src/search/init.go @@ -7,16 +7,16 @@ import ( "github.com/hearchco/agent/src/search/scraper" ) -// Searchers. -func initializeSearchers(ctx context.Context, engs []engines.Name) []scraper.Searcher { - searchers := searcherArray() +// Initialize web searchers. +func initializeWebSearchers(ctx context.Context, engs []engines.Name) []scraper.WebSearcher { + searchers := webSearcherArray() for _, engName := range engs { searchers[engName].InitSearcher(ctx) } return searchers[:] } -// Image searchers. +// Initialize image searchers. func initializeImageSearchers(ctx context.Context, engs []engines.Name) []scraper.ImageSearcher { searchers := imageSearcherArray() for _, engName := range engs { @@ -25,7 +25,7 @@ func initializeImageSearchers(ctx context.Context, engs []engines.Name) []scrape return searchers[:] } -// Suggesters. +// Initialize suggesters. func initializeSuggesters(ctx context.Context, engs []engines.Name) []scraper.Suggester { suggesters := suggesterArray() for _, engName := range engs { diff --git a/src/search/run_engines.go b/src/search/run_engines.go index 7d70fbdf..39ede3b3 100644 --- a/src/search/run_engines.go +++ b/src/search/run_engines.go @@ -9,16 +9,16 @@ import ( "github.com/hearchco/agent/src/search/scraper" ) -// Searchers. -func runRequiredSearchers(engs []engines.Name, searchers []scraper.Searcher, wgRequiredEngines *sync.WaitGroup, concMap *result.ResultConcMap, query string, opts options.Options, onceWrapMap map[engines.Name]*onceWrapper) { +// Web searchers. 
+func runRequiredSearchers(engs []engines.Name, searchers []scraper.WebSearcher, wgRequiredEngines *sync.WaitGroup, concMap *result.ResultConcMap, query string, opts options.Options, onceWrapMap map[engines.Name]*onceWrapper) { runSearchers(groupRequired, engs, searchers, wgRequiredEngines, concMap, query, opts, onceWrapMap) } -func runPreferredSearchers(engs []engines.Name, searchers []scraper.Searcher, wgPreferredEngines *sync.WaitGroup, concMap *result.ResultConcMap, query string, opts options.Options, onceWrapMap map[engines.Name]*onceWrapper) { +func runPreferredSearchers(engs []engines.Name, searchers []scraper.WebSearcher, wgPreferredEngines *sync.WaitGroup, concMap *result.ResultConcMap, query string, opts options.Options, onceWrapMap map[engines.Name]*onceWrapper) { runSearchers(groupPreferred, engs, searchers, wgPreferredEngines, concMap, query, opts, onceWrapMap) } -func runSearchers(groupName string, engs []engines.Name, searchers []scraper.Searcher, wgRequiredEngines *sync.WaitGroup, concMap *result.ResultConcMap, query string, opts options.Options, onceWrapMap map[engines.Name]*onceWrapper) { +func runSearchers(groupName string, engs []engines.Name, searchers []scraper.WebSearcher, wgRequiredEngines *sync.WaitGroup, concMap *result.ResultConcMap, query string, opts options.Options, onceWrapMap map[engines.Name]*onceWrapper) { wgRequiredEngines.Add(len(engs)) for _, engName := range engs { searcher := searchers[engName] @@ -27,7 +27,7 @@ func runSearchers(groupName string, engs []engines.Name, searchers []scraper.Sea defer wgRequiredEngines.Done() // Run the engine. - runEngine(groupName, onceWrapMap[engName], concMap, engName, searcher.Search, query, opts) + runEngine(groupName, onceWrapMap[engName], concMap, engName, searcher.WebSearch, query, opts) }() } } diff --git a/src/search/run_origins.go b/src/search/run_origins.go index eaafac28..e1b9bd59 100644 --- a/src/search/run_origins.go +++ b/src/search/run_origins.go @@ -11,15 +11,15 @@ import ( ) // Searchers. 
-func runRequiredByOriginSearchers(engs []engines.Name, searchers []scraper.Searcher, wgByOriginEngines *sync.WaitGroup, concMap *result.ResultConcMap, enabledEngines []engines.Name, query string, opts options.Options, onceWrapMap map[engines.Name]*onceWrapper) { +func runRequiredByOriginSearchers(engs []engines.Name, searchers []scraper.WebSearcher, wgByOriginEngines *sync.WaitGroup, concMap *result.ResultConcMap, enabledEngines []engines.Name, query string, opts options.Options, onceWrapMap map[engines.Name]*onceWrapper) { runByOriginSearchers(groupRequiredByOrigin, engs, searchers, wgByOriginEngines, concMap, enabledEngines, query, opts, onceWrapMap) } -func runPreferredByOriginSearchers(engs []engines.Name, searchers []scraper.Searcher, wgByOriginEngines *sync.WaitGroup, concMap *result.ResultConcMap, enabledEngines []engines.Name, query string, opts options.Options, onceWrapMap map[engines.Name]*onceWrapper) { +func runPreferredByOriginSearchers(engs []engines.Name, searchers []scraper.WebSearcher, wgByOriginEngines *sync.WaitGroup, concMap *result.ResultConcMap, enabledEngines []engines.Name, query string, opts options.Options, onceWrapMap map[engines.Name]*onceWrapper) { runByOriginSearchers(groupPreferredByOrigin, engs, searchers, wgByOriginEngines, concMap, enabledEngines, query, opts, onceWrapMap) } -func runByOriginSearchers(groupName string, engs []engines.Name, searchers []scraper.Searcher, wg *sync.WaitGroup, concMap *result.ResultConcMap, enabledEngines []engines.Name, query string, opts options.Options, onceWrapMap map[engines.Name]*onceWrapper) { +func runByOriginSearchers(groupName string, engs []engines.Name, searchers []scraper.WebSearcher, wg *sync.WaitGroup, concMap *result.ResultConcMap, enabledEngines []engines.Name, query string, opts options.Options, onceWrapMap map[engines.Name]*onceWrapper) { // Create a map of slices of all the engines that contain origins from the engines by origin. engsMap := make(map[engines.Name][]engines.Name, len(engs)) for _, originName := range engs { @@ -55,7 +55,7 @@ func runByOriginSearchers(groupName string, engs []engines.Name, searchers []scr defer wgWorkers.Done() // Run the engine. - runEngine(groupName, onceWrapMap[engName], concMap, engName, searcher.Search, query, opts) + runEngine(groupName, onceWrapMap[engName], concMap, engName, searcher.WebSearch, query, opts) // Indicate that the engine was successful. if onceWrapMap[engName].Success() { diff --git a/src/search/s_web.go b/src/search/s_web.go index 716dce13..a591bce9 100644 --- a/src/search/s_web.go +++ b/src/search/s_web.go @@ -54,7 +54,7 @@ func Web(query string, opts options.Options, catConf category.Category) ([]resul }() // Initialize each engine. - searchers := initializeSearchers(searchCtx, catConf.Engines) + searchers := initializeWebSearchers(searchCtx, catConf.Engines) // Create a map for the results with RWMutex. // TODO: Make title and desc length configurable. diff --git a/src/search/scraper/enginebase.go b/src/search/scraper/enginebase.go index a3323538..0a1b197c 100644 --- a/src/search/scraper/enginebase.go +++ b/src/search/scraper/enginebase.go @@ -32,13 +32,13 @@ func (e *EngineBase) Init(ctx context.Context) { e.initCollectorOnError() } -// Used to initialize the EngineBase collector for searching. +// Used to initialize the EngineBase collector for searching web/images. func (e *EngineBase) InitSearcher(ctx context.Context) { e.initCollectorSearcher(ctx) e.Init(ctx) } -// Used to initialize the EngineBase collector for suggesting. 
+// Used to initialize the EngineBase collector for searching suggestions. func (e *EngineBase) InitSuggester(ctx context.Context) { e.initCollectorSuggester(ctx) e.Init(ctx) diff --git a/src/search/scraper/interfaces.go b/src/search/scraper/interfaces.go index a0dfc3d0..77b8b719 100644 --- a/src/search/scraper/interfaces.go +++ b/src/search/scraper/interfaces.go @@ -15,12 +15,12 @@ type Enginer interface { Init(context.Context) } -// Interface that each search engine must implement to support searching general results. -type Searcher interface { +// Interface that each search engine must implement to support searching web results. +type WebSearcher interface { Enginer InitSearcher(context.Context) - Search(string, options.Options, chan result.ResultScraped) ([]error, bool) + WebSearch(string, options.Options, chan result.ResultScraped) ([]error, bool) } // Interface that each search engine must implement to support searching image results. From 700f2a8e072c44f09407fc99255cff0e52ca8a03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksa=20Siri=C5=A1ki?= <31509435+aleksasiriski@users.noreply.github.com> Date: Fri, 15 Nov 2024 13:14:12 +0100 Subject: [PATCH 03/11] refactor(engines): update tests code to work with previous refactor --- .../{imagesearch.go => s_images.go} | 6 ++--- .../{suggest.go => s_suggestions.go} | 0 .../_engines_test/{search.go => s_web.go} | 8 +++--- src/search/engines/_engines_test/structs.go | 26 ------------------- src/search/engines/bing/s_web_test.go | 2 +- src/search/engines/brave/s_web_test.go | 2 +- src/search/engines/duckduckgo/s_web_test.go | 2 +- src/search/engines/etools/s_web_test.go | 2 +- src/search/engines/google/s_web_test.go | 2 +- .../engines/googlescholar/s_web_test.go | 2 +- src/search/engines/mojeek/s_web_test.go | 2 +- src/search/engines/presearch/s_web_test.go | 2 +- src/search/engines/qwant/s_web_test.go | 2 +- src/search/engines/startpage/s_web_test.go | 2 +- src/search/engines/swisscows/s_web_test.go | 2 +- src/search/engines/yahoo/s_web_test.go | 2 +- src/search/engines/yep/s_web_test.go | 2 +- 17 files changed, 20 insertions(+), 46 deletions(-) rename src/search/engines/_engines_test/{imagesearch.go => s_images.go} (94%) rename src/search/engines/_engines_test/{suggest.go => s_suggestions.go} (100%) rename src/search/engines/_engines_test/{search.go => s_web.go} (87%) diff --git a/src/search/engines/_engines_test/imagesearch.go b/src/search/engines/_engines_test/s_images.go similarity index 94% rename from src/search/engines/_engines_test/imagesearch.go rename to src/search/engines/_engines_test/s_images.go index 363bda3a..7f0faa1a 100644 --- a/src/search/engines/_engines_test/imagesearch.go +++ b/src/search/engines/_engines_test/s_images.go @@ -12,7 +12,7 @@ import ( func CheckImageSearch(t *testing.T, e scraper.ImageSearcher, tchar []TestCaseHasAnyResults, tccr []TestCaseContainsResults, tcrr []TestCaseRankedResults) { // TestCaseHasAnyResults for _, tc := range tchar { - e.ReInitSearcher(context.Background()) + e.InitSearcher(context.Background()) resChan := make(chan result.ResultScraped, 100) go e.ImageSearch(tc.Query, tc.Options, resChan) @@ -29,7 +29,7 @@ func CheckImageSearch(t *testing.T, e scraper.ImageSearcher, tchar []TestCaseHas // TestCaseContainsResults for _, tc := range tccr { - e.ReInitSearcher(context.Background()) + e.InitSearcher(context.Background()) resChan := make(chan result.ResultScraped, 100) go e.ImageSearch(tc.Query, tc.Options, resChan) @@ -61,7 +61,7 @@ func CheckImageSearch(t *testing.T, e scraper.ImageSearcher, 
tchar []TestCaseHas // TestCaseRankedResults for _, tc := range tcrr { - e.ReInitSearcher(context.Background()) + e.InitSearcher(context.Background()) resChan := make(chan result.ResultScraped, 100) go e.ImageSearch(tc.Query, tc.Options, resChan) diff --git a/src/search/engines/_engines_test/suggest.go b/src/search/engines/_engines_test/s_suggestions.go similarity index 100% rename from src/search/engines/_engines_test/suggest.go rename to src/search/engines/_engines_test/s_suggestions.go diff --git a/src/search/engines/_engines_test/search.go b/src/search/engines/_engines_test/s_web.go similarity index 87% rename from src/search/engines/_engines_test/search.go rename to src/search/engines/_engines_test/s_web.go index 7cfaa9a6..5cbccd9e 100644 --- a/src/search/engines/_engines_test/search.go +++ b/src/search/engines/_engines_test/s_web.go @@ -9,10 +9,10 @@ import ( "github.com/hearchco/agent/src/search/scraper" ) -func CheckSearch(t *testing.T, e scraper.Searcher, tchar []TestCaseHasAnyResults, tccr []TestCaseContainsResults, tcrr []TestCaseRankedResults) { +func CheckWebSearch(t *testing.T, e scraper.Searcher, tchar []TestCaseHasAnyResults, tccr []TestCaseContainsResults, tcrr []TestCaseRankedResults) { // TestCaseHasAnyResults for _, tc := range tchar { - e.ReInitSearcher(context.Background()) + e.InitSearcher(context.Background()) resChan := make(chan result.ResultScraped, 100) go e.Search(tc.Query, tc.Options, resChan) @@ -29,7 +29,7 @@ func CheckSearch(t *testing.T, e scraper.Searcher, tchar []TestCaseHasAnyResults // TestCaseContainsResults for _, tc := range tccr { - e.ReInitSearcher(context.Background()) + e.InitSearcher(context.Background()) resChan := make(chan result.ResultScraped, 100) go e.Search(tc.Query, tc.Options, resChan) @@ -61,7 +61,7 @@ func CheckSearch(t *testing.T, e scraper.Searcher, tchar []TestCaseHasAnyResults // TestCaseRankedResults for _, tc := range tcrr { - e.ReInitSearcher(context.Background()) + e.InitSearcher(context.Background()) resChan := make(chan result.ResultScraped, 100) go e.Search(tc.Query, tc.Options, resChan) diff --git a/src/search/engines/_engines_test/structs.go b/src/search/engines/_engines_test/structs.go index 16613690..5ef5dad4 100644 --- a/src/search/engines/_engines_test/structs.go +++ b/src/search/engines/_engines_test/structs.go @@ -1,11 +1,6 @@ package _engines_test import ( - "time" - - "github.com/hearchco/agent/src/config" - "github.com/hearchco/agent/src/search/category" - "github.com/hearchco/agent/src/search/engines" "github.com/hearchco/agent/src/search/engines/options" ) @@ -26,27 +21,6 @@ type TestCaseRankedResults struct { Options options.Options } -func NewConfig(seName engines.Name) config.Config { - return config.Config{ - Categories: map[category.Name]config.Category{ - category.GENERAL: { - Engines: []engines.Name{seName}, - Ranking: config.EmptyRanking([]engines.Name{seName}), - Timings: config.CategoryTimings{ - HardTimeout: 10000 * time.Millisecond, - }, - }, - category.IMAGES: { - Engines: []engines.Name{seName}, - Ranking: config.EmptyRanking([]engines.Name{seName}), - Timings: config.CategoryTimings{ - HardTimeout: 10000 * time.Millisecond, - }, - }, - }, - } -} - func NewOpts() options.Options { return options.Options{ Pages: options.Pages{Start: 0, Max: 1}, diff --git a/src/search/engines/bing/s_web_test.go b/src/search/engines/bing/s_web_test.go index c701300a..eb726eab 100644 --- a/src/search/engines/bing/s_web_test.go +++ b/src/search/engines/bing/s_web_test.go @@ -32,5 +32,5 @@ func TestWebSearch(t 
*testing.T) { se := New() se.InitSearcher(context.Background()) - _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) + _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/brave/s_web_test.go b/src/search/engines/brave/s_web_test.go index d3910ebc..20de6256 100644 --- a/src/search/engines/brave/s_web_test.go +++ b/src/search/engines/brave/s_web_test.go @@ -32,5 +32,5 @@ func TestWebSearch(t *testing.T) { se := New() se.InitSearcher(context.Background()) - _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) + _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/duckduckgo/s_web_test.go b/src/search/engines/duckduckgo/s_web_test.go index 3955c83a..fe56e6ec 100644 --- a/src/search/engines/duckduckgo/s_web_test.go +++ b/src/search/engines/duckduckgo/s_web_test.go @@ -32,5 +32,5 @@ func TestWebSearch(t *testing.T) { se := New() se.InitSearcher(context.Background()) - _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) + _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/etools/s_web_test.go b/src/search/engines/etools/s_web_test.go index 4b9b8514..426e4824 100644 --- a/src/search/engines/etools/s_web_test.go +++ b/src/search/engines/etools/s_web_test.go @@ -32,5 +32,5 @@ func TestWebSearch(t *testing.T) { se := New() se.InitSearcher(context.Background()) - _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) + _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/google/s_web_test.go b/src/search/engines/google/s_web_test.go index 14d1c928..240eaf75 100644 --- a/src/search/engines/google/s_web_test.go +++ b/src/search/engines/google/s_web_test.go @@ -32,5 +32,5 @@ func TestWebSearch(t *testing.T) { se := New() se.InitSearcher(context.Background()) - _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) + _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/googlescholar/s_web_test.go b/src/search/engines/googlescholar/s_web_test.go index 095583fc..f3f5a162 100644 --- a/src/search/engines/googlescholar/s_web_test.go +++ b/src/search/engines/googlescholar/s_web_test.go @@ -32,5 +32,5 @@ func TestWebSearch(t *testing.T) { se := New() se.InitSearcher(context.Background()) - _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) + _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/mojeek/s_web_test.go b/src/search/engines/mojeek/s_web_test.go index a60d3bd0..55c26faa 100644 --- a/src/search/engines/mojeek/s_web_test.go +++ b/src/search/engines/mojeek/s_web_test.go @@ -32,5 +32,5 @@ func TestWebSearch(t *testing.T) { se := New() se.InitSearcher(context.Background()) - _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) + _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/presearch/s_web_test.go b/src/search/engines/presearch/s_web_test.go index c398a68c..e48f2fbe 100644 --- a/src/search/engines/presearch/s_web_test.go +++ b/src/search/engines/presearch/s_web_test.go @@ -32,5 +32,5 @@ func TestWebSearch(t *testing.T) { se := New() se.InitSearcher(context.Background()) - _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) + _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/qwant/s_web_test.go b/src/search/engines/qwant/s_web_test.go index 372a270f..3caead9d 100644 --- a/src/search/engines/qwant/s_web_test.go +++ b/src/search/engines/qwant/s_web_test.go @@ -32,5 +32,5 @@ func TestWebSearch(t *testing.T) { se := New() 
se.InitSearcher(context.Background()) - _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) + _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/startpage/s_web_test.go b/src/search/engines/startpage/s_web_test.go index d4d4c1c9..6cf72623 100644 --- a/src/search/engines/startpage/s_web_test.go +++ b/src/search/engines/startpage/s_web_test.go @@ -32,5 +32,5 @@ func TestWebSearch(t *testing.T) { se := New() se.InitSearcher(context.Background()) - _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) + _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/swisscows/s_web_test.go b/src/search/engines/swisscows/s_web_test.go index 5ffdfbb3..ec7a934f 100644 --- a/src/search/engines/swisscows/s_web_test.go +++ b/src/search/engines/swisscows/s_web_test.go @@ -32,5 +32,5 @@ func TestWebSearch(t *testing.T) { se := New() se.InitSearcher(context.Background()) - _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) + _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/yahoo/s_web_test.go b/src/search/engines/yahoo/s_web_test.go index 5339ab9f..bdf4c2a4 100644 --- a/src/search/engines/yahoo/s_web_test.go +++ b/src/search/engines/yahoo/s_web_test.go @@ -32,5 +32,5 @@ func TestWebSearch(t *testing.T) { se := New() se.InitSearcher(context.Background()) - _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) + _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) } diff --git a/src/search/engines/yep/s_web_test.go b/src/search/engines/yep/s_web_test.go index e32d3d45..21cdb08d 100644 --- a/src/search/engines/yep/s_web_test.go +++ b/src/search/engines/yep/s_web_test.go @@ -34,5 +34,5 @@ package yep // se := New() // se.Init(context.Background()) -// _engines_test.CheckSearch(t, se, tchar, tccr, tcrr) +// _engines_test.CheckWebSearch(t, se, tchar, tccr, tcrr) // } From 839cf977dd08423d23270c1fca5c7a83064820b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksa=20Siri=C5=A1ki?= <31509435+aleksasiriski@users.noreply.github.com> Date: Fri, 15 Nov 2024 13:35:25 +0100 Subject: [PATCH 04/11] refactor(result): rename all instances of general to web --- src/search/result/construct.go | 6 +- src/search/result/general.go | 121 ------------------ src/search/result/{images.go => r_images.go} | 2 +- .../{images_output.go => r_images_output.go} | 0 ...{images_scraped.go => r_images_scraped.go} | 6 +- .../result/{suggestion.go => r_suggestion.go} | 0 ...ion_scraped.go => r_suggestion_scraped.go} | 0 src/search/result/r_web.go | 121 ++++++++++++++++++ .../{general_output.go => r_web_output.go} | 8 +- .../{general_scraped.go => r_web_scraped.go} | 18 +-- src/search/result/rank/filler_test.go | 4 +- src/search/result/shorten.go | 10 +- 12 files changed, 148 insertions(+), 148 deletions(-) delete mode 100644 src/search/result/general.go rename src/search/result/{images.go => r_images.go} (99%) rename src/search/result/{images_output.go => r_images_output.go} (100%) rename src/search/result/{images_scraped.go => r_images_scraped.go} (98%) rename src/search/result/{suggestion.go => r_suggestion.go} (100%) rename src/search/result/{suggestion_scraped.go => r_suggestion_scraped.go} (100%) create mode 100644 src/search/result/r_web.go rename src/search/result/{general_output.go => r_web_output.go} (61%) rename src/search/result/{general_scraped.go => r_web_scraped.go} (70%) diff --git a/src/search/result/construct.go b/src/search/result/construct.go index 29059c24..e733cbea 100644 --- a/src/search/result/construct.go +++ 
b/src/search/result/construct.go @@ -6,8 +6,8 @@ import ( "github.com/hearchco/agent/src/search/engines" ) -func ConstructResult(seName engines.Name, urll string, title string, description string, page int, onPageRank int) (GeneralScraped, error) { - res := GeneralScraped{ +func ConstructResult(seName engines.Name, urll string, title string, description string, page int, onPageRank int) (WebScraped, error) { + res := WebScraped{ url: urll, title: title, description: description, @@ -47,7 +47,7 @@ func ConstructImagesResult( ) (ImagesScraped, error) { res, err := ConstructResult(seName, urll, title, description, page, onPageRank) imgres := ImagesScraped{ - GeneralScraped: res, + WebScraped: res, originalSize: scrapedImageFormat{ height: originalHeight, diff --git a/src/search/result/general.go b/src/search/result/general.go deleted file mode 100644 index 768a7e5e..00000000 --- a/src/search/result/general.go +++ /dev/null @@ -1,121 +0,0 @@ -package result - -import ( - "time" - - "github.com/hearchco/agent/src/utils/anonymize" - "github.com/rs/zerolog/log" -) - -type General struct { - generalJSON -} - -type generalJSON struct { - URL string `json:"url"` - FQDN string `json:"fqdn"` - Title string `json:"title"` - Description string `json:"description"` - Rank int `json:"rank"` - Score float64 `json:"score"` - EngineRanks []Rank `json:"engine_ranks"` -} - -func (r General) Key() string { - return r.URL() -} - -func (r General) URL() string { - if r.generalJSON.URL == "" { - log.Panic().Msg("URL is empty") - // ^PANIC - Assert because the URL should never be empty. - } - - return r.generalJSON.URL -} - -func (r General) FQDN() string { - if r.generalJSON.FQDN == "" { - log.Panic().Msg("FQDN is empty") - // ^PANIC - Assert because the FQDN should never be empty. - } - - return r.generalJSON.FQDN -} - -func (r General) Title() string { - if r.generalJSON.Title == "" { - log.Panic().Msg("Title is empty") - // ^PANIC - Assert because the Title should never be empty. - } - - return r.generalJSON.Title -} - -func (r General) Description() string { - return r.generalJSON.Description -} - -func (r *General) SetDescription(desc string) { - r.generalJSON.Description = desc -} - -func (r General) Rank() int { - return r.generalJSON.Rank -} - -func (r *General) SetRank(rank int) { - r.generalJSON.Rank = rank -} - -func (r General) Score() float64 { - return r.generalJSON.Score -} - -func (r *General) SetScore(score float64) { - r.generalJSON.Score = score -} - -func (r General) EngineRanks() []Rank { - if r.generalJSON.EngineRanks == nil { - log.Panic().Msg("EngineRanks is nil") - // ^PANIC - Assert because the EngineRanks should never be nil. - } - - return r.generalJSON.EngineRanks -} - -func (r *General) InitEngineRanks() { - r.generalJSON.EngineRanks = make([]Rank, 0) -} - -func (r *General) ShrinkEngineRanks() { - if r.generalJSON.EngineRanks == nil { - log.Panic().Msg("EngineRanks is nil") - // ^PANIC - Assert because the EngineRanks should never be nil. - } - - ranksLen := len(r.generalJSON.EngineRanks) - r.generalJSON.EngineRanks = r.generalJSON.EngineRanks[:ranksLen:ranksLen] -} - -func (r *General) AppendEngineRanks(rank Rank) { - if r.generalJSON.EngineRanks == nil { - log.Panic().Msg("EngineRanks is nil") - // ^PANIC - Assert because the EngineRanks should never be nil. 
- } - - r.generalJSON.EngineRanks = append(r.generalJSON.EngineRanks, rank) -} - -func (r General) ConvertToOutput(secret string) ResultOutput { - fqdnHash, fqdnTimestamp := anonymize.CalculateHMACBase64(r.FQDN(), secret, time.Now()) - - return GeneralOutput{ - generalOutputJSON{ - r, - fqdnHash, - fqdnTimestamp, - }, - } -} diff --git a/src/search/result/images.go b/src/search/result/r_images.go similarity index 99% rename from src/search/result/images.go rename to src/search/result/r_images.go index d4213ea0..de2cf717 100644 --- a/src/search/result/images.go +++ b/src/search/result/r_images.go @@ -13,7 +13,7 @@ type Images struct { } type imagesJSON struct { - General + Web OriginalSize ImageFormat `json:"original"` ThumbnailSize ImageFormat `json:"thumbnail"` diff --git a/src/search/result/images_output.go b/src/search/result/r_images_output.go similarity index 100% rename from src/search/result/images_output.go rename to src/search/result/r_images_output.go diff --git a/src/search/result/images_scraped.go b/src/search/result/r_images_scraped.go similarity index 98% rename from src/search/result/images_scraped.go rename to src/search/result/r_images_scraped.go index 7f813f33..b034859a 100644 --- a/src/search/result/images_scraped.go +++ b/src/search/result/r_images_scraped.go @@ -6,7 +6,7 @@ import ( ) type ImagesScraped struct { - GeneralScraped + WebScraped originalSize scrapedImageFormat thumbnailSize scrapedImageFormat @@ -61,8 +61,8 @@ func (r ImagesScraped) Convert(erCap int) Result { engineRanks = append(engineRanks, r.Rank().Convert()) return &Images{ imagesJSON{ - General{ - generalJSON{ + Web{ + webJSON{ URL: r.URL(), FQDN: moreurls.FQDN(r.URL()), Title: r.Title(), diff --git a/src/search/result/suggestion.go b/src/search/result/r_suggestion.go similarity index 100% rename from src/search/result/suggestion.go rename to src/search/result/r_suggestion.go diff --git a/src/search/result/suggestion_scraped.go b/src/search/result/r_suggestion_scraped.go similarity index 100% rename from src/search/result/suggestion_scraped.go rename to src/search/result/r_suggestion_scraped.go diff --git a/src/search/result/r_web.go b/src/search/result/r_web.go new file mode 100644 index 00000000..34bc2ce6 --- /dev/null +++ b/src/search/result/r_web.go @@ -0,0 +1,121 @@ +package result + +import ( + "time" + + "github.com/hearchco/agent/src/utils/anonymize" + "github.com/rs/zerolog/log" +) + +type Web struct { + webJSON +} + +type webJSON struct { + URL string `json:"url"` + FQDN string `json:"fqdn"` + Title string `json:"title"` + Description string `json:"description"` + Rank int `json:"rank"` + Score float64 `json:"score"` + EngineRanks []Rank `json:"engine_ranks"` +} + +func (r Web) Key() string { + return r.URL() +} + +func (r Web) URL() string { + if r.webJSON.URL == "" { + log.Panic().Msg("URL is empty") + // ^PANIC - Assert because the URL should never be empty. + } + + return r.webJSON.URL +} + +func (r Web) FQDN() string { + if r.webJSON.FQDN == "" { + log.Panic().Msg("FQDN is empty") + // ^PANIC - Assert because the FQDN should never be empty. + } + + return r.webJSON.FQDN +} + +func (r Web) Title() string { + if r.webJSON.Title == "" { + log.Panic().Msg("Title is empty") + // ^PANIC - Assert because the Title should never be empty. 
+ } + + return r.webJSON.Title +} + +func (r Web) Description() string { + return r.webJSON.Description +} + +func (r *Web) SetDescription(desc string) { + r.webJSON.Description = desc +} + +func (r Web) Rank() int { + return r.webJSON.Rank +} + +func (r *Web) SetRank(rank int) { + r.webJSON.Rank = rank +} + +func (r Web) Score() float64 { + return r.webJSON.Score +} + +func (r *Web) SetScore(score float64) { + r.webJSON.Score = score +} + +func (r Web) EngineRanks() []Rank { + if r.webJSON.EngineRanks == nil { + log.Panic().Msg("EngineRanks is nil") + // ^PANIC - Assert because the EngineRanks should never be nil. + } + + return r.webJSON.EngineRanks +} + +func (r *Web) InitEngineRanks() { + r.webJSON.EngineRanks = make([]Rank, 0) +} + +func (r *Web) ShrinkEngineRanks() { + if r.webJSON.EngineRanks == nil { + log.Panic().Msg("EngineRanks is nil") + // ^PANIC - Assert because the EngineRanks should never be nil. + } + + ranksLen := len(r.webJSON.EngineRanks) + r.webJSON.EngineRanks = r.webJSON.EngineRanks[:ranksLen:ranksLen] +} + +func (r *Web) AppendEngineRanks(rank Rank) { + if r.webJSON.EngineRanks == nil { + log.Panic().Msg("EngineRanks is nil") + // ^PANIC - Assert because the EngineRanks should never be nil. + } + + r.webJSON.EngineRanks = append(r.webJSON.EngineRanks, rank) +} + +func (r Web) ConvertToOutput(secret string) ResultOutput { + fqdnHash, fqdnTimestamp := anonymize.CalculateHMACBase64(r.FQDN(), secret, time.Now()) + + return WebOutput{ + webOutputJSON{ + r, + fqdnHash, + fqdnTimestamp, + }, + } +} diff --git a/src/search/result/general_output.go b/src/search/result/r_web_output.go similarity index 61% rename from src/search/result/general_output.go rename to src/search/result/r_web_output.go index 80970882..8dd4da22 100644 --- a/src/search/result/general_output.go +++ b/src/search/result/r_web_output.go @@ -1,11 +1,11 @@ package result -type GeneralOutput struct { - generalOutputJSON +type WebOutput struct { + webOutputJSON } -type generalOutputJSON struct { - General +type webOutputJSON struct { + Web FqdnHash string `json:"fqdn_hash,omitempty"` FqdnHashTimestamp string `json:"fqdn_hash_timestamp,omitempty"` diff --git a/src/search/result/general_scraped.go b/src/search/result/r_web_scraped.go similarity index 70% rename from src/search/result/general_scraped.go rename to src/search/result/r_web_scraped.go index a82aabbf..b73fd099 100644 --- a/src/search/result/general_scraped.go +++ b/src/search/result/r_web_scraped.go @@ -5,18 +5,18 @@ import ( "github.com/rs/zerolog/log" ) -type GeneralScraped struct { +type WebScraped struct { url string title string description string rank RankScraped } -func (r GeneralScraped) Key() string { +func (r WebScraped) Key() string { return r.URL() } -func (r GeneralScraped) URL() string { +func (r WebScraped) URL() string { if r.url == "" { log.Panic().Msg("url is empty") // ^PANIC - Assert because the url should never be empty. @@ -25,7 +25,7 @@ func (r GeneralScraped) URL() string { return r.url } -func (r GeneralScraped) Title() string { +func (r WebScraped) Title() string { if r.title == "" { log.Panic().Msg("title is empty") // ^PANIC - Assert because the title should never be empty. 
@@ -34,19 +34,19 @@ func (r GeneralScraped) Title() string { return r.title } -func (r GeneralScraped) Description() string { +func (r WebScraped) Description() string { return r.description } -func (r GeneralScraped) Rank() RankScraped { +func (r WebScraped) Rank() RankScraped { return r.rank } -func (r GeneralScraped) Convert(erCap int) Result { +func (r WebScraped) Convert(erCap int) Result { engineRanks := make([]Rank, 0, erCap) engineRanks = append(engineRanks, r.Rank().Convert()) - return &General{ - generalJSON{ + return &Web{ + webJSON{ URL: r.URL(), FQDN: moreurls.FQDN(r.URL()), Title: r.Title(), diff --git a/src/search/result/rank/filler_test.go b/src/search/result/rank/filler_test.go index e2b584b4..e62479cc 100644 --- a/src/search/result/rank/filler_test.go +++ b/src/search/result/rank/filler_test.go @@ -52,8 +52,8 @@ func TestFillEngineRankRank(t *testing.T) { resultsOrig := make(Results, 0, len(ranksTests)) resultsExpected := make(Results, 0, len(ranksTests)) for _, rankPair := range ranksTests { - var resOrig result.Result = &result.General{} - var resExpected result.Result = &result.General{} + var resOrig result.Result = &result.Web{} + var resExpected result.Result = &result.Web{} resOrig.InitEngineRanks() resExpected.InitEngineRanks() diff --git a/src/search/result/shorten.go b/src/search/result/shorten.go index 89ee5a24..cbb5481e 100644 --- a/src/search/result/shorten.go +++ b/src/search/result/shorten.go @@ -1,17 +1,17 @@ package result // Changes the title and description of the result to be at most N and M characters long respectively. -func (r General) Shorten(maxTitleLength int, maxDescriptionLength int) Result { +func (r Web) Shorten(maxTitleLength int, maxDescriptionLength int) Result { short := r - short.generalJSON.Title = shortString(r.Title(), maxTitleLength) - short.generalJSON.Description = shortString(r.Description(), maxDescriptionLength) + short.webJSON.Title = shortString(r.Title(), maxTitleLength) + short.webJSON.Description = shortString(r.Description(), maxDescriptionLength) return &short } func (r Images) Shorten(maxTitleLength int, maxDescriptionLength int) Result { short := r - short.generalJSON.Title = shortString(r.Title(), maxTitleLength) - short.generalJSON.Description = shortString(r.Description(), maxDescriptionLength) + short.webJSON.Title = shortString(r.Title(), maxTitleLength) + short.webJSON.Description = shortString(r.Description(), maxDescriptionLength) return &short } From a658cd078a7ef54ceea1d3e9b130cc263bb6b347 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksa=20Siri=C5=A1ki?= <31509435+aleksasiriski@users.noreply.github.com> Date: Fri, 15 Nov 2024 13:38:33 +0100 Subject: [PATCH 05/11] refactor(config): disabled engines example config --- hearchco_example.yaml | 18 +++++------------- src/config/structs_engines.go | 6 +++--- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/hearchco_example.yaml b/hearchco_example.yaml index bbdfb5b7..7fba18b7 100644 --- a/hearchco_example.yaml +++ b/hearchco_example.yaml @@ -4,16 +4,8 @@ server: type: none imageproxy: secretkey: changemepls -# categories: -# general: -# engines: -# duckduckgo: -# enabled: false -# ranking: -# engines: -# google: -# mul: 2 -# images: -# engines: -# bing: -# enabled: false +# engines: +# google: +# noweb: true # Disables web search for this engine +# noimages: true # Disables image search for this engine +# nosuggestions: true # Disables suggestions for this engine diff --git a/src/config/structs_engines.go b/src/config/structs_engines.go index 
70a79be0..d3a0fec0 100644 --- a/src/config/structs_engines.go +++ b/src/config/structs_engines.go @@ -7,9 +7,9 @@ import ( // ReaderEngineConfig is format in which the config is read from the config file and environment variables. // Used to disable certain search types for an engine. By default, all types are enabled. type ReaderEngineConfig struct { - NoWeb bool // Whether this engine is disallowed to do web searches. - NoImages bool // Whether this engine is disallowed to do image searches. - NoSuggestions bool // Whether this engine is disallowed to do suggestion searches. + NoWeb bool `koanf:"noweb"` // Whether this engine is disallowed to do web searches. + NoImages bool `koanf:"noimages"` // Whether this engine is disallowed to do image searches. + NoSuggestions bool `koanf:"nosuggestions"` // Whether this engine is disallowed to do suggestion searches. } // Slices of disabled engines for each search type, by default these are empty. From 0031bc71cdde8bd6fa1a11cc33095a1f61758ab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksa=20Siri=C5=A1ki?= <31509435+aleksasiriski@users.noreply.github.com> Date: Fri, 15 Nov 2024 13:49:36 +0100 Subject: [PATCH 06/11] refactor(engines): tests web searcher --- src/search/engines/_engines_test/s_web.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/search/engines/_engines_test/s_web.go b/src/search/engines/_engines_test/s_web.go index 5cbccd9e..16ac6e5f 100644 --- a/src/search/engines/_engines_test/s_web.go +++ b/src/search/engines/_engines_test/s_web.go @@ -9,13 +9,13 @@ import ( "github.com/hearchco/agent/src/search/scraper" ) -func CheckWebSearch(t *testing.T, e scraper.Searcher, tchar []TestCaseHasAnyResults, tccr []TestCaseContainsResults, tcrr []TestCaseRankedResults) { +func CheckWebSearch(t *testing.T, e scraper.WebSearcher, tchar []TestCaseHasAnyResults, tccr []TestCaseContainsResults, tcrr []TestCaseRankedResults) { // TestCaseHasAnyResults for _, tc := range tchar { e.InitSearcher(context.Background()) resChan := make(chan result.ResultScraped, 100) - go e.Search(tc.Query, tc.Options, resChan) + go e.WebSearch(tc.Query, tc.Options, resChan) results := make([]result.ResultScraped, 0) for r := range resChan { @@ -32,7 +32,7 @@ func CheckWebSearch(t *testing.T, e scraper.Searcher, tchar []TestCaseHasAnyResu e.InitSearcher(context.Background()) resChan := make(chan result.ResultScraped, 100) - go e.Search(tc.Query, tc.Options, resChan) + go e.WebSearch(tc.Query, tc.Options, resChan) results := make([]result.ResultScraped, 0) for r := range resChan { @@ -64,7 +64,7 @@ func CheckWebSearch(t *testing.T, e scraper.Searcher, tchar []TestCaseHasAnyResu e.InitSearcher(context.Background()) resChan := make(chan result.ResultScraped, 100) - go e.Search(tc.Query, tc.Options, resChan) + go e.WebSearch(tc.Query, tc.Options, resChan) results := make([]result.ResultScraped, 0) for r := range resChan { From e4804c03e44f5e2ae5e770cea6e90c14d921ec00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksa=20Siri=C5=A1ki?= <31509435+aleksasiriski@users.noreply.github.com> Date: Fri, 15 Nov 2024 13:50:41 +0100 Subject: [PATCH 07/11] refactor(config): delete unused vars --- src/config/defaults_exchange.go | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 src/config/defaults_exchange.go diff --git a/src/config/defaults_exchange.go b/src/config/defaults_exchange.go deleted file mode 100644 index 19f1ef1d..00000000 --- a/src/config/defaults_exchange.go +++ /dev/null @@ -1,17 +0,0 @@ -package config - -import ( - "time" - 
- "github.com/hearchco/agent/src/exchange/engines" -) - -var exchangeEngines = []engines.Name{ - engines.CURRENCYAPI, - engines.EXCHANGERATEAPI, - engines.FRANKFURTER, -} - -var exchangeTimings = ExchangeTimings{ - HardTimeout: 1 * time.Second, -} From 229e7a182a0ab634a04185971f1ea99f3a048542 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksa=20Siri=C5=A1ki?= <31509435+aleksasiriski@users.noreply.github.com> Date: Fri, 15 Nov 2024 22:56:06 +0100 Subject: [PATCH 08/11] refactor(rank): renamed vars for fn and removed hardcoded *10 (affecting TRC, now timesreturnedscoremul) --- docs/example_category.json | 24 ++++++++++++------------ src/search/category/convert.go | 28 +++++++++++++++++++++++++++- src/search/category/json.go | 26 +++++++++++++++++++++++++- src/search/category/type.go | 29 +++++++++++++++++------------ src/search/result/rank/score.go | 20 +++++++++++++------- 5 files changed, 94 insertions(+), 33 deletions(-) diff --git a/docs/example_category.json b/docs/example_category.json index 68034abf..e5f429e7 100644 --- a/docs/example_category.json +++ b/docs/example_category.json @@ -23,27 +23,27 @@ } }, "ranking": { - "rexp": 0.5, - "a": 1, - "b": 0, - "c": 1, - "d": 0, - "tra": 1, - "trb": 0, - "trc": 1, - "trd": 0, + "rankexp": 0.5, + "rankmul": 1, + "rankconst": 0, + "rankscoremul": 1, + "rankscoreadd": 0, + "timesreturnedmul": 1, + "timesreturnedadd": 0, + "timesreturnedscoremul": 1, + "timesreturnedscoreadd": 0, "engines": { "google": { "mul": 1, - "const": 0 + "add": 0 }, "bing": { "mul": 1, - "const": 0 + "add": 0 }, "brave": { "mul": 1, - "const": 0 + "add": 0 } } }, diff --git a/src/search/category/convert.go b/src/search/category/convert.go index 00027b12..156602ea 100644 --- a/src/search/category/convert.go +++ b/src/search/category/convert.go @@ -64,6 +64,32 @@ func (cj CategoryJSON) ToCategoryType() (Category, error) { } } + // Ranking config. + ranking := Ranking{ + RankExp: cj.Ranking.RankExp, + RankMul: cj.Ranking.RankMul, + RankAdd: cj.Ranking.RankAdd, + RankScoreMul: cj.Ranking.RankScoreMul, + RankScoreAdd: cj.Ranking.RankScoreAdd, + TimesReturnedMul: cj.Ranking.TimesReturnedMul, + TimesReturnedAdd: cj.Ranking.TimesReturnedAdd, + TimesReturnedScoreMul: cj.Ranking.TimesReturnedScoreMul, + TimesReturnedScoreAdd: cj.Ranking.TimesReturnedScoreAdd, + Engines: make(map[engines.Name]EngineRanking), + } + + // Set the engine ranking config. + for nameS, er := range cj.Ranking.Engines { + name, err := engines.NameString(nameS) + if err != nil { + return Category{}, fmt.Errorf("failed converting string to engine name: %w", err) + } + ranking.Engines[name] = EngineRanking{ + Mul: er.Mul, + Add: er.Add, + } + } + // Timings config. timings := Timings{ PreferredTimeout: moretime.ConvertFromFancyTime(cj.Timings.PreferredTimeout), @@ -77,7 +103,7 @@ func (cj CategoryJSON) ToCategoryType() (Category, error) { RequiredByOriginEngines: engRequiredByOrigin, PreferredEngines: engPreferred, PreferredByOriginEngines: engPreferredByOrigin, - Ranking: cj.Ranking, // Stays the same. + Ranking: ranking, Timings: timings, }, nil } diff --git a/src/search/category/json.go b/src/search/category/json.go index 87d3e7c1..7d349834 100644 --- a/src/search/category/json.go +++ b/src/search/category/json.go @@ -3,7 +3,7 @@ package category // CategoryJSON is format in which the config is passed from the user. 
type CategoryJSON struct { Engines map[string]EngineJSON `koanf:"engines"` - Ranking Ranking `koanf:"ranking"` + Ranking RankingJSON `koanf:"ranking"` Timings TimingsJSON `koanf:"timings"` } @@ -28,6 +28,30 @@ type EngineJSON struct { PreferredByOrigin bool `koanf:"preferredbyorigin"` } +// RankingJSON is format in which the config is passed from the user. +type RankingJSON struct { + // The exponent, multiplier and addition used on the rank itself. + RankExp float64 `koanf:"rankexp"` + RankMul float64 `koanf:"rankmul"` + RankAdd float64 `koanf:"rankconst"` + // The multiplier and addition used on the rank score (number calculated from dividing 100 with the rank + above variables applied). + RankScoreMul float64 `koanf:"rankscoremul"` + RankScoreAdd float64 `koanf:"rankscoreadd"` + // The multiplier and addition used on the number of times the result was returned. + TimesReturnedMul float64 `koanf:"timesreturnedmul"` + TimesReturnedAdd float64 `koanf:"timesreturnedadd"` + // The multiplier and addition used on the times returned score (number calculated from doing log(timesReturnedNum + above variables applied)). + TimesReturnedScoreMul float64 `koanf:"timesreturnedscoremul"` + TimesReturnedScoreAdd float64 `koanf:"timesreturnedscoreadd"` + // Multipliers and additions for each engine, applied to the rank score. + Engines map[string]EngineRankingJSON `koanf:"engines"` +} + +type EngineRankingJSON struct { + Mul float64 `koanf:"mul"` + Add float64 `koanf:"add"` +} + // TimingsJSON is format in which the config is passed from the user. // In format. // Example: 1s, 1m, 1h, 1d, 1w, 1M, 1y. diff --git a/src/search/category/type.go b/src/search/category/type.go index 89eb0d8b..54fb861c 100644 --- a/src/search/category/type.go +++ b/src/search/category/type.go @@ -17,21 +17,26 @@ type Category struct { } type Ranking struct { - REXP float64 `koanf:"rexp"` - A float64 `koanf:"a"` - B float64 `koanf:"b"` - C float64 `koanf:"c"` - D float64 `koanf:"d"` - TRA float64 `koanf:"tra"` - TRB float64 `koanf:"trb"` - TRC float64 `koanf:"trc"` - TRD float64 `koanf:"trd"` - Engines map[string]EngineRanking `koanf:"engines"` + // The exponent, multiplier and addition used on the rank itself. + RankExp float64 + RankMul float64 + RankAdd float64 + // The multiplier and addition used on the rank score (number calculated from dividing 100 with the rank + above variables applied). + RankScoreMul float64 + RankScoreAdd float64 + // The multiplier and addition used on the number of times the result was returned. + TimesReturnedMul float64 + TimesReturnedAdd float64 + // The multiplier and addition used on the times returned score (number calculated from doing log(timesReturnedNum + above variables applied)). + TimesReturnedScoreMul float64 + TimesReturnedScoreAdd float64 + // Multipliers and additions for each engine, applied to the rank score. + Engines map[engines.Name]EngineRanking } type EngineRanking struct { - Mul float64 `koanf:"mul"` - Const float64 `koanf:"const"` + Mul float64 + Add float64 } type Timings struct { diff --git a/src/search/result/rank/score.go b/src/search/result/rank/score.go index f37bbe6c..a79c0fbf 100644 --- a/src/search/result/rank/score.go +++ b/src/search/result/rank/score.go @@ -23,15 +23,21 @@ func (s Suggestions) calculateScores(rconf category.Ranking) { // Calculates the score for one result. 
func calculateScore[T ranker](val scoreEngineRanker[T], rconf category.Ranking) float64 { - var retRankScore float64 = 0 + var rankScoreSum float64 = 0 + + // Calculate the sum of the rank scores of all engines. + // The rank score is dividing 100 to invert the priority (the lower the rank, the higher the score). for _, er := range val.EngineRanks() { - eng := rconf.Engines[er.SearchEngine().String()] - retRankScore += (100.0/math.Pow(float64(er.Rank())*rconf.A+rconf.B, rconf.REXP)*rconf.C+rconf.D)*eng.Mul + eng.Const + eng := rconf.Engines[er.SearchEngine()] + rankScoreSum += (100.0/math.Pow(float64(er.Rank())*rconf.RankMul+rconf.RankAdd, rconf.RankExp)*rconf.RankScoreMul+rconf.RankScoreAdd)*eng.Mul + eng.Add } - retRankScore /= float64(len(val.EngineRanks())) - timesReturnedScore := math.Log(float64(len(val.EngineRanks()))*rconf.TRA+rconf.TRB)*10*rconf.TRC + rconf.TRD - score := retRankScore + timesReturnedScore + // Calculate the average rank score from the sum. + rankScoreAvg := rankScoreSum / float64(len(val.EngineRanks())) + + // Calculate a second score based on the number of times the result was returned. + // Log is used to make the score less sensitive to the number of times returned. + timesReturnedScore := math.Log(float64(len(val.EngineRanks()))*rconf.TimesReturnedMul+rconf.TimesReturnedAdd)*rconf.TimesReturnedScoreMul + rconf.TimesReturnedScoreAdd - return score + return rankScoreAvg + timesReturnedScore } From c4e50c55a2dc988125cf9817b9a09a19faf52892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksa=20Siri=C5=A1ki?= <31509435+aleksasiriski@users.noreply.github.com> Date: Fri, 15 Nov 2024 23:45:13 +0100 Subject: [PATCH 09/11] fix(rank): add hardcoded *100 (affecting TRC, now timesreturnedscoremul) --- src/search/result/rank/score.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search/result/rank/score.go b/src/search/result/rank/score.go index a79c0fbf..1b1bfe6a 100644 --- a/src/search/result/rank/score.go +++ b/src/search/result/rank/score.go @@ -37,7 +37,7 @@ func calculateScore[T ranker](val scoreEngineRanker[T], rconf category.Ranking) // Calculate a second score based on the number of times the result was returned. // Log is used to make the score less sensitive to the number of times returned. 
- timesReturnedScore := math.Log(float64(len(val.EngineRanks()))*rconf.TimesReturnedMul+rconf.TimesReturnedAdd)*rconf.TimesReturnedScoreMul + rconf.TimesReturnedScoreAdd + timesReturnedScore := math.Log(float64(len(val.EngineRanks()))*rconf.TimesReturnedMul+rconf.TimesReturnedAdd)*100*rconf.TimesReturnedScoreMul + rconf.TimesReturnedScoreAdd return rankScoreAvg + timesReturnedScore } From 583a59df4a16103768e0983ad9ae10df415ca9b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksa=20Siri=C5=A1ki?= <31509435+aleksasiriski@users.noreply.github.com> Date: Sat, 16 Nov 2024 00:39:55 +0100 Subject: [PATCH 10/11] fix(search): return user error when disabled engines are received instead of ignoring --- src/router/routes/route_search_images.go | 9 ++++++- src/router/routes/route_search_suggestions.go | 9 ++++++- src/router/routes/route_search_web.go | 9 ++++++- src/search/category/disable.go | 27 ------------------- src/search/category/disabled.go | 19 +++++++++++++ 5 files changed, 43 insertions(+), 30 deletions(-) delete mode 100644 src/search/category/disable.go create mode 100644 src/search/category/disabled.go diff --git a/src/router/routes/route_search_images.go b/src/router/routes/route_search_images.go index 7aeb4715..b97e5744 100644 --- a/src/router/routes/route_search_images.go +++ b/src/router/routes/route_search_images.go @@ -121,7 +121,14 @@ func routeSearchImages(w http.ResponseWriter, r *http.Request, ver string, disab Value: fmt.Sprintf("%v", err), }) } - catConf.DisableEngines(disabledEngines) + + if catConf.ContainsDisabledEngines(disabledEngines) { + // User error. + return writeResponseJSON(w, http.StatusBadRequest, ErrorResponse{ + Message: "category contains disabled engines", + Value: "disabled engines", + }) + } // All of these have default values set and validated. opts := options.Options{ diff --git a/src/router/routes/route_search_suggestions.go b/src/router/routes/route_search_suggestions.go index 9dd43ba5..4f630568 100644 --- a/src/router/routes/route_search_suggestions.go +++ b/src/router/routes/route_search_suggestions.go @@ -78,7 +78,14 @@ func routeSearchSuggestions(w http.ResponseWriter, r *http.Request, ver string, Value: fmt.Sprintf("%v", err), }) } - catConf.DisableEngines(disabledEngines) + + if catConf.ContainsDisabledEngines(disabledEngines) { + // User error. + return writeResponseJSON(w, http.StatusBadRequest, ErrorResponse{ + Message: "category contains disabled engines", + Value: "disabled engines", + }) + } // All of these have default values set and validated. opts := options.Options{ diff --git a/src/router/routes/route_search_web.go b/src/router/routes/route_search_web.go index 3db29a7d..8d3c99b8 100644 --- a/src/router/routes/route_search_web.go +++ b/src/router/routes/route_search_web.go @@ -121,7 +121,14 @@ func routeSearchWeb(w http.ResponseWriter, r *http.Request, ver string, disabled Value: fmt.Sprintf("%v", err), }) } - catConf.DisableEngines(disabledEngines) + + if catConf.ContainsDisabledEngines(disabledEngines) { + // User error. + return writeResponseJSON(w, http.StatusBadRequest, ErrorResponse{ + Message: "category contains disabled engines", + Value: "disabled engines", + }) + } // All of these have default values set and validated. 
opts := options.Options{ diff --git a/src/search/category/disable.go b/src/search/category/disable.go deleted file mode 100644 index 9b8d58a2..00000000 --- a/src/search/category/disable.go +++ /dev/null @@ -1,27 +0,0 @@ -package category - -import ( - "slices" - - "github.com/hearchco/agent/src/search/engines" -) - -// Remove the specified engines from the Category. -// Passed as pointer to modify the original. -func (c *Category) DisableEngines(disabledEngines []engines.Name) { - c.Engines = slices.DeleteFunc(c.Engines, func(e engines.Name) bool { - return slices.Contains(disabledEngines, e) - }) - c.RequiredEngines = slices.DeleteFunc(c.RequiredEngines, func(e engines.Name) bool { - return slices.Contains(disabledEngines, e) - }) - c.RequiredByOriginEngines = slices.DeleteFunc(c.RequiredByOriginEngines, func(e engines.Name) bool { - return slices.Contains(disabledEngines, e) - }) - c.PreferredEngines = slices.DeleteFunc(c.PreferredEngines, func(e engines.Name) bool { - return slices.Contains(disabledEngines, e) - }) - c.PreferredByOriginEngines = slices.DeleteFunc(c.PreferredByOriginEngines, func(e engines.Name) bool { - return slices.Contains(disabledEngines, e) - }) -} diff --git a/src/search/category/disabled.go b/src/search/category/disabled.go new file mode 100644 index 00000000..da8733de --- /dev/null +++ b/src/search/category/disabled.go @@ -0,0 +1,19 @@ +package category + +import ( + "slices" + + "github.com/hearchco/agent/src/search/engines" +) + +// Returns true if the category contains any disabled engines. +// Otherwise, returns false. +func (c Category) ContainsDisabledEngines(disabledEngines []engines.Name) bool { + for _, eng := range disabledEngines { + if slices.Contains(c.Engines, eng) { + return true + } + } + + return false +} From d871bcf3b1d66524e1223d65904bc07238b691da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksa=20Siri=C5=A1ki?= <31509435+aleksasiriski@users.noreply.github.com> Date: Sat, 16 Nov 2024 17:01:46 +0100 Subject: [PATCH 11/11] docs(category): fix example category json --- docs/example_category.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/example_category.json b/docs/example_category.json index e5f429e7..df083648 100644 --- a/docs/example_category.json +++ b/docs/example_category.json @@ -5,21 +5,21 @@ "required": false, "requiredbyorigin": true, "preferred": false, - "prefferedbyorigin": false + "preferredbyorigin": false }, "bing": { "enabled": true, "required": false, "requiredbyorigin": true, "preferred": false, - "prefferedbyorigin": false + "preferredbyorigin": false }, "brave": { "enabled": true, "required": false, "requiredbyorigin": false, "preferred": true, - "prefferedbyorigin": false + "preferredbyorigin": false } }, "ranking": { @@ -49,6 +49,6 @@ }, "timings": { "preferredtimeout": "500", - "HardTimeout": "1500" + "hardtimeout": "1500" } }
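
---

Editor's note: below is a minimal, self-contained sketch of the ranking formula as it stands after patch 08/11 (renamed ranking variables, removed hardcoded *10) and patch 09/11 (hardcoded *100 on the times-returned score), plugged with the default values from docs/example_category.json. The Ranking and EngineRanking structs here are simplified stand-ins keyed by plain strings instead of engines.Name, and score is a local re-implementation for illustration only, not the actual category/rank package code.

// Illustrative sketch, not part of the patch series above.
package main

import (
	"fmt"
	"math"
)

// Simplified stand-in for category.EngineRanking.
type EngineRanking struct {
	Mul float64
	Add float64
}

// Simplified stand-in for category.Ranking (field names match the renamed config keys).
type Ranking struct {
	RankExp, RankMul, RankAdd                    float64
	RankScoreMul, RankScoreAdd                   float64
	TimesReturnedMul, TimesReturnedAdd           float64
	TimesReturnedScoreMul, TimesReturnedScoreAdd float64
	Engines                                      map[string]EngineRanking
}

// score mirrors calculateScore after patches 08 and 09: the average of the
// per-engine rank scores plus a log-based times-returned score (with the hardcoded *100).
func score(rconf Ranking, engineRanks map[string]int) float64 {
	var rankScoreSum float64
	for name, rank := range engineRanks {
		eng := rconf.Engines[name]
		// 100 divided by the (adjusted) rank inverts the priority: lower rank, higher score.
		rankScoreSum += (100.0/math.Pow(float64(rank)*rconf.RankMul+rconf.RankAdd, rconf.RankExp)*rconf.RankScoreMul+rconf.RankScoreAdd)*eng.Mul + eng.Add
	}
	rankScoreAvg := rankScoreSum / float64(len(engineRanks))

	// Log dampens the effect of being returned by many engines; *100 is the hardcoded factor from patch 09.
	timesReturnedScore := math.Log(float64(len(engineRanks))*rconf.TimesReturnedMul+rconf.TimesReturnedAdd)*100*rconf.TimesReturnedScoreMul + rconf.TimesReturnedScoreAdd

	return rankScoreAvg + timesReturnedScore
}

func main() {
	// Defaults from docs/example_category.json: all multipliers 1, all additions 0.
	rconf := Ranking{
		RankExp: 0.5, RankMul: 1, RankScoreMul: 1,
		TimesReturnedMul: 1, TimesReturnedScoreMul: 1,
		Engines: map[string]EngineRanking{
			"google": {Mul: 1},
			"bing":   {Mul: 1},
		},
	}

	// A result returned by google at rank 1 and bing at rank 3.
	fmt.Printf("score: %.2f\n", score(rconf, map[string]int{"google": 1, "bing": 3}))
}

With every multiplier at 1 and every addition at 0, the two rank scores (100 and 100/sqrt(3) ≈ 57.7) average to about 78.9, and the times-returned component adds ln(2)*100 ≈ 69.3, for a total of roughly 148.2. A result returned by only one engine gets a times-returned score of ln(1)*100 = 0, so the second component exists purely to boost results that multiple engines agree on.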