From b1bb4841bf2b2021b25bd32e09f11d7380ab6929 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksa=20Siri=C5=A1ki?= <31509435+aleksasiriski@users.noreply.github.com> Date: Sat, 11 May 2024 21:45:41 +0200 Subject: [PATCH 1/2] refactor(config): separate files and different names for structs --- src/config/defaults.go | 24 +-- src/config/load.go | 45 +++-- src/config/structs.go | 185 ------------------ src/config/structs_category.go | 89 +++++++++ src/config/structs_config.go | 24 +++ src/config/structs_server.go | 112 +++++++++++ src/router/proxy.go | 2 +- src/search/bang.go | 2 +- src/search/engine_interface.go | 2 +- src/search/engines/_engines_test/structs.go | 4 +- src/search/engines/_sedefaults/init.go | 2 +- src/search/engines/bing/bing.go | 2 +- src/search/engines/bingimages/bingimages.go | 2 +- src/search/engines/brave/brave.go | 2 +- src/search/engines/duckduckgo/duckduckgo.go | 2 +- src/search/engines/etools/etools.go | 2 +- src/search/engines/google/google.go | 2 +- .../engines/googleimages/googleimages.go | 2 +- .../engines/googlescholar/googlescholar.go | 2 +- src/search/engines/mojeek/mojeek.go | 2 +- src/search/engines/presearch/presearch.go | 2 +- src/search/engines/qwant/qwant.go | 2 +- src/search/engines/startpage/startpage.go | 2 +- src/search/engines/swisscows/swisscows.go | 2 +- src/search/engines/yahoo/yahoo.go | 2 +- src/search/engines/yep/yep.go | 2 +- src/search/perform.go | 2 +- src/search/rank/rank.go | 2 +- src/search/rank/score.go | 2 +- 29 files changed, 283 insertions(+), 244 deletions(-) delete mode 100644 src/config/structs.go create mode 100644 src/config/structs_category.go create mode 100644 src/config/structs_config.go create mode 100644 src/config/structs_server.go diff --git a/src/config/defaults.go b/src/config/defaults.go index ffa6a06a..80969655 100644 --- a/src/config/defaults.go +++ b/src/config/defaults.go @@ -11,8 +11,8 @@ import ( const DefaultLocale string = "en_US" -func EmptyRanking() Ranking { - rnk := Ranking{ +func 
EmptyRanking() CategoryRanking { + rnk := CategoryRanking{ REXP: 0.5, A: 1, B: 0, @@ -22,11 +22,11 @@ func EmptyRanking() Ranking { TRB: 0, TRC: 1, TRD: 0, - Engines: map[string]EngineRanking{}, + Engines: map[string]CategoryEngineRanking{}, } for _, eng := range engines.Names() { - rnk.Engines[eng.ToLower()] = EngineRanking{ + rnk.Engines[eng.ToLower()] = CategoryEngineRanking{ Mul: 1, Const: 0, } @@ -35,7 +35,7 @@ func EmptyRanking() Ranking { return rnk } -func NewRanking() Ranking { +func NewRanking() CategoryRanking { return EmptyRanking() } @@ -163,8 +163,8 @@ func New() Config { Port: 6379, }, }, - Proxy: Proxy{ - Timeouts: ProxyTimeouts{ + Proxy: ImageProxy{ + Timeouts: ImageProxyTimeouts{ Dial: 3 * time.Second, KeepAlive: 3 * time.Second, TLSHandshake: 2 * time.Second, @@ -176,7 +176,7 @@ func New() Config { category.GENERAL: { Engines: NewGeneral(), Ranking: NewRanking(), - Timings: Timings{ + Timings: CategoryTimings{ PreferredTimeout: 1 * time.Second, PreferredTimeoutResults: 20, AdditionalTimeout: 50 * time.Millisecond, @@ -188,7 +188,7 @@ func New() Config { category.IMAGES: { Engines: NewImage(), Ranking: NewRanking(), - Timings: Timings{ + Timings: CategoryTimings{ PreferredTimeout: 1 * time.Second, PreferredTimeoutResults: 40, AdditionalTimeout: 100 * time.Millisecond, @@ -200,7 +200,7 @@ func New() Config { category.INFO: { Engines: NewInfo(), Ranking: NewRanking(), - Timings: Timings{ + Timings: CategoryTimings{ PreferredTimeout: 500 * time.Millisecond, PreferredTimeoutResults: 10, AdditionalTimeout: 25 * time.Millisecond, @@ -212,7 +212,7 @@ func New() Config { category.SCIENCE: { Engines: NewScience(), Ranking: NewRanking(), - Timings: Timings{ + Timings: CategoryTimings{ PreferredTimeout: 1 * time.Second, PreferredTimeoutResults: 10, AdditionalTimeout: 100 * time.Millisecond, @@ -224,7 +224,7 @@ func New() Config { category.SURF: { Engines: NewGeneral(), Ranking: NewRanking(), - Timings: Timings{ + Timings: CategoryTimings{ PreferredTimeout: 2 
* time.Second, PreferredTimeoutResults: 60, AdditionalTimeout: 200 * time.Millisecond, diff --git a/src/config/load.go b/src/config/load.go index 8bea0a71..a8490678 100644 --- a/src/config/load.go +++ b/src/config/load.go @@ -4,7 +4,6 @@ import ( "os" "path" "strings" - "time" "github.com/hearchco/hearchco/src/moretime" "github.com/hearchco/hearchco/src/search/category" @@ -37,9 +36,9 @@ func (c *Config) fromReader(rc ReaderConfig) { Badger: rc.Server.Cache.Badger, Redis: rc.Server.Cache.Redis, }, - Proxy: Proxy{ + Proxy: ImageProxy{ Salt: rc.Server.Proxy.Salt, - Timeouts: ProxyTimeouts{ + Timeouts: ImageProxyTimeouts{ Dial: moretime.ConvertFromFancyTime(rc.Server.Proxy.Timeouts.Dial), KeepAlive: moretime.ConvertFromFancyTime(rc.Server.Proxy.Timeouts.KeepAlive), TLSHandshake: moretime.ConvertFromFancyTime(rc.Server.Proxy.Timeouts.TLSHandshake), @@ -75,15 +74,15 @@ func (c *Config) fromReader(rc ReaderConfig) { engArr = append(engArr, engineName) } } - tim := Timings{ - PreferredTimeout: time.Duration(val.RTimings.PreferredTimeout) * time.Millisecond, + tim := CategoryTimings{ + PreferredTimeout: moretime.ConvertFromFancyTime(val.RTimings.PreferredTimeout), PreferredTimeoutResults: val.RTimings.PreferredTimeoutResults, - AdditionalTimeout: time.Duration(val.RTimings.AdditionalTimeout) * time.Millisecond, - HardTimeout: time.Duration(val.RTimings.HardTimeout) * time.Millisecond, - Timeout: time.Duration(val.RTimings.Timeout) * time.Millisecond, - PageTimeout: time.Duration(val.RTimings.PageTimeout) * time.Millisecond, - Delay: time.Duration(val.RTimings.Delay) * time.Millisecond, - RandomDelay: time.Duration(val.RTimings.RandomDelay) * time.Millisecond, + AdditionalTimeout: moretime.ConvertFromFancyTime(val.RTimings.AdditionalTimeout), + HardTimeout: moretime.ConvertFromFancyTime(val.RTimings.HardTimeout), + Timeout: moretime.ConvertFromFancyTime(val.RTimings.Timeout), + PageTimeout: moretime.ConvertFromFancyTime(val.RTimings.PageTimeout), + Delay: 
moretime.ConvertFromFancyTime(val.RTimings.Delay), + RandomDelay: moretime.ConvertFromFancyTime(val.RTimings.RandomDelay), Parallelism: val.RTimings.Parallelism, } nc.Categories[key] = Category{ @@ -112,9 +111,9 @@ func (c Config) getReader() ReaderConfig { Badger: c.Server.Cache.Badger, Redis: c.Server.Cache.Redis, }, - Proxy: ReaderProxy{ + Proxy: ReaderImageProxy{ Salt: c.Server.Proxy.Salt, - Timeouts: ReaderProxyTimeouts{ + Timeouts: ReaderImageProxyTimeouts{ Dial: moretime.ConvertToFancyTime(c.Server.Proxy.Timeouts.Dial), KeepAlive: moretime.ConvertToFancyTime(c.Server.Proxy.Timeouts.KeepAlive), TLSHandshake: moretime.ConvertToFancyTime(c.Server.Proxy.Timeouts.TLSHandshake), @@ -126,24 +125,24 @@ func (c Config) getReader() ReaderConfig { } for key, val := range c.Categories { - tim := ReaderTimings{ - PreferredTimeout: uint(val.Timings.PreferredTimeout.Milliseconds()), + tim := ReaderCategoryTimings{ + PreferredTimeout: moretime.ConvertToFancyTime(val.Timings.PreferredTimeout), PreferredTimeoutResults: val.Timings.PreferredTimeoutResults, - AdditionalTimeout: uint(val.Timings.AdditionalTimeout.Milliseconds()), - HardTimeout: uint(val.Timings.HardTimeout.Milliseconds()), - Timeout: uint(val.Timings.Timeout.Milliseconds()), - PageTimeout: uint(val.Timings.PageTimeout.Milliseconds()), - Delay: uint(val.Timings.Delay.Milliseconds()), - RandomDelay: uint(val.Timings.RandomDelay.Milliseconds()), + AdditionalTimeout: moretime.ConvertToFancyTime(val.Timings.AdditionalTimeout), + HardTimeout: moretime.ConvertToFancyTime(val.Timings.HardTimeout), + Timeout: moretime.ConvertToFancyTime(val.Timings.Timeout), + PageTimeout: moretime.ConvertToFancyTime(val.Timings.PageTimeout), + Delay: moretime.ConvertToFancyTime(val.Timings.Delay), + RandomDelay: moretime.ConvertToFancyTime(val.Timings.RandomDelay), Parallelism: val.Timings.Parallelism, } rc.RCategories[key] = ReaderCategory{ Ranking: val.Ranking, - REngines: map[string]ReaderEngine{}, + REngines: 
map[string]ReaderCategoryEngine{}, RTimings: tim, } for _, eng := range val.Engines { - rc.RCategories[key].REngines[eng.ToLower()] = ReaderEngine{Enabled: true} + rc.RCategories[key].REngines[eng.ToLower()] = ReaderCategoryEngine{Enabled: true} } } diff --git a/src/config/structs.go b/src/config/structs.go deleted file mode 100644 index 0dd87d35..00000000 --- a/src/config/structs.go +++ /dev/null @@ -1,185 +0,0 @@ -package config - -import ( - "time" - - "github.com/hearchco/hearchco/src/search/category" - "github.com/hearchco/hearchco/src/search/engines" -) - -type EngineRanking struct { - Mul float64 `koanf:"mul"` - Const float64 `koanf:"const"` -} - -type Ranking struct { - REXP float64 `koanf:"rexp"` - A float64 `koanf:"a"` - B float64 `koanf:"b"` - C float64 `koanf:"c"` - D float64 `koanf:"d"` - TRA float64 `koanf:"tra"` - TRB float64 `koanf:"trb"` - TRC float64 `koanf:"trc"` - TRD float64 `koanf:"trd"` - Engines map[string]EngineRanking `koanf:"engines"` -} - -type Settings struct { - RequestedResultsPerPage int `koanf:"requestedresults"` - Shortcut string `koanf:"shortcut"` - Proxies []string `koanf:"proxies"` -} - -// ReaderTTL is format in which the config is read from the config file -// in format -// example: 1s, 1m, 1h, 1d, 1w, 1M, 1y -// if unit is not specified, it is assumed to be milliseconds -type ReaderTTL struct { - // how long to store the results in cache - // setting this to 0 caches the results forever - // to disable caching set conf.Cache.Type to "none" - Time string `koanf:"time"` - // if the remaining TTL when retrieving from cache is less than this, update the cache entry and reset the TTL - // setting this to 0 disables this feature - // setting this to the same value (or higher) as Results will update the cache entry every time - RefreshTime string `koanf:"refreshtime"` -} -type TTL struct { - Time time.Duration - RefreshTime time.Duration -} - -type Badger struct { - // setting this to false will result in badger not persisting the 
cache to disk - // that means that badger will run in memory only - Persist bool `koanf:"persist"` -} - -type Redis struct { - Host string `koanf:"host"` - Port uint16 `koanf:"port"` - Password string `koanf:"password"` - Database uint8 `koanf:"database"` -} - -// ReaderCache is format in which the config is read from the config file -type ReaderCache struct { - // can be "none", "badger" or "redis" - Type string `koanf:"type"` - // has no effect if Type is "none" - TTL ReaderTTL `koanf:"ttl"` - // badger specific settings - Badger Badger `koanf:"badger"` - // redis specific settings - Redis Redis `koanf:"redis"` -} -type Cache struct { - Type string - TTL TTL - Badger Badger - Redis Redis -} - -type ReaderProxyTimeouts struct { - Dial string `koanf:"dial"` - KeepAlive string `koanf:"keepalive"` - TLSHandshake string `koanf:"tlshandshake"` -} -type ProxyTimeouts struct { - Dial time.Duration - KeepAlive time.Duration - TLSHandshake time.Duration -} - -type ReaderProxy struct { - Salt string `koanf:"salt"` - Timeouts ReaderProxyTimeouts `koanf:"timeouts"` -} -type Proxy struct { - Salt string - Timeouts ProxyTimeouts -} - -// ReaderServer is format in which the config is read from the config file -type ReaderServer struct { - // environment in which the server is running (normal or lambda) - Environment string `koanf:"environment"` - // port on which the API server listens - Port int `koanf:"port"` - // urls used for CORS, comma separated (wildcards allowed) and converted into slice - FrontendUrls string `koanf:"frontendurls"` - // cache settings - Cache ReaderCache `koanf:"cache"` - // salt used for image proxy - Proxy ReaderProxy `koanf:"proxy"` -} -type Server struct { - Environment string - Port int - FrontendUrls []string - Cache Cache - Proxy Proxy -} - -// ReaderEngine is format in which the config is read from the config file -type ReaderEngine struct { - Enabled bool `koanf:"enabled"` -} - -// ReaderTimings is format in which the config is read from the 
config file -// in miliseconds -type ReaderTimings struct { - PreferredTimeout uint `koanf:"preferredtimeout"` - PreferredTimeoutResults int `koanf:"preferredtimeoutresults"` - AdditionalTimeout uint `koanf:"additionaltimeout"` - HardTimeout uint `koanf:"hardtimeout"` - Timeout uint `koanf:"timeout"` - PageTimeout uint `koanf:"pagetimeout"` - Delay uint `koanf:"delay"` - RandomDelay uint `koanf:"randomdelay"` - Parallelism int `koanf:"parallelism"` -} - -// Delegates Timeout, PageTimeout to colly.Collector.SetRequestTimeout(); Note: See https://github.com/gocolly/colly/issues/644 -// Delegates Delay, RandomDelay, Parallelism to colly.Collector.Limit() -type Timings struct { - // preferred timeout if enough results are found - PreferredTimeout time.Duration - // number of results which if not met will trigger the additional timeout - PreferredTimeoutResults int - // additional timeout if not enough results are found (delay after which the number of results is checked) - AdditionalTimeout time.Duration - // hard timeout after which the search is forcefully stopped - HardTimeout time.Duration - // colly settings - Timeout time.Duration - PageTimeout time.Duration - Delay time.Duration - RandomDelay time.Duration - Parallelism int -} - -// ReaderCategory is format in which the config is read from the config file -type ReaderCategory struct { - REngines map[string]ReaderEngine `koanf:"engines"` - Ranking Ranking `koanf:"ranking"` - RTimings ReaderTimings `koanf:"timings"` -} -type Category struct { - Engines []engines.Name - Ranking Ranking - Timings Timings -} - -// ReaderConfig is format in which the config is read from the config file -type ReaderConfig struct { - Server ReaderServer `koanf:"server"` - RCategories map[category.Name]ReaderCategory `koanf:"categories"` - Settings map[string]Settings `koanf:"settings"` -} -type Config struct { - Server Server - Categories map[category.Name]Category - Settings map[engines.Name]Settings -} diff --git 
a/src/config/structs_category.go b/src/config/structs_category.go new file mode 100644 index 00000000..187072e2 --- /dev/null +++ b/src/config/structs_category.go @@ -0,0 +1,89 @@ +package config + +import ( + "time" + + "github.com/hearchco/hearchco/src/search/engines" +) + +// ReaderCategory is the format in which the config is read from the config file +type ReaderCategory struct { + REngines map[string]ReaderCategoryEngine `koanf:"engines"` + Ranking CategoryRanking `koanf:"ranking"` + RTimings ReaderCategoryTimings `koanf:"timings"` +} +type Category struct { + Engines []engines.Name + Ranking CategoryRanking + Timings CategoryTimings +} + +// ReaderCategoryEngine is the format in which the config is read from the config file +type ReaderCategoryEngine struct { + Enabled bool `koanf:"enabled"` +} + +type CategoryRanking struct { + REXP float64 `koanf:"rexp"` + A float64 `koanf:"a"` + B float64 `koanf:"b"` + C float64 `koanf:"c"` + D float64 `koanf:"d"` + TRA float64 `koanf:"tra"` + TRB float64 `koanf:"trb"` + TRC float64 `koanf:"trc"` + TRD float64 `koanf:"trd"` + Engines map[string]CategoryEngineRanking `koanf:"engines"` +} + +type CategoryEngineRanking struct { + Mul float64 `koanf:"mul"` + Const float64 `koanf:"const"` +} + +// ReaderCategoryTimings is the format in which the config is read from the config file +// Delegates Timeout, PageTimeout to colly.Collector.SetRequestTimeout(); Note: See https://github.com/gocolly/colly/issues/644 +// Delegates Delay, RandomDelay, Parallelism to colly.Collector.Limit() +type ReaderCategoryTimings struct { + // Preferred timeout if enough results are found + PreferredTimeout string `koanf:"preferredtimeout"` + // Number of results which if not met will trigger the additional timeout + PreferredTimeoutResults int `koanf:"preferredtimeoutresults"` + // Additional timeout if not enough results are found (delay after which the number of results is checked) + AdditionalTimeout string `koanf:"additionaltimeout"` + // Hard timeout after which the
search is forcefully stopped + HardTimeout string `koanf:"hardtimeout"` + // Colly collector timeout (should be less than or equal to HardTimeout) + Timeout string `koanf:"timeout"` + // Colly collector page timeout (should be less than or equal to HardTimeout) + PageTimeout string `koanf:"pagetimeout"` + // Colly delay + Delay string `koanf:"delay"` + // Colly random delay + RandomDelay string `koanf:"randomdelay"` + // Colly parallelism + Parallelism int `koanf:"parallelism"` +} + +// Delegates Timeout, PageTimeout to colly.Collector.SetRequestTimeout(); Note: See https://github.com/gocolly/colly/issues/644 +// Delegates Delay, RandomDelay, Parallelism to colly.Collector.Limit() +type CategoryTimings struct { + // Preferred timeout if enough results are found + PreferredTimeout time.Duration + // Number of results which if not met will trigger the additional timeout + PreferredTimeoutResults int + // Additional timeout if not enough results are found (delay after which the number of results is checked) + AdditionalTimeout time.Duration + // Hard timeout after which the search is forcefully stopped + HardTimeout time.Duration + // Colly collector timeout (should be less than or equal to HardTimeout) + Timeout time.Duration + // Colly collector page timeout (should be less than or equal to HardTimeout) + PageTimeout time.Duration + // Colly delay + Delay time.Duration + // Colly random delay + RandomDelay time.Duration + // Colly parallelism + Parallelism int +} diff --git a/src/config/structs_config.go b/src/config/structs_config.go new file mode 100644 index 00000000..e6205a1c --- /dev/null +++ b/src/config/structs_config.go @@ -0,0 +1,24 @@ +package config + +import ( + "github.com/hearchco/hearchco/src/search/category" + "github.com/hearchco/hearchco/src/search/engines" +) + +type Settings struct { + RequestedResultsPerPage int `koanf:"requestedresults"` + Shortcut string `koanf:"shortcut"` + Proxies []string `koanf:"proxies"` +} + +// ReaderConfig is format in 
which the config is read from the config file +type ReaderConfig struct { + Server ReaderServer `koanf:"server"` + RCategories map[category.Name]ReaderCategory `koanf:"categories"` + Settings map[string]Settings `koanf:"settings"` +} +type Config struct { + Server Server + Categories map[category.Name]Category + Settings map[engines.Name]Settings +} diff --git a/src/config/structs_server.go b/src/config/structs_server.go new file mode 100644 index 00000000..dd7b6d87 --- /dev/null +++ b/src/config/structs_server.go @@ -0,0 +1,112 @@ +package config + +import "time" + +// ReaderServer is format in which the config is read from the config file +type ReaderServer struct { + // Environment in which the server is running (normal or lambda) + Environment string `koanf:"environment"` + // Port on which the API server listens + Port int `koanf:"port"` + // URLs used for CORS (wildcards allowed) + // comma separated + FrontendUrls string `koanf:"frontendurls"` + // Cache settings + Cache ReaderCache `koanf:"cache"` + // Image proxy settings + Proxy ReaderImageProxy `koanf:"proxy"` +} +type Server struct { + // Environment in which the server is running (normal or lambda) + Environment string + // Port on which the API server listens + Port int + // URLs used for CORS (wildcards allowed) + FrontendUrls []string + // Cache settings + Cache Cache + // Image proxy settings + Proxy ImageProxy +} + +// ReaderCache is format in which the config is read from the config file +type ReaderCache struct { + // Can be "none", "badger" or "redis" + Type string `koanf:"type"` + // Has no effect if type is "none" + TTL ReaderTTL `koanf:"ttl"` + // Badger specific settings + Badger Badger `koanf:"badger"` + // Redis specific settings + Redis Redis `koanf:"redis"` +} +type Cache struct { + // Can be "none", "badger" or "redis" + Type string + // Has no effect if type is "none" + TTL TTL + // Badger specific settings + Badger Badger + // Redis specific settings + Redis Redis +} + +// ReaderTTL 
is the format in which the config is read from the config file +// Format: a number followed by a unit +// Example: 1s, 1m, 1h, 1d, 1w, 1M, 1y +// If unit is not specified, it is assumed to be milliseconds +type ReaderTTL struct { + // How long to store the results in cache + // Setting this to 0 caches the results forever + Time string `koanf:"time"` + // If the remaining TTL when retrieving from cache is less than this, update the cache entry and reset the TTL + // Setting this to 0 disables this feature + // Setting this to the same value (or higher) as Time will update the cache entry every time + RefreshTime string `koanf:"refreshtime"` +} +type TTL struct { + // How long to store the results in cache + // Setting this to 0 caches the results forever + Time time.Duration + // If the remaining TTL when retrieving from cache is less than this, update the cache entry and reset the TTL + // Setting this to 0 disables this feature + // Setting this to the same value (or higher) as Time will update the cache entry every time + RefreshTime time.Duration +} + +type Badger struct { + // Setting this to false will result in badger not persisting the cache to disk making it run "in-memory" + Persist bool `koanf:"persist"` +} + +type Redis struct { + Host string `koanf:"host"` + Port uint16 `koanf:"port"` + Password string `koanf:"password"` + Database uint8 `koanf:"database"` +} + +// ReaderImageProxy is the format in which the config is read from the config file +type ReaderImageProxy struct { + Salt string `koanf:"salt"` + Timeouts ReaderImageProxyTimeouts `koanf:"timeouts"` +} +type ImageProxy struct { + Salt string + Timeouts ImageProxyTimeouts +} + +// ReaderImageProxyTimeouts is the format in which the config is read from the config file +// Format: a number followed by a unit +// Example: 1s, 1m, 1h, 1d, 1w, 1M, 1y +// If unit is not specified, it is assumed to be milliseconds +type ReaderImageProxyTimeouts struct { + Dial string `koanf:"dial"` + KeepAlive string `koanf:"keepalive"` + TLSHandshake string `koanf:"tlshandshake"` +}
+type ImageProxyTimeouts struct { + Dial time.Duration + KeepAlive time.Duration + TLSHandshake time.Duration +} diff --git a/src/router/proxy.go b/src/router/proxy.go index 82b980b9..12b72865 100644 --- a/src/router/proxy.go +++ b/src/router/proxy.go @@ -13,7 +13,7 @@ import ( "github.com/rs/zerolog/log" ) -func Proxy(w http.ResponseWriter, r *http.Request, salt string, timeouts config.ProxyTimeouts) error { +func Proxy(w http.ResponseWriter, r *http.Request, salt string, timeouts config.ImageProxyTimeouts) error { err := r.ParseForm() if err != nil { // server error diff --git a/src/search/bang.go b/src/search/bang.go index b7496010..7a8e1f0b 100644 --- a/src/search/bang.go +++ b/src/search/bang.go @@ -10,7 +10,7 @@ import ( "github.com/rs/zerolog/log" ) -func procBang(query string, setCategory category.Name, settings map[engines.Name]config.Settings, categories map[category.Name]config.Category) (string, category.Name, config.Timings, []engines.Name) { +func procBang(query string, setCategory category.Name, settings map[engines.Name]config.Settings, categories map[category.Name]config.Category) (string, category.Name, config.CategoryTimings, []engines.Name) { useSpec, specEng := procSpecificEngine(query, settings) goodCat, cat := procCategory(query, setCategory) if !goodCat && !useSpec && (query != "" && query[0] == '!') { diff --git a/src/search/engine_interface.go b/src/search/engine_interface.go index 53f5c5c3..c96c605f 100644 --- a/src/search/engine_interface.go +++ b/src/search/engine_interface.go @@ -9,5 +9,5 @@ import ( ) type Searcher interface { - Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt string, nEnabledEngines int) []error + Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error } diff --git 
a/src/search/engines/_engines_test/structs.go b/src/search/engines/_engines_test/structs.go index ab2fcb29..9e453ea6 100644 --- a/src/search/engines/_engines_test/structs.go +++ b/src/search/engines/_engines_test/structs.go @@ -31,14 +31,14 @@ func NewConfig(engineName engines.Name) config.Config { category.GENERAL: { Engines: []engines.Name{engineName}, Ranking: config.NewRanking(), - Timings: config.Timings{ + Timings: config.CategoryTimings{ Timeout: 10000 * time.Millisecond, // colly default }, }, category.IMAGES: { Engines: []engines.Name{engineName}, Ranking: config.NewRanking(), - Timings: config.Timings{ + Timings: config.CategoryTimings{ Timeout: 10000 * time.Millisecond, // colly default }, }, diff --git a/src/search/engines/_sedefaults/init.go b/src/search/engines/_sedefaults/init.go index 215ab190..d821afa2 100644 --- a/src/search/engines/_sedefaults/init.go +++ b/src/search/engines/_sedefaults/init.go @@ -13,7 +13,7 @@ import ( ) // it's okay to return pointers to collectors since colly.NewCollector() returns a pointer -func InitializeCollectors(ctx context.Context, engineName engines.Name, options engines.Options, settings config.Settings, timings config.Timings, relay *bucket.Relay) (*colly.Collector, *colly.Collector) { +func InitializeCollectors(ctx context.Context, engineName engines.Name, options engines.Options, settings config.Settings, timings config.CategoryTimings, relay *bucket.Relay) (*colly.Collector, *colly.Collector) { col := colly.NewCollector(colly.MaxDepth(1), colly.UserAgent(options.UserAgent), colly.Async()) pagesCol := colly.NewCollector(colly.MaxDepth(1), colly.UserAgent(options.UserAgent), colly.Async()) diff --git a/src/search/engines/bing/bing.go b/src/search/engines/bing/bing.go index e613ccc1..29a0d9f1 100644 --- a/src/search/engines/bing/bing.go +++ b/src/search/engines/bing/bing.go @@ -22,7 +22,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options 
engines.Options, settings config.Settings, timings config.Timings, salt string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return []error{err} diff --git a/src/search/engines/bingimages/bingimages.go b/src/search/engines/bingimages/bingimages.go index 4089de93..d1235949 100644 --- a/src/search/engines/bingimages/bingimages.go +++ b/src/search/engines/bingimages/bingimages.go @@ -21,7 +21,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return []error{err} diff --git a/src/search/engines/brave/brave.go b/src/search/engines/brave/brave.go index b5015bab..ad2e9be9 100644 --- a/src/search/engines/brave/brave.go +++ b/src/search/engines/brave/brave.go @@ -19,7 +19,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return []error{err} 
diff --git a/src/search/engines/duckduckgo/duckduckgo.go b/src/search/engines/duckduckgo/duckduckgo.go index 16b6179b..0141c1c3 100644 --- a/src/search/engines/duckduckgo/duckduckgo.go +++ b/src/search/engines/duckduckgo/duckduckgo.go @@ -22,7 +22,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return []error{err} diff --git a/src/search/engines/etools/etools.go b/src/search/engines/etools/etools.go index 64b04b29..d866a709 100644 --- a/src/search/engines/etools/etools.go +++ b/src/search/engines/etools/etools.go @@ -19,7 +19,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return []error{err} diff --git a/src/search/engines/google/google.go b/src/search/engines/google/google.go index 7fb7e889..a5ff6ca6 100644 --- a/src/search/engines/google/google.go +++ b/src/search/engines/google/google.go @@ -18,7 +18,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt 
string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return []error{err} diff --git a/src/search/engines/googleimages/googleimages.go b/src/search/engines/googleimages/googleimages.go index 32998c96..ab71aa25 100644 --- a/src/search/engines/googleimages/googleimages.go +++ b/src/search/engines/googleimages/googleimages.go @@ -22,7 +22,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return []error{err} diff --git a/src/search/engines/googlescholar/googlescholar.go b/src/search/engines/googlescholar/googlescholar.go index e31aeb69..f5010117 100644 --- a/src/search/engines/googlescholar/googlescholar.go +++ b/src/search/engines/googlescholar/googlescholar.go @@ -20,7 +20,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return 
[]error{err} diff --git a/src/search/engines/mojeek/mojeek.go b/src/search/engines/mojeek/mojeek.go index db5a6559..a6df0784 100644 --- a/src/search/engines/mojeek/mojeek.go +++ b/src/search/engines/mojeek/mojeek.go @@ -19,7 +19,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return []error{err} diff --git a/src/search/engines/presearch/presearch.go b/src/search/engines/presearch/presearch.go index 2a2b399c..1f154036 100644 --- a/src/search/engines/presearch/presearch.go +++ b/src/search/engines/presearch/presearch.go @@ -21,7 +21,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return []error{err} diff --git a/src/search/engines/qwant/qwant.go b/src/search/engines/qwant/qwant.go index 98523bdd..ecc56098 100644 --- a/src/search/engines/qwant/qwant.go +++ b/src/search/engines/qwant/qwant.go @@ -21,7 +21,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt 
string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return []error{err} diff --git a/src/search/engines/startpage/startpage.go b/src/search/engines/startpage/startpage.go index 26dea850..568124ca 100644 --- a/src/search/engines/startpage/startpage.go +++ b/src/search/engines/startpage/startpage.go @@ -20,7 +20,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return []error{err} diff --git a/src/search/engines/swisscows/swisscows.go b/src/search/engines/swisscows/swisscows.go index 6787ab87..0f94cd36 100644 --- a/src/search/engines/swisscows/swisscows.go +++ b/src/search/engines/swisscows/swisscows.go @@ -21,7 +21,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return []error{err} diff --git a/src/search/engines/yahoo/yahoo.go 
b/src/search/engines/yahoo/yahoo.go index e429e42f..2aa62fb5 100644 --- a/src/search/engines/yahoo/yahoo.go +++ b/src/search/engines/yahoo/yahoo.go @@ -21,7 +21,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return []error{err} diff --git a/src/search/engines/yep/yep.go b/src/search/engines/yep/yep.go index 960e3ff0..8e75f3d9 100644 --- a/src/search/engines/yep/yep.go +++ b/src/search/engines/yep/yep.go @@ -21,7 +21,7 @@ func New() Engine { return Engine{} } -func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings, salt string, nEnabledEngines int) []error { +func (e Engine) Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.CategoryTimings, salt string, nEnabledEngines int) []error { ctx, err := _sedefaults.Prepare(ctx, Info, Support, &options, &settings) if err != nil { return []error{err} diff --git a/src/search/perform.go b/src/search/perform.go index 56a03cc2..9b20f344 100644 --- a/src/search/perform.go +++ b/src/search/perform.go @@ -65,7 +65,7 @@ func PerformSearch(query string, options engines.Options, settings map[engines.N return results } -func runEngines(engs []engines.Name, query string, options engines.Options, settings map[engines.Name]config.Settings, timings config.Timings, salt string) map[string]*result.Result { +func runEngines(engs []engines.Name, query string, options engines.Options, 
settings map[engines.Name]config.Settings, timings config.CategoryTimings, salt string) map[string]*result.Result { // create engine strings slice for logging engsStrs := make([]string, 0, len(engs)) for _, eng := range engs { diff --git a/src/search/rank/rank.go b/src/search/rank/rank.go index 0cf88fc7..6ba27870 100644 --- a/src/search/rank/rank.go +++ b/src/search/rank/rank.go @@ -7,7 +7,7 @@ import ( "github.com/hearchco/hearchco/src/search/result" ) -func Rank(resMap map[string]*result.Result, rconf config.Ranking) []result.Result { +func Rank(resMap map[string]*result.Result, rconf config.CategoryRanking) []result.Result { results := make([]result.Result, 0, len(resMap)) for _, res := range resMap { // set res.EngineRanks slice's capacity to it's length diff --git a/src/search/rank/score.go b/src/search/rank/score.go index a6c99ced..976658aa 100644 --- a/src/search/rank/score.go +++ b/src/search/rank/score.go @@ -8,7 +8,7 @@ import ( ) // package local func that gets result pointer passed down -func getScore(result *result.Result, rconf *config.Ranking) float64 { +func getScore(result *result.Result, rconf *config.CategoryRanking) float64 { retRankScore := float64(0) for _, er := range result.EngineRanks { seMul := rconf.Engines[er.SearchEngine.ToLower()].Mul From beae7b2117b23bd79071ffbde9f5b14c4c7c1fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksa=20Siri=C5=A1ki?= <31509435+aleksasiriski@users.noreply.github.com> Date: Sat, 11 May 2024 22:48:03 +0200 Subject: [PATCH 2/2] fix(search): better timings algo and even better defaults --- src/config/defaults.go | 70 +++++++++++---------- src/config/load.go | 36 +++++------ src/config/structs_category.go | 53 +++++++++------- src/config/structs_server.go | 6 +- src/search/category/category.go | 7 ++- src/search/category/name.go | 4 +- src/search/engines/_engines_test/structs.go | 4 +- src/search/engines/_sedefaults/init.go | 8 --- src/search/perform.go | 52 +++++++++++---- 9 files changed, 135 insertions(+), 105 
deletions(-) diff --git a/src/config/defaults.go b/src/config/defaults.go index 80969655..a53ce1fe 100644 --- a/src/config/defaults.go +++ b/src/config/defaults.go @@ -129,9 +129,11 @@ func NewImage() []engines.Name { } } -func NewInfo() []engines.Name { +func NewQuick() []engines.Name { return []engines.Name{ engines.BING, + engines.BRAVE, + engines.DUCKDUCKGO, engines.GOOGLE, engines.MOJEEK, } @@ -177,60 +179,60 @@ func New() Config { Engines: NewGeneral(), Ranking: NewRanking(), Timings: CategoryTimings{ - PreferredTimeout: 1 * time.Second, - PreferredTimeoutResults: 20, - AdditionalTimeout: 50 * time.Millisecond, - HardTimeout: 1500 * time.Millisecond, - Timeout: 1500 * time.Millisecond, - PageTimeout: 1 * time.Second, + PreferredTimeoutMin: 1 * time.Second, + PreferredTimeoutMax: 2 * time.Second, + PreferredResultsNumber: 20, + StepTime: 50 * time.Millisecond, + MinimumResultsNumber: 10, + HardTimeout: 3 * time.Second, }, }, category.IMAGES: { Engines: NewImage(), Ranking: NewRanking(), Timings: CategoryTimings{ - PreferredTimeout: 1 * time.Second, - PreferredTimeoutResults: 40, - AdditionalTimeout: 100 * time.Millisecond, - HardTimeout: 1500 * time.Millisecond, - Timeout: 1500 * time.Millisecond, - PageTimeout: 1 * time.Second, + PreferredTimeoutMin: 1 * time.Second, + PreferredTimeoutMax: 2 * time.Second, + PreferredResultsNumber: 40, + StepTime: 100 * time.Millisecond, + MinimumResultsNumber: 20, + HardTimeout: 3 * time.Second, }, }, - category.INFO: { - Engines: NewInfo(), + category.QUICK: { + Engines: NewQuick(), Ranking: NewRanking(), Timings: CategoryTimings{ - PreferredTimeout: 500 * time.Millisecond, - PreferredTimeoutResults: 10, - AdditionalTimeout: 25 * time.Millisecond, - HardTimeout: 1200 * time.Millisecond, - Timeout: 1200 * time.Millisecond, - PageTimeout: 1 * time.Second, + PreferredTimeoutMin: 500 * time.Millisecond, + PreferredTimeoutMax: 1500 * time.Millisecond, + PreferredResultsNumber: 10, + StepTime: 25 * time.Millisecond, + 
MinimumResultsNumber: 5, + HardTimeout: 3 * time.Second, }, }, category.SCIENCE: { Engines: NewScience(), Ranking: NewRanking(), Timings: CategoryTimings{ - PreferredTimeout: 1 * time.Second, - PreferredTimeoutResults: 10, - AdditionalTimeout: 100 * time.Millisecond, - HardTimeout: 3 * time.Second, - Timeout: 3 * time.Second, - PageTimeout: 1 * time.Second, + PreferredTimeoutMin: 1 * time.Second, + PreferredTimeoutMax: 2 * time.Second, + PreferredResultsNumber: 10, + StepTime: 100 * time.Millisecond, + MinimumResultsNumber: 5, + HardTimeout: 3 * time.Second, }, }, - category.SURF: { + category.BROAD: { Engines: NewGeneral(), Ranking: NewRanking(), Timings: CategoryTimings{ - PreferredTimeout: 2 * time.Second, - PreferredTimeoutResults: 60, - AdditionalTimeout: 200 * time.Millisecond, - HardTimeout: 4 * time.Second, - Timeout: 4 * time.Second, - PageTimeout: 1 * time.Second, + PreferredTimeoutMin: 1 * time.Second, + PreferredTimeoutMax: 3 * time.Second, + PreferredResultsNumber: 50, + StepTime: 100 * time.Millisecond, + MinimumResultsNumber: 30, + HardTimeout: 5 * time.Second, }, }, }, diff --git a/src/config/load.go b/src/config/load.go index a8490678..c0fd3abe 100644 --- a/src/config/load.go +++ b/src/config/load.go @@ -75,15 +75,15 @@ func (c *Config) fromReader(rc ReaderConfig) { } } tim := CategoryTimings{ - PreferredTimeout: moretime.ConvertFromFancyTime(val.RTimings.PreferredTimeout), - PreferredTimeoutResults: val.RTimings.PreferredTimeoutResults, - AdditionalTimeout: moretime.ConvertFromFancyTime(val.RTimings.AdditionalTimeout), - HardTimeout: moretime.ConvertFromFancyTime(val.RTimings.HardTimeout), - Timeout: moretime.ConvertFromFancyTime(val.RTimings.Timeout), - PageTimeout: moretime.ConvertFromFancyTime(val.RTimings.PageTimeout), - Delay: moretime.ConvertFromFancyTime(val.RTimings.Delay), - RandomDelay: moretime.ConvertFromFancyTime(val.RTimings.RandomDelay), - Parallelism: val.RTimings.Parallelism, + PreferredTimeoutMin: 
moretime.ConvertFromFancyTime(val.RTimings.PreferredTimeoutMin), + PreferredTimeoutMax: moretime.ConvertFromFancyTime(val.RTimings.PreferredTimeoutMax), + PreferredResultsNumber: val.RTimings.PreferredResultsNumber, + StepTime: moretime.ConvertFromFancyTime(val.RTimings.StepTime), + MinimumResultsNumber: val.RTimings.MinimumResultsNumber, + HardTimeout: moretime.ConvertFromFancyTime(val.RTimings.HardTimeout), + Delay: moretime.ConvertFromFancyTime(val.RTimings.Delay), + RandomDelay: moretime.ConvertFromFancyTime(val.RTimings.RandomDelay), + Parallelism: val.RTimings.Parallelism, } nc.Categories[key] = Category{ Ranking: val.Ranking, @@ -126,15 +126,15 @@ func (c Config) getReader() ReaderConfig { for key, val := range c.Categories { tim := ReaderCategoryTimings{ - PreferredTimeout: moretime.ConvertToFancyTime(val.Timings.PreferredTimeout), - PreferredTimeoutResults: val.Timings.PreferredTimeoutResults, - AdditionalTimeout: moretime.ConvertToFancyTime(val.Timings.AdditionalTimeout), - HardTimeout: moretime.ConvertToFancyTime(val.Timings.HardTimeout), - Timeout: moretime.ConvertToFancyTime(val.Timings.Timeout), - PageTimeout: moretime.ConvertToFancyTime(val.Timings.PageTimeout), - Delay: moretime.ConvertToFancyTime(val.Timings.Delay), - RandomDelay: moretime.ConvertToFancyTime(val.Timings.RandomDelay), - Parallelism: val.Timings.Parallelism, + PreferredTimeoutMin: moretime.ConvertToFancyTime(val.Timings.PreferredTimeoutMin), + PreferredTimeoutMax: moretime.ConvertToFancyTime(val.Timings.PreferredTimeoutMax), + PreferredResultsNumber: val.Timings.PreferredResultsNumber, + StepTime: moretime.ConvertToFancyTime(val.Timings.StepTime), + MinimumResultsNumber: val.Timings.MinimumResultsNumber, + HardTimeout: moretime.ConvertToFancyTime(val.Timings.HardTimeout), + Delay: moretime.ConvertToFancyTime(val.Timings.Delay), + RandomDelay: moretime.ConvertToFancyTime(val.Timings.RandomDelay), + Parallelism: val.Timings.Parallelism, } rc.RCategories[key] = ReaderCategory{ Ranking: 
val.Ranking, diff --git a/src/config/structs_category.go b/src/config/structs_category.go index 187072e2..f8912036 100644 --- a/src/config/structs_category.go +++ b/src/config/structs_category.go @@ -42,21 +42,27 @@ type CategoryEngineRanking struct { } // ReaderTimings is format in which the config is read from the config file +// In <number><unit> format +// Example: 1s, 1m, 1h, 1d, 1w, 1M, 1y +// If unit is not specified, it is assumed to be milliseconds // Delegates Timeout, PageTimeout to colly.Collector.SetRequestTimeout(); Note: See https://github.com/gocolly/colly/issues/644 // Delegates Delay, RandomDelay, Parallelism to colly.Collector.Limit() type ReaderCategoryTimings struct { - // Preferred timeout if enough results are found - PreferredTimeout string `koanf:"preferredtimeout"` - // Number of results which if not met will trigger the additional timeout - PreferredTimeoutResults int `koanf:"preferredtimeoutresults"` - // Additional timeout if not enough results are found (delay after which the number of results is checked) - AdditionalTimeout string `koanf:"additionaltimeout"` - // Hard timeout after which the search is forcefully stopped + // Minimum amount of time to wait before starting to check the number of results + // Search will wait for at least this amount of time (unless all engines respond) + PreferredTimeoutMin string `koanf:"preferredtimeoutmin"` + // Maximum amount of time to wait until the number of results is satisfactory + // Search will wait for at most this amount of time (unless all engines respond or the preferred number of results is found) + PreferredTimeoutMax string `koanf:"preferredtimeoutmax"` + // Preferred number of results to find + PreferredResultsNumber int `koanf:"preferredresultsnumber"` + // Time of the steps for checking if the number of results is satisfactory + StepTime string `koanf:"steptime"` + // Minimum number of results required after the maximum preferred time + // If this number isn't met, the search will continue after
the maximum preferred time + MinimumResultsNumber int `koanf:"minimumresultsnumber"` + // Hard timeout after which the search is forcefully stopped (even if the engines didn't respond) HardTimeout string `koanf:"hardtimeout"` - // Colly collector timeout (should be less than or equal to HardTimeout) - Timeout string `koanf:"timeout"` - // Colly collector page timeout (should be less than or equal to HardTimeout) - PageTimeout string `koanf:"pagetimeout"` // Colly delay Delay string `koanf:"delay"` // Colly random delay @@ -68,18 +74,21 @@ type ReaderCategoryTimings struct { // Delegates Timeout, PageTimeout to colly.Collector.SetRequestTimeout(); Note: See https://github.com/gocolly/colly/issues/644 // Delegates Delay, RandomDelay, Parallelism to colly.Collector.Limit() type CategoryTimings struct { - // Preferred timeout if enough results are found - PreferredTimeout time.Duration - // Number of results which if not met will trigger the additional timeout - PreferredTimeoutResults int - // Additional timeout if not enough results are found (delay after which the number of results is checked) - AdditionalTimeout time.Duration - // Hard timeout after which the search is forcefully stopped + // Minimum amount of time to wait before starting to check the number of results + // Search will wait for at least this amount of time (unless all engines respond) + PreferredTimeoutMin time.Duration + // Maximum amount of time to wait until the number of results is satisfactory + // Search will wait for at most this amount of time (unless all engines respond or the preferred number of results is found) + PreferredTimeoutMax time.Duration + // Preferred number of results to find + PreferredResultsNumber int + // Time of the steps for checking if the number of results is satisfactory + StepTime time.Duration + // Minimum number of results required after the maximum preferred time + // If this number isn't met, the search will continue after the maximum preferred time + 
MinimumResultsNumber int + // Hard timeout after which the search is forcefully stopped (even if the engines didn't respond) HardTimeout time.Duration - // Colly collector timeout (should be less than or equal to HardTimeout) - Timeout time.Duration - // Colly collector page timeout (should be less than or equal to HardTimeout) - PageTimeout time.Duration // Colly delay Delay time.Duration // Colly random delay diff --git a/src/config/structs_server.go b/src/config/structs_server.go index dd7b6d87..11944e7d 100644 --- a/src/config/structs_server.go +++ b/src/config/structs_server.go @@ -97,9 +97,9 @@ type ImageProxy struct { } // ReaderProxyTimeouts is format in which the config is read from the config file -// in <number><unit> format -// example: 1s, 1m, 1h, 1d, 1w, 1M, 1y -// if unit is not specified, it is assumed to be milliseconds +// In <number><unit> format +// Example: 1s, 1m, 1h, 1d, 1w, 1M, 1y +// If unit is not specified, it is assumed to be milliseconds type ReaderImageProxyTimeouts struct { Dial string `koanf:"dial"` KeepAlive string `koanf:"keepalive"` diff --git a/src/search/category/category.go b/src/search/category/category.go index 9ab3261f..82b77a4c 100644 --- a/src/search/category/category.go +++ b/src/search/category/category.go @@ -7,11 +7,12 @@ import ( var FromString = map[string]Name{ "general": GENERAL, "images": IMAGES, - "info": INFO, - "wiki": INFO, + "quick": QUICK, + "fast": QUICK, "science": SCIENCE, "sci": SCIENCE, - "surf": SURF, + "broad": BROAD, + "surf": BROAD, } // returns category diff --git a/src/search/category/name.go b/src/search/category/name.go index 3b992e1b..52933bf1 100644 --- a/src/search/category/name.go +++ b/src/search/category/name.go @@ -7,7 +7,7 @@ const ( UNDEFINED Name = "undefined" GENERAL Name = "general" IMAGES Name = "images" - INFO Name = "info" + QUICK Name = "quick" SCIENCE Name = "science" - SURF Name = "surf" + BROAD Name = "broad" ) diff --git a/src/search/engines/_engines_test/structs.go
b/src/search/engines/_engines_test/structs.go index 9e453ea6..74aa4949 100644 --- a/src/search/engines/_engines_test/structs.go +++ b/src/search/engines/_engines_test/structs.go @@ -32,14 +32,14 @@ func NewConfig(engineName engines.Name) config.Config { Engines: []engines.Name{engineName}, Ranking: config.NewRanking(), Timings: config.CategoryTimings{ - Timeout: 10000 * time.Millisecond, // colly default + HardTimeout: 10000 * time.Millisecond, }, }, category.IMAGES: { Engines: []engines.Name{engineName}, Ranking: config.NewRanking(), Timings: config.CategoryTimings{ - Timeout: 10000 * time.Millisecond, // colly default + HardTimeout: 10000 * time.Millisecond, }, }, }, diff --git a/src/search/engines/_sedefaults/init.go b/src/search/engines/_sedefaults/init.go index d821afa2..10f969b6 100644 --- a/src/search/engines/_sedefaults/init.go +++ b/src/search/engines/_sedefaults/init.go @@ -30,14 +30,6 @@ func InitializeCollectors(ctx context.Context, engineName engines.Name, options Msg("_sedefaults.InitializeCollectors(): failed adding new limit rule") } - if timings.Timeout != 0 { - col.SetRequestTimeout(timings.Timeout) - } - - if timings.PageTimeout != 0 { - pagesCol.SetRequestTimeout(timings.PageTimeout) - } - if settings.Proxies != nil { log.Debug(). Strs("proxies", settings.Proxies). 
diff --git a/src/search/perform.go b/src/search/perform.go index 9b20f344..fbfd3f5f 100644 --- a/src/search/perform.go +++ b/src/search/perform.go @@ -87,7 +87,8 @@ func runEngines(engs []engines.Name, query string, options engines.Options, sett engineStarter := NewEngineStarter() start := time.Now() - ctx, cancelCtx := context.WithTimeout(context.Background(), timings.PreferredTimeout) + // initially set the preferred timeout minimum (will be reassigned to step time later) + ctx, cancelCtx := context.WithTimeout(context.Background(), timings.PreferredTimeoutMin) ctxHard, cancelCtxHard := context.WithTimeout(context.Background(), timings.HardTimeout) // run all engines concurrently @@ -114,31 +115,55 @@ func runEngines(engs []engines.Name, query string, options engines.Options, sett waitCh <- struct{}{} }() - // break the loop if the preferred timeout is reached and there are enough results + // break the loop if the preferred number of results is found before the preferred timeout is reached + // otherwise break the loop when the minimum number of results is found // or if the hard timeout is reached // or if all engines finished Outer: for { select { - // preferred timeout reached + // preferred timeout (min/max) or step time reached case <-ctx.Done(): - log.Debug(). - Dur("duration", time.Since(start)). - Msg("Timeout reached while waiting for engines") - - // if there are not enough results, switch to additional timeout and wait again - // otherwise break the loop - if len(relay.ResultMap) < timings.PreferredTimeoutResults { - cancelCtx() // cancel the current context before creating a new one to prevent context leak - ctx, cancelCtx = context.WithTimeout(context.Background(), timings.AdditionalTimeout) + currTimeout := time.Since(start) + if currTimeout < timings.PreferredTimeoutMax { + // if the preferred number of results isn't reached, continue additional step time + if len(relay.ResultMap) < timings.PreferredResultsNumber { + log.Debug().
+ Dur("duration", currTimeout). + Int("results", len(relay.ResultMap)). + Msg("Timeout reached while waiting for engines, waiting additional step time") + cancelCtx() // cancel the current context before creating a new one to prevent context leak + ctx, cancelCtx = context.WithTimeout(context.Background(), timings.StepTime) + } else { + log.Debug(). + Dur("duration", currTimeout). + Int("results", len(relay.ResultMap)). + Msg("Timeout reached while waiting for engines") + break Outer + } } else { - break Outer + // if the minimum number of results isn't reached, continue additional step time + if len(relay.ResultMap) < timings.MinimumResultsNumber { + log.Debug(). + Dur("duration", currTimeout). + Int("results", len(relay.ResultMap)). + Msg("Preferred timeout maximum reached, waiting for minimum results required") + cancelCtx() // cancel the current context before creating a new one to prevent context leak + ctx, cancelCtx = context.WithTimeout(context.Background(), timings.StepTime) + } else { + log.Debug(). + Dur("duration", currTimeout). + Int("results", len(relay.ResultMap)). + Msg("Preferred timeout maximum reached") + break Outer + } } // hard timeout reached case <-ctxHard.Done(): log.Debug(). Dur("duration", time.Since(start)). + Int("results", len(relay.ResultMap)). Msg("Hard timeout reached while waiting for engines") break Outer @@ -146,6 +171,7 @@ Outer: case <-waitCh: log.Debug(). Dur("duration", time.Since(start)). + Int("results", len(relay.ResultMap)). Msg("All engines finished") break Outer }