diff --git a/src/config/defaults.go b/src/config/defaults.go index 80969655..a53ce1fe 100644 --- a/src/config/defaults.go +++ b/src/config/defaults.go @@ -129,9 +129,11 @@ func NewImage() []engines.Name { } } -func NewInfo() []engines.Name { +func NewQuick() []engines.Name { return []engines.Name{ engines.BING, + engines.BRAVE, + engines.DUCKDUCKGO, engines.GOOGLE, engines.MOJEEK, } @@ -177,60 +179,60 @@ func New() Config { Engines: NewGeneral(), Ranking: NewRanking(), Timings: CategoryTimings{ - PreferredTimeout: 1 * time.Second, - PreferredTimeoutResults: 20, - AdditionalTimeout: 50 * time.Millisecond, - HardTimeout: 1500 * time.Millisecond, - Timeout: 1500 * time.Millisecond, - PageTimeout: 1 * time.Second, + PreferredTimeoutMin: 1 * time.Second, + PreferredTimeoutMax: 2 * time.Second, + PreferredResultsNumber: 20, + StepTime: 50 * time.Millisecond, + MinimumResultsNumber: 10, + HardTimeout: 3 * time.Second, }, }, category.IMAGES: { Engines: NewImage(), Ranking: NewRanking(), Timings: CategoryTimings{ - PreferredTimeout: 1 * time.Second, - PreferredTimeoutResults: 40, - AdditionalTimeout: 100 * time.Millisecond, - HardTimeout: 1500 * time.Millisecond, - Timeout: 1500 * time.Millisecond, - PageTimeout: 1 * time.Second, + PreferredTimeoutMin: 1 * time.Second, + PreferredTimeoutMax: 2 * time.Second, + PreferredResultsNumber: 40, + StepTime: 100 * time.Millisecond, + MinimumResultsNumber: 20, + HardTimeout: 3 * time.Second, }, }, - category.INFO: { - Engines: NewInfo(), + category.QUICK: { + Engines: NewQuick(), Ranking: NewRanking(), Timings: CategoryTimings{ - PreferredTimeout: 500 * time.Millisecond, - PreferredTimeoutResults: 10, - AdditionalTimeout: 25 * time.Millisecond, - HardTimeout: 1200 * time.Millisecond, - Timeout: 1200 * time.Millisecond, - PageTimeout: 1 * time.Second, + PreferredTimeoutMin: 500 * time.Millisecond, + PreferredTimeoutMax: 1500 * time.Millisecond, + PreferredResultsNumber: 10, + StepTime: 25 * time.Millisecond, + MinimumResultsNumber: 5, 
+ HardTimeout: 3 * time.Second, }, }, category.SCIENCE: { Engines: NewScience(), Ranking: NewRanking(), Timings: CategoryTimings{ - PreferredTimeout: 1 * time.Second, - PreferredTimeoutResults: 10, - AdditionalTimeout: 100 * time.Millisecond, - HardTimeout: 3 * time.Second, - Timeout: 3 * time.Second, - PageTimeout: 1 * time.Second, + PreferredTimeoutMin: 1 * time.Second, + PreferredTimeoutMax: 2 * time.Second, + PreferredResultsNumber: 10, + StepTime: 100 * time.Millisecond, + MinimumResultsNumber: 5, + HardTimeout: 3 * time.Second, }, }, - category.SURF: { + category.BROAD: { Engines: NewGeneral(), Ranking: NewRanking(), Timings: CategoryTimings{ - PreferredTimeout: 2 * time.Second, - PreferredTimeoutResults: 60, - AdditionalTimeout: 200 * time.Millisecond, - HardTimeout: 4 * time.Second, - Timeout: 4 * time.Second, - PageTimeout: 1 * time.Second, + PreferredTimeoutMin: 1 * time.Second, + PreferredTimeoutMax: 3 * time.Second, + PreferredResultsNumber: 50, + StepTime: 100 * time.Millisecond, + MinimumResultsNumber: 30, + HardTimeout: 5 * time.Second, }, }, }, diff --git a/src/config/load.go b/src/config/load.go index a8490678..c0fd3abe 100644 --- a/src/config/load.go +++ b/src/config/load.go @@ -75,15 +75,15 @@ func (c *Config) fromReader(rc ReaderConfig) { } } tim := CategoryTimings{ - PreferredTimeout: moretime.ConvertFromFancyTime(val.RTimings.PreferredTimeout), - PreferredTimeoutResults: val.RTimings.PreferredTimeoutResults, - AdditionalTimeout: moretime.ConvertFromFancyTime(val.RTimings.AdditionalTimeout), - HardTimeout: moretime.ConvertFromFancyTime(val.RTimings.HardTimeout), - Timeout: moretime.ConvertFromFancyTime(val.RTimings.Timeout), - PageTimeout: moretime.ConvertFromFancyTime(val.RTimings.PageTimeout), - Delay: moretime.ConvertFromFancyTime(val.RTimings.Delay), - RandomDelay: moretime.ConvertFromFancyTime(val.RTimings.RandomDelay), - Parallelism: val.RTimings.Parallelism, + PreferredTimeoutMin: 
moretime.ConvertFromFancyTime(val.RTimings.PreferredTimeoutMin), + PreferredTimeoutMax: moretime.ConvertFromFancyTime(val.RTimings.PreferredTimeoutMax), + PreferredResultsNumber: val.RTimings.PreferredResultsNumber, + StepTime: moretime.ConvertFromFancyTime(val.RTimings.StepTime), + MinimumResultsNumber: val.RTimings.MinimumResultsNumber, + HardTimeout: moretime.ConvertFromFancyTime(val.RTimings.HardTimeout), + Delay: moretime.ConvertFromFancyTime(val.RTimings.Delay), + RandomDelay: moretime.ConvertFromFancyTime(val.RTimings.RandomDelay), + Parallelism: val.RTimings.Parallelism, } nc.Categories[key] = Category{ Ranking: val.Ranking, @@ -126,15 +126,15 @@ func (c Config) getReader() ReaderConfig { for key, val := range c.Categories { tim := ReaderCategoryTimings{ - PreferredTimeout: moretime.ConvertToFancyTime(val.Timings.PreferredTimeout), - PreferredTimeoutResults: val.Timings.PreferredTimeoutResults, - AdditionalTimeout: moretime.ConvertToFancyTime(val.Timings.AdditionalTimeout), - HardTimeout: moretime.ConvertToFancyTime(val.Timings.HardTimeout), - Timeout: moretime.ConvertToFancyTime(val.Timings.Timeout), - PageTimeout: moretime.ConvertToFancyTime(val.Timings.PageTimeout), - Delay: moretime.ConvertToFancyTime(val.Timings.Delay), - RandomDelay: moretime.ConvertToFancyTime(val.Timings.RandomDelay), - Parallelism: val.Timings.Parallelism, + PreferredTimeoutMin: moretime.ConvertToFancyTime(val.Timings.PreferredTimeoutMin), + PreferredTimeoutMax: moretime.ConvertToFancyTime(val.Timings.PreferredTimeoutMax), + PreferredResultsNumber: val.Timings.PreferredResultsNumber, + StepTime: moretime.ConvertToFancyTime(val.Timings.StepTime), + MinimumResultsNumber: val.Timings.MinimumResultsNumber, + HardTimeout: moretime.ConvertToFancyTime(val.Timings.HardTimeout), + Delay: moretime.ConvertToFancyTime(val.Timings.Delay), + RandomDelay: moretime.ConvertToFancyTime(val.Timings.RandomDelay), + Parallelism: val.Timings.Parallelism, } rc.RCategories[key] = ReaderCategory{ Ranking: 
val.Ranking, diff --git a/src/config/structs_category.go b/src/config/structs_category.go index 187072e2..f8912036 100644 --- a/src/config/structs_category.go +++ b/src/config/structs_category.go @@ -42,21 +42,27 @@ type CategoryEngineRanking struct { } // ReaderTimings is format in which the config is read from the config file +// In format +// Example: 1s, 1m, 1h, 1d, 1w, 1M, 1y +// If unit is not specified, it is assumed to be milliseconds // Delegates Timeout, PageTimeout to colly.Collector.SetRequestTimeout(); Note: See https://github.com/gocolly/colly/issues/644 // Delegates Delay, RandomDelay, Parallelism to colly.Collector.Limit() type ReaderCategoryTimings struct { - // Preferred timeout if enough results are found - PreferredTimeout string `koanf:"preferredtimeout"` - // Number of results which if not met will trigger the additional timeout - PreferredTimeoutResults int `koanf:"preferredtimeoutresults"` - // Additional timeout if not enough results are found (delay after which the number of results is checked) - AdditionalTimeout string `koanf:"additionaltimeout"` - // Hard timeout after which the search is forcefully stopped + // Minimum amount of time to wait before starting to check the number of results + // Search will wait for at least this amount of time (unless all engines respond) + PreferredTimeoutMin string `koanf:"preferredtimeoutmin"` + // Maximum amount of time to wait until the number of results is satisfactory + // Search will wait for at most this amount of time (unless all engines respond or the preferred number of results is found) + PreferredTimeoutMax string `koanf:"preferredtimeoutmax"` + // Preferred number of results to find + PreferredResultsNumber int `koanf:"preferredresultsnumber"` + // Time of the steps for checking if the number of results is satisfactory + StepTime string `koanf:"steptime"` + // Minimum number of results required after the maximum preferred time + // If this number isn't met, the search will continue after 
the maximum preferred time + MinimumResultsNumber int `koanf:"minimumresultsnumber"` + // Hard timeout after which the search is forcefully stopped (even if the engines didn't respond) HardTimeout string `koanf:"hardtimeout"` - // Colly collector timeout (should be less than or equal to HardTimeout) - Timeout string `koanf:"timeout"` - // Colly collector page timeout (should be less than or equal to HardTimeout) - PageTimeout string `koanf:"pagetimeout"` // Colly delay Delay string `koanf:"delay"` // Colly random delay @@ -68,18 +74,21 @@ type ReaderCategoryTimings struct { // Delegates Timeout, PageTimeout to colly.Collector.SetRequestTimeout(); Note: See https://github.com/gocolly/colly/issues/644 // Delegates Delay, RandomDelay, Parallelism to colly.Collector.Limit() type CategoryTimings struct { - // Preferred timeout if enough results are found - PreferredTimeout time.Duration - // Number of results which if not met will trigger the additional timeout - PreferredTimeoutResults int - // Additional timeout if not enough results are found (delay after which the number of results is checked) - AdditionalTimeout time.Duration - // Hard timeout after which the search is forcefully stopped + // Minimum amount of time to wait before starting to check the number of results + // Search will wait for at least this amount of time (unless all engines respond) + PreferredTimeoutMin time.Duration + // Maximum amount of time to wait until the number of results is satisfactory + // Search will wait for at most this amount of time (unless all engines respond or the preferred number of results is found) + PreferredTimeoutMax time.Duration + // Preferred number of results to find + PreferredResultsNumber int + // Time of the steps for checking if the number of results is satisfactory + StepTime time.Duration + // Minimum number of results required after the maximum preferred time + // If this number isn't met, the search will continue after the maximum preferred time + 
MinimumResultsNumber int + // Hard timeout after which the search is forcefully stopped (even if the engines didn't respond) HardTimeout time.Duration - // Colly collector timeout (should be less than or equal to HardTimeout) - Timeout time.Duration - // Colly collector page timeout (should be less than or equal to HardTimeout) - PageTimeout time.Duration // Colly delay Delay time.Duration // Colly random delay diff --git a/src/config/structs_server.go b/src/config/structs_server.go index dd7b6d87..11944e7d 100644 --- a/src/config/structs_server.go +++ b/src/config/structs_server.go @@ -97,9 +97,9 @@ type ImageProxy struct { } // ReaderProxyTimeouts is format in which the config is read from the config file -// in format -// example: 1s, 1m, 1h, 1d, 1w, 1M, 1y -// if unit is not specified, it is assumed to be milliseconds +// In format +// Example: 1s, 1m, 1h, 1d, 1w, 1M, 1y +// If unit is not specified, it is assumed to be milliseconds type ReaderImageProxyTimeouts struct { Dial string `koanf:"dial"` KeepAlive string `koanf:"keepalive"` diff --git a/src/search/category/category.go b/src/search/category/category.go index 9ab3261f..82b77a4c 100644 --- a/src/search/category/category.go +++ b/src/search/category/category.go @@ -7,11 +7,12 @@ import ( var FromString = map[string]Name{ "general": GENERAL, "images": IMAGES, - "info": INFO, - "wiki": INFO, + "quick": QUICK, + "fast": QUICK, "science": SCIENCE, "sci": SCIENCE, - "surf": SURF, + "broad": BROAD, + "surf": BROAD, } // returns category diff --git a/src/search/category/name.go b/src/search/category/name.go index 3b992e1b..52933bf1 100644 --- a/src/search/category/name.go +++ b/src/search/category/name.go @@ -7,7 +7,7 @@ const ( UNDEFINED Name = "undefined" GENERAL Name = "general" IMAGES Name = "images" - INFO Name = "info" + QUICK Name = "quick" SCIENCE Name = "science" - SURF Name = "surf" + BROAD Name = "broad" ) diff --git a/src/search/engines/_engines_test/structs.go 
b/src/search/engines/_engines_test/structs.go index 9e453ea6..74aa4949 100644 --- a/src/search/engines/_engines_test/structs.go +++ b/src/search/engines/_engines_test/structs.go @@ -32,14 +32,14 @@ func NewConfig(engineName engines.Name) config.Config { Engines: []engines.Name{engineName}, Ranking: config.NewRanking(), Timings: config.CategoryTimings{ - Timeout: 10000 * time.Millisecond, // colly default + HardTimeout: 10000 * time.Millisecond, }, }, category.IMAGES: { Engines: []engines.Name{engineName}, Ranking: config.NewRanking(), Timings: config.CategoryTimings{ - Timeout: 10000 * time.Millisecond, // colly default + HardTimeout: 10000 * time.Millisecond, }, }, }, diff --git a/src/search/engines/_sedefaults/init.go b/src/search/engines/_sedefaults/init.go index d821afa2..10f969b6 100644 --- a/src/search/engines/_sedefaults/init.go +++ b/src/search/engines/_sedefaults/init.go @@ -30,14 +30,6 @@ func InitializeCollectors(ctx context.Context, engineName engines.Name, options Msg("_sedefaults.InitializeCollectors(): failed adding new limit rule") } - if timings.Timeout != 0 { - col.SetRequestTimeout(timings.Timeout) - } - - if timings.PageTimeout != 0 { - pagesCol.SetRequestTimeout(timings.PageTimeout) - } - if settings.Proxies != nil { log.Debug(). Strs("proxies", settings.Proxies). 
diff --git a/src/search/perform.go b/src/search/perform.go index 9b20f344..fbfd3f5f 100644 --- a/src/search/perform.go +++ b/src/search/perform.go @@ -87,7 +87,8 @@ func runEngines(engs []engines.Name, query string, options engines.Options, sett engineStarter := NewEngineStarter() start := time.Now() - ctx, cancelCtx := context.WithTimeout(context.Background(), timings.PreferredTimeout) + // initially set the preferred timeout minimum (will be reassigned to step time later) + ctx, cancelCtx := context.WithTimeout(context.Background(), timings.PreferredTimeoutMin) ctxHard, cancelCtxHard := context.WithTimeout(context.Background(), timings.HardTimeout) // run all engines concurrently @@ -114,31 +115,55 @@ func runEngines(engs []engines.Name, query string, options engines.Options, sett waitCh <- struct{}{} }() - // break the loop if the preferred timeout is reached and there are enough results + // break the loop if the preferred number of results is found before the preferred timeout is reached + // otherwise break the loop when the minimum number of results is found // or if the hard timeout is reached // or if all engines finished Outer: for { select { - // preferred timeout reached + // preferred timeout (min/max) or step time reached case <-ctx.Done(): - log.Debug(). - Dur("duration", time.Since(start)). - Msg("Timeout reached while waiting for engines") - - // if there are not enough results, switch to additional timeout and wait again - // otherwise break the loop - if len(relay.ResultMap) < timings.PreferredTimeoutResults { - cancelCtx() // cancel the current context before creating a new one to prevent context leak - ctx, cancelCtx = context.WithTimeout(context.Background(), timings.AdditionalTimeout) + currTimeout := time.Since(start) + if currTimeout < timings.PreferredTimeoutMax { + // if the preferred number of results isn't reached, continue additional step time + if len(relay.ResultMap) < timings.PreferredResultsNumber { + log.Debug(). 
+ Dur("duration", currTimeout). + Int("results", len(relay.ResultMap)). + Msg("Timeout reached while waiting for engines, waiting additional step time") + cancelCtx() // cancel the current context before creating a new one to prevent context leak + ctx, cancelCtx = context.WithTimeout(context.Background(), timings.StepTime) + } else { + log.Debug(). + Dur("duration", currTimeout). + Int("results", len(relay.ResultMap)). + Msg("Timeout reached while waiting for engines") + break Outer + } } else { - break Outer + // if the minimum number of results isn't reached, continue additional step time + if len(relay.ResultMap) < timings.MinimumResultsNumber { + log.Debug(). + Dur("duration", currTimeout). + Int("results", len(relay.ResultMap)). + Msg("Preferred timeout maximum reached, waiting for minimum results required") + cancelCtx() // cancel the current context before creating a new one to prevent context leak + ctx, cancelCtx = context.WithTimeout(context.Background(), timings.StepTime) + } else { + log.Debug(). + Dur("duration", currTimeout). + Int("results", len(relay.ResultMap)). + Msg("Preferred timeout maximum reached") + break Outer + } } // hard timeout reached case <-ctxHard.Done(): log.Debug(). Dur("duration", time.Since(start)). + Int("results", len(relay.ResultMap)). Msg("Hard timeout reached while waiting for engines") break Outer @@ -146,6 +171,7 @@ Outer: case <-waitCh: log.Debug(). Dur("duration", time.Since(start)). + Int("results", len(relay.ResultMap)). Msg("All engines finished") break Outer }