diff --git a/src/config/defaults.go b/src/config/defaults.go index 80ad8039..92ccc7f7 100644 --- a/src/config/defaults.go +++ b/src/config/defaults.go @@ -177,64 +177,60 @@ func New() Config { Engines: NewGeneral(), Ranking: NewRanking(), Timings: Timings{ - Timeout: 1000 * time.Millisecond, - PageTimeout: 1000 * time.Millisecond, + PreferredTimeout: 1 * time.Second, + PreferredTimeoutResults: 30, + AdditionalTimeout: 50 * time.Millisecond, + HardTimeout: 3 * time.Second, + Timeout: 3 * time.Second, + PageTimeout: 1 * time.Second, }, }, category.IMAGES: { Engines: NewImage(), Ranking: NewRanking(), Timings: Timings{ - Timeout: 1500 * time.Millisecond, - PageTimeout: 1500 * time.Millisecond, + PreferredTimeout: 1 * time.Second, + PreferredTimeoutResults: 40, + AdditionalTimeout: 100 * time.Millisecond, + HardTimeout: 2 * time.Second, + Timeout: 2 * time.Second, + PageTimeout: 1 * time.Second, }, }, category.INFO: { Engines: NewInfo(), Ranking: NewRanking(), Timings: Timings{ - Timeout: 1000 * time.Millisecond, - PageTimeout: 1000 * time.Millisecond, + PreferredTimeout: 1 * time.Second, + PreferredTimeoutResults: 20, + AdditionalTimeout: 50 * time.Millisecond, + HardTimeout: 2 * time.Second, + Timeout: 2 * time.Second, + PageTimeout: 1 * time.Second, }, }, category.SCIENCE: { Engines: NewScience(), Ranking: NewRanking(), Timings: Timings{ - Timeout: 3000 * time.Millisecond, - PageTimeout: 1000 * time.Millisecond, - }, - }, - category.NEWS: { - Engines: NewAllEnabled(), - Ranking: NewRanking(), - Timings: Timings{ - Timeout: 1000 * time.Millisecond, - PageTimeout: 1000 * time.Millisecond, - }, - }, - category.BLOG: { - Engines: NewAllEnabled(), - Ranking: NewRanking(), - Timings: Timings{ - Timeout: 2500 * time.Millisecond, - PageTimeout: 1000 * time.Millisecond, + PreferredTimeout: 2 * time.Second, + PreferredTimeoutResults: 40, + AdditionalTimeout: 200 * time.Millisecond, + HardTimeout: 5 * time.Second, + Timeout: 5 * time.Second, + PageTimeout: 1 * time.Second, }, }, category.SURF: { - Engines: NewAllEnabled(), - Ranking: NewRanking(), - Timings: Timings{ - Timeout: 2000 * time.Millisecond, - PageTimeout: 1000 * time.Millisecond, - }, - }, - category.NEWNEWS: { - Engines: NewAllEnabled(), + Engines: NewGeneral(), Ranking: NewRanking(), Timings: Timings{ - Timeout: 1000 * time.Millisecond, - PageTimeout: 1000 * time.Millisecond, + PreferredTimeout: 2 * time.Second, + PreferredTimeoutResults: 60, + AdditionalTimeout: 100 * time.Millisecond, + HardTimeout: 4 * time.Second, + Timeout: 4 * time.Second, + PageTimeout: 1 * time.Second, }, }, }, diff --git a/src/config/load.go b/src/config/load.go index eb000e3a..8bea0a71 100644 --- a/src/config/load.go +++ b/src/config/load.go @@ -76,12 +76,15 @@ func (c *Config) fromReader(rc ReaderConfig) { } } tim := Timings{ - // HardTimeout: time.Duration(val.RTimings.HardTimeout) * time.Millisecond, - Timeout: time.Duration(val.RTimings.Timeout) * time.Millisecond, - PageTimeout: time.Duration(val.RTimings.PageTimeout) * time.Millisecond, - Delay: time.Duration(val.RTimings.Delay) * time.Millisecond, - RandomDelay: time.Duration(val.RTimings.RandomDelay) * time.Millisecond, - Parallelism: val.RTimings.Parallelism, + PreferredTimeout: time.Duration(val.RTimings.PreferredTimeout) * time.Millisecond, + PreferredTimeoutResults: val.RTimings.PreferredTimeoutResults, + AdditionalTimeout: time.Duration(val.RTimings.AdditionalTimeout) * time.Millisecond, + HardTimeout: time.Duration(val.RTimings.HardTimeout) * time.Millisecond, + Timeout: time.Duration(val.RTimings.Timeout) * time.Millisecond, + PageTimeout: time.Duration(val.RTimings.PageTimeout) * time.Millisecond, + Delay: time.Duration(val.RTimings.Delay) * time.Millisecond, + RandomDelay: time.Duration(val.RTimings.RandomDelay) * time.Millisecond, + Parallelism: val.RTimings.Parallelism, } nc.Categories[key] = Category{ Ranking: val.Ranking, @@ -124,12 +127,15 @@ func (c Config) getReader() ReaderConfig { for key, val := range c.Categories { tim := ReaderTimings{ - // HardTimeout: uint(val.Timings.HardTimeout.Milliseconds()), - Timeout: uint(val.Timings.Timeout.Milliseconds()), - PageTimeout: uint(val.Timings.PageTimeout.Milliseconds()), - Delay: uint(val.Timings.Delay.Milliseconds()), - RandomDelay: uint(val.Timings.RandomDelay.Milliseconds()), - Parallelism: val.Timings.Parallelism, + PreferredTimeout: uint(val.Timings.PreferredTimeout.Milliseconds()), + PreferredTimeoutResults: val.Timings.PreferredTimeoutResults, + AdditionalTimeout: uint(val.Timings.AdditionalTimeout.Milliseconds()), + HardTimeout: uint(val.Timings.HardTimeout.Milliseconds()), + Timeout: uint(val.Timings.Timeout.Milliseconds()), + PageTimeout: uint(val.Timings.PageTimeout.Milliseconds()), + Delay: uint(val.Timings.Delay.Milliseconds()), + RandomDelay: uint(val.Timings.RandomDelay.Milliseconds()), + Parallelism: val.Timings.Parallelism, } rc.RCategories[key] = ReaderCategory{ Ranking: val.Ranking, diff --git a/src/config/structs.go b/src/config/structs.go index cfd2d28a..0dd87d35 100644 --- a/src/config/structs.go +++ b/src/config/structs.go @@ -130,18 +130,29 @@ type ReaderEngine struct { // ReaderTimings is format in which the config is read from the config file // in miliseconds type ReaderTimings struct { - // HardTimeout uint `koanf:"hardTimeout"` - Timeout uint `koanf:"timeout"` - PageTimeout uint `koanf:"pagetimeout"` - Delay uint `koanf:"delay"` - RandomDelay uint `koanf:"randomdelay"` - Parallelism int `koanf:"parallelism"` + PreferredTimeout uint `koanf:"preferredtimeout"` + PreferredTimeoutResults int `koanf:"preferredtimeoutresults"` + AdditionalTimeout uint `koanf:"additionaltimeout"` + HardTimeout uint `koanf:"hardtimeout"` + Timeout uint `koanf:"timeout"` + PageTimeout uint `koanf:"pagetimeout"` + Delay uint `koanf:"delay"` + RandomDelay uint `koanf:"randomdelay"` + Parallelism int `koanf:"parallelism"` } // Delegates Timeout, PageTimeout to colly.Collector.SetRequestTimeout(); Note: See https://github.com/gocolly/colly/issues/644 // Delegates Delay, RandomDelay, Parallelism to colly.Collector.Limit() type Timings struct { - // HardTimeout time.Duration + // preferred timeout if enough results are found + PreferredTimeout time.Duration + // number of results which if not met will trigger the additional timeout + PreferredTimeoutResults int + // additional timeout if not enough results are found (delay after which the number of results is checked) + AdditionalTimeout time.Duration + // hard timeout after which the search is forcefully stopped + HardTimeout time.Duration + // colly settings Timeout time.Duration PageTimeout time.Duration Delay time.Duration diff --git a/src/main.go b/src/main.go index 77315bf4..136bcada 100644 --- a/src/main.go +++ b/src/main.go @@ -19,7 +19,8 @@ func main() { mainTimer := time.Now() // setup signal interrupt (CTRL+C) - ctx, _ := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM) + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM) + defer cancel() // parse cli arguments cliFlags := cli.Setup() diff --git a/src/search/category/category.go b/src/search/category/category.go index 59b4d51c..9ab3261f 100644 --- a/src/search/category/category.go +++ b/src/search/category/category.go @@ -5,19 +5,13 @@ import ( ) var FromString = map[string]Name{ - //main "general": GENERAL, "images": IMAGES, "info": INFO, + "wiki": INFO, "science": SCIENCE, - "news": NEWS, - "blog": BLOG, + "sci": SCIENCE, "surf": SURF, - "newnews": NEWNEWS, - //alternatives - "wiki": INFO, - "sci": SCIENCE, - "nnews": NEWNEWS, } // returns category diff --git a/src/search/category/name.go b/src/search/category/name.go index 36da6c46..3b992e1b 100644 --- a/src/search/category/name.go +++ b/src/search/category/name.go @@ -9,8 +9,5 @@ const ( IMAGES Name = "images" INFO Name = "info" SCIENCE Name = "science" - NEWS Name = "news" - BLOG Name = "blog" SURF Name = "surf" - NEWNEWS Name = "newnews" ) diff --git a/src/search/perform.go b/src/search/perform.go index f0a7681c..56a03cc2 100644 --- a/src/search/perform.go +++ b/src/search/perform.go @@ -66,6 +66,7 @@ func PerformSearch(query string, options engines.Options, settings map[engines.N } func runEngines(engs []engines.Name, query string, options engines.Options, settings map[engines.Name]config.Settings, timings config.Timings, salt string) map[string]*result.Result { + // create engine strings slice for logging engsStrs := make([]string, 0, len(engs)) for _, eng := range engs { engsStrs = append(engsStrs, eng.String()) @@ -76,13 +77,20 @@ func runEngines(engs []engines.Name, query string, options engines.Options, sett Strs("engines", engsStrs). Msg("Enabled engines") + // create a relay to store results relay := bucket.Relay{ ResultMap: make(map[string]*result.Result), } + // create a wait group to wait for all engines to finish var wg sync.WaitGroup engineStarter := NewEngineStarter() + start := time.Now() + ctx, cancelCtx := context.WithTimeout(context.Background(), timings.PreferredTimeout) + ctxHard, cancelCtxHard := context.WithTimeout(context.Background(), timings.HardTimeout) + + // run all engines concurrently for _, eng := range engs { wg.Add(1) go func() { @@ -99,6 +107,53 @@ func runEngines(engs []engines.Name, query string, options engines.Options, sett }() } - wg.Wait() + // wait for all engines to finish + waitCh := make(chan struct{}) + go func() { + wg.Wait() + waitCh <- struct{}{} + }() + + // break the loop if the preferred timeout is reached and there are enough results + // or if the hard timeout is reached + // or if all engines finished +Outer: + for { + select { + // preferred timeout reached + case <-ctx.Done(): + log.Debug(). + Dur("duration", time.Since(start)). + Msg("Timeout reached while waiting for engines") + + // if there are not enough results, switch to additional timeout and wait again + // otherwise break the loop + if len(relay.ResultMap) < timings.PreferredTimeoutResults { + cancelCtx() // cancel the current context before creating a new one to prevent context leak + ctx, cancelCtx = context.WithTimeout(context.Background(), timings.AdditionalTimeout) + } else { + break Outer + } + + // hard timeout reached + case <-ctxHard.Done(): + log.Debug(). + Dur("duration", time.Since(start)). + Msg("Hard timeout reached while waiting for engines") + break Outer + + // all engines finished + case <-waitCh: + log.Debug(). + Dur("duration", time.Since(start)). + Msg("All engines finished") + break Outer + } + } + + // cancel the current contexts to prevent context leak + cancelCtx() + cancelCtxHard() + return relay.ResultMap }