Skip to content

Commit

Permalink
fix(search): better timings algo and even better defaults
Browse files Browse the repository at this point in the history
  • Loading branch information
aleksasiriski committed May 11, 2024
1 parent b1bb484 commit beae7b2
Show file tree
Hide file tree
Showing 9 changed files with 135 additions and 105 deletions.
70 changes: 36 additions & 34 deletions src/config/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,11 @@ func NewImage() []engines.Name {
}
}

func NewInfo() []engines.Name {
func NewQuick() []engines.Name {
return []engines.Name{
engines.BING,
engines.BRAVE,
engines.DUCKDUCKGO,
engines.GOOGLE,
engines.MOJEEK,
}
Expand Down Expand Up @@ -177,60 +179,60 @@ func New() Config {
Engines: NewGeneral(),
Ranking: NewRanking(),
Timings: CategoryTimings{
PreferredTimeout: 1 * time.Second,
PreferredTimeoutResults: 20,
AdditionalTimeout: 50 * time.Millisecond,
HardTimeout: 1500 * time.Millisecond,
Timeout: 1500 * time.Millisecond,
PageTimeout: 1 * time.Second,
PreferredTimeoutMin: 1 * time.Second,
PreferredTimeoutMax: 2 * time.Second,
PreferredResultsNumber: 20,
StepTime: 50 * time.Millisecond,
MinimumResultsNumber: 10,
HardTimeout: 3 * time.Second,
},
},
category.IMAGES: {
Engines: NewImage(),
Ranking: NewRanking(),
Timings: CategoryTimings{
PreferredTimeout: 1 * time.Second,
PreferredTimeoutResults: 40,
AdditionalTimeout: 100 * time.Millisecond,
HardTimeout: 1500 * time.Millisecond,
Timeout: 1500 * time.Millisecond,
PageTimeout: 1 * time.Second,
PreferredTimeoutMin: 1 * time.Second,
PreferredTimeoutMax: 2 * time.Second,
PreferredResultsNumber: 40,
StepTime: 100 * time.Millisecond,
MinimumResultsNumber: 20,
HardTimeout: 3 * time.Second,
},
},
category.INFO: {
Engines: NewInfo(),
category.QUICK: {
Engines: NewQuick(),
Ranking: NewRanking(),
Timings: CategoryTimings{
PreferredTimeout: 500 * time.Millisecond,
PreferredTimeoutResults: 10,
AdditionalTimeout: 25 * time.Millisecond,
HardTimeout: 1200 * time.Millisecond,
Timeout: 1200 * time.Millisecond,
PageTimeout: 1 * time.Second,
PreferredTimeoutMin: 500 * time.Millisecond,
PreferredTimeoutMax: 1500 * time.Millisecond,
PreferredResultsNumber: 10,
StepTime: 25 * time.Millisecond,
MinimumResultsNumber: 5,
HardTimeout: 3 * time.Second,
},
},
category.SCIENCE: {
Engines: NewScience(),
Ranking: NewRanking(),
Timings: CategoryTimings{
PreferredTimeout: 1 * time.Second,
PreferredTimeoutResults: 10,
AdditionalTimeout: 100 * time.Millisecond,
HardTimeout: 3 * time.Second,
Timeout: 3 * time.Second,
PageTimeout: 1 * time.Second,
PreferredTimeoutMin: 1 * time.Second,
PreferredTimeoutMax: 2 * time.Second,
PreferredResultsNumber: 10,
StepTime: 100 * time.Millisecond,
MinimumResultsNumber: 5,
HardTimeout: 3 * time.Second,
},
},
category.SURF: {
category.BROAD: {
Engines: NewGeneral(),
Ranking: NewRanking(),
Timings: CategoryTimings{
PreferredTimeout: 2 * time.Second,
PreferredTimeoutResults: 60,
AdditionalTimeout: 200 * time.Millisecond,
HardTimeout: 4 * time.Second,
Timeout: 4 * time.Second,
PageTimeout: 1 * time.Second,
PreferredTimeoutMin: 1 * time.Second,
PreferredTimeoutMax: 3 * time.Second,
PreferredResultsNumber: 50,
StepTime: 100 * time.Millisecond,
MinimumResultsNumber: 30,
HardTimeout: 5 * time.Second,
},
},
},
Expand Down
36 changes: 18 additions & 18 deletions src/config/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,15 @@ func (c *Config) fromReader(rc ReaderConfig) {
}
}
tim := CategoryTimings{
PreferredTimeout: moretime.ConvertFromFancyTime(val.RTimings.PreferredTimeout),
PreferredTimeoutResults: val.RTimings.PreferredTimeoutResults,
AdditionalTimeout: moretime.ConvertFromFancyTime(val.RTimings.AdditionalTimeout),
HardTimeout: moretime.ConvertFromFancyTime(val.RTimings.HardTimeout),
Timeout: moretime.ConvertFromFancyTime(val.RTimings.Timeout),
PageTimeout: moretime.ConvertFromFancyTime(val.RTimings.PageTimeout),
Delay: moretime.ConvertFromFancyTime(val.RTimings.Delay),
RandomDelay: moretime.ConvertFromFancyTime(val.RTimings.RandomDelay),
Parallelism: val.RTimings.Parallelism,
PreferredTimeoutMin: moretime.ConvertFromFancyTime(val.RTimings.PreferredTimeoutMin),
PreferredTimeoutMax: moretime.ConvertFromFancyTime(val.RTimings.PreferredTimeoutMax),
PreferredResultsNumber: val.RTimings.PreferredResultsNumber,
StepTime: moretime.ConvertFromFancyTime(val.RTimings.StepTime),
MinimumResultsNumber: val.RTimings.MinimumResultsNumber,
HardTimeout: moretime.ConvertFromFancyTime(val.RTimings.HardTimeout),
Delay: moretime.ConvertFromFancyTime(val.RTimings.Delay),
RandomDelay: moretime.ConvertFromFancyTime(val.RTimings.RandomDelay),
Parallelism: val.RTimings.Parallelism,
}
nc.Categories[key] = Category{
Ranking: val.Ranking,
Expand Down Expand Up @@ -126,15 +126,15 @@ func (c Config) getReader() ReaderConfig {

for key, val := range c.Categories {
tim := ReaderCategoryTimings{
PreferredTimeout: moretime.ConvertToFancyTime(val.Timings.PreferredTimeout),
PreferredTimeoutResults: val.Timings.PreferredTimeoutResults,
AdditionalTimeout: moretime.ConvertToFancyTime(val.Timings.AdditionalTimeout),
HardTimeout: moretime.ConvertToFancyTime(val.Timings.HardTimeout),
Timeout: moretime.ConvertToFancyTime(val.Timings.Timeout),
PageTimeout: moretime.ConvertToFancyTime(val.Timings.PageTimeout),
Delay: moretime.ConvertToFancyTime(val.Timings.Delay),
RandomDelay: moretime.ConvertToFancyTime(val.Timings.RandomDelay),
Parallelism: val.Timings.Parallelism,
PreferredTimeoutMin: moretime.ConvertToFancyTime(val.Timings.PreferredTimeoutMin),
PreferredTimeoutMax: moretime.ConvertToFancyTime(val.Timings.PreferredTimeoutMax),
PreferredResultsNumber: val.Timings.PreferredResultsNumber,
StepTime: moretime.ConvertToFancyTime(val.Timings.StepTime),
MinimumResultsNumber: val.Timings.MinimumResultsNumber,
HardTimeout: moretime.ConvertToFancyTime(val.Timings.HardTimeout),
Delay: moretime.ConvertToFancyTime(val.Timings.Delay),
RandomDelay: moretime.ConvertToFancyTime(val.Timings.RandomDelay),
Parallelism: val.Timings.Parallelism,
}
rc.RCategories[key] = ReaderCategory{
Ranking: val.Ranking,
Expand Down
53 changes: 31 additions & 22 deletions src/config/structs_category.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,27 @@ type CategoryEngineRanking struct {
}

// ReaderCategoryTimings is the format in which the config is read from the config file
// In <number><unit> format
// Example: 1s, 1m, 1h, 1d, 1w, 1M, 1y
// If unit is not specified, it is assumed to be milliseconds
// Delegates Timeout, PageTimeout to colly.Collector.SetRequestTimeout(); Note: See https://github.com/gocolly/colly/issues/644
// Delegates Delay, RandomDelay, Parallelism to colly.Collector.Limit()
type ReaderCategoryTimings struct {
// Preferred timeout if enough results are found
PreferredTimeout string `koanf:"preferredtimeout"`
// Number of results which if not met will trigger the additional timeout
PreferredTimeoutResults int `koanf:"preferredtimeoutresults"`
// Additional timeout if not enough results are found (delay after which the number of results is checked)
AdditionalTimeout string `koanf:"additionaltimeout"`
// Hard timeout after which the search is forcefully stopped
// Minimum amount of time to wait before starting to check the number of results
// Search will wait for at least this amount of time (unless all engines respond)
PreferredTimeoutMin string `koanf:"preferredtimeoutmin"`
// Maximum amount of time to wait until the number of results is satisfactory
// Search will wait for at most this amount of time (unless all engines respond or the preferred number of results is found)
PreferredTimeoutMax string `koanf:"preferredtimeoutmax"`
// Preferred number of results to find
PreferredResultsNumber int `koanf:"preferredresultsnumber"`
// Interval between successive checks of whether the number of results is satisfactory
StepTime string `koanf:"steptime"`
// Minimum number of results required after the maximum preferred time
// If this number isn't met, the search will continue after the maximum preferred time
MinimumResultsNumber int `koanf:"minimumresultsnumber"`
// Hard timeout after which the search is forcefully stopped (even if the engines didn't respond)
HardTimeout string `koanf:"hardtimeout"`
// Colly collector timeout (should be less than or equal to HardTimeout)
Timeout string `koanf:"timeout"`
// Colly collector page timeout (should be less than or equal to HardTimeout)
PageTimeout string `koanf:"pagetimeout"`
// Colly delay
Delay string `koanf:"delay"`
// Colly random delay
Expand All @@ -68,18 +74,21 @@ type ReaderCategoryTimings struct {
// Delegates Timeout, PageTimeout to colly.Collector.SetRequestTimeout(); Note: See https://github.com/gocolly/colly/issues/644
// Delegates Delay, RandomDelay, Parallelism to colly.Collector.Limit()
type CategoryTimings struct {
// Preferred timeout if enough results are found
PreferredTimeout time.Duration
// Number of results which if not met will trigger the additional timeout
PreferredTimeoutResults int
// Additional timeout if not enough results are found (delay after which the number of results is checked)
AdditionalTimeout time.Duration
// Hard timeout after which the search is forcefully stopped
// Minimum amount of time to wait before starting to check the number of results
// Search will wait for at least this amount of time (unless all engines respond)
PreferredTimeoutMin time.Duration
// Maximum amount of time to wait until the number of results is satisfactory
// Search will wait for at most this amount of time (unless all engines respond or the preferred number of results is found)
PreferredTimeoutMax time.Duration
// Preferred number of results to find
PreferredResultsNumber int
// Interval between successive checks of whether the number of results is satisfactory
StepTime time.Duration
// Minimum number of results required after the maximum preferred time
// If this number isn't met, the search will continue after the maximum preferred time
MinimumResultsNumber int
// Hard timeout after which the search is forcefully stopped (even if the engines didn't respond)
HardTimeout time.Duration
// Colly collector timeout (should be less than or equal to HardTimeout)
Timeout time.Duration
// Colly collector page timeout (should be less than or equal to HardTimeout)
PageTimeout time.Duration
// Colly delay
Delay time.Duration
// Colly random delay
Expand Down
6 changes: 3 additions & 3 deletions src/config/structs_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,9 @@ type ImageProxy struct {
}

// ReaderImageProxyTimeouts is the format in which the config is read from the config file
// in <number><unit> format
// example: 1s, 1m, 1h, 1d, 1w, 1M, 1y
// if unit is not specified, it is assumed to be milliseconds
// In <number><unit> format
// Example: 1s, 1m, 1h, 1d, 1w, 1M, 1y
// If unit is not specified, it is assumed to be milliseconds
type ReaderImageProxyTimeouts struct {
Dial string `koanf:"dial"`
KeepAlive string `koanf:"keepalive"`
Expand Down
7 changes: 4 additions & 3 deletions src/search/category/category.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ import (
var FromString = map[string]Name{
"general": GENERAL,
"images": IMAGES,
"info": INFO,
"wiki": INFO,
"quick": QUICK,
"fast": QUICK,
"science": SCIENCE,
"sci": SCIENCE,
"surf": SURF,
"broad": BROAD,
"surf": BROAD,
}

// returns category
Expand Down
4 changes: 2 additions & 2 deletions src/search/category/name.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ const (
UNDEFINED Name = "undefined"
GENERAL Name = "general"
IMAGES Name = "images"
INFO Name = "info"
QUICK Name = "quick"
SCIENCE Name = "science"
SURF Name = "surf"
BROAD Name = "broad"
)
4 changes: 2 additions & 2 deletions src/search/engines/_engines_test/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ func NewConfig(engineName engines.Name) config.Config {
Engines: []engines.Name{engineName},
Ranking: config.NewRanking(),
Timings: config.CategoryTimings{
Timeout: 10000 * time.Millisecond, // colly default
HardTimeout: 10000 * time.Millisecond,
},
},
category.IMAGES: {
Engines: []engines.Name{engineName},
Ranking: config.NewRanking(),
Timings: config.CategoryTimings{
Timeout: 10000 * time.Millisecond, // colly default
HardTimeout: 10000 * time.Millisecond,
},
},
},
Expand Down
8 changes: 0 additions & 8 deletions src/search/engines/_sedefaults/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,6 @@ func InitializeCollectors(ctx context.Context, engineName engines.Name, options
Msg("_sedefaults.InitializeCollectors(): failed adding new limit rule")
}

if timings.Timeout != 0 {
col.SetRequestTimeout(timings.Timeout)
}

if timings.PageTimeout != 0 {
pagesCol.SetRequestTimeout(timings.PageTimeout)
}

if settings.Proxies != nil {
log.Debug().
Strs("proxies", settings.Proxies).
Expand Down
52 changes: 39 additions & 13 deletions src/search/perform.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ func runEngines(engs []engines.Name, query string, options engines.Options, sett
engineStarter := NewEngineStarter()

start := time.Now()
ctx, cancelCtx := context.WithTimeout(context.Background(), timings.PreferredTimeout)
// initially set the preferred timeout minimum (will be reassigned to step time later)
ctx, cancelCtx := context.WithTimeout(context.Background(), timings.PreferredTimeoutMin)
ctxHard, cancelCtxHard := context.WithTimeout(context.Background(), timings.HardTimeout)

// run all engines concurrently
Expand All @@ -114,38 +115,63 @@ func runEngines(engs []engines.Name, query string, options engines.Options, sett
waitCh <- struct{}{}
}()

// break the loop if the preferred timeout is reached and there are enough results
// break the loop if the preferred number of results is found before the preferred timeout is reached
// otherwise break the loop when the minimum number of results is found
// or if the hard timeout is reached
// or if all engines finished
Outer:
for {
select {
// preferred timeout reached
// preferred timeout (min/max) or step time reached
case <-ctx.Done():
log.Debug().
Dur("duration", time.Since(start)).
Msg("Timeout reached while waiting for engines")

// if there are not enough results, switch to additional timeout and wait again
// otherwise break the loop
if len(relay.ResultMap) < timings.PreferredTimeoutResults {
cancelCtx() // cancel the current context before creating a new one to prevent context leak
ctx, cancelCtx = context.WithTimeout(context.Background(), timings.AdditionalTimeout)
currTimeout := time.Since(start)
if currTimeout < timings.PreferredTimeoutMax {
// if the preferred number of results isn't reached, wait for an additional step time
if len(relay.ResultMap) < timings.PreferredResultsNumber {
log.Debug().
Dur("duration", currTimeout).
Int("results", len(relay.ResultMap)).
Msg("Timeout reached while waiting for engines, waiting additional step time")
cancelCtx() // cancel the current context before creating a new one to prevent context leak
ctx, cancelCtx = context.WithTimeout(context.Background(), timings.StepTime)
} else {
log.Debug().
Dur("duration", currTimeout).
Int("results", len(relay.ResultMap)).
Msg("Timeout reached while waiting for engines")
break Outer
}
} else {
break Outer
// if the minimum number of results isn't reached, wait for an additional step time
if len(relay.ResultMap) < timings.MinimumResultsNumber {
log.Debug().
Dur("duration", currTimeout).
Int("results", len(relay.ResultMap)).
Msg("Preferred timeout maximum reached, waiting for minimum results required")
cancelCtx() // cancel the current context before creating a new one to prevent context leak
ctx, cancelCtx = context.WithTimeout(context.Background(), timings.StepTime)
} else {
log.Debug().
Dur("duration", currTimeout).
Int("results", len(relay.ResultMap)).
Msg("Preferred timeout maximum reached")
break Outer
}
}

// hard timeout reached
case <-ctxHard.Done():
log.Debug().
Dur("duration", time.Since(start)).
Int("results", len(relay.ResultMap)).
Msg("Hard timeout reached while waiting for engines")
break Outer

// all engines finished
case <-waitCh:
log.Debug().
Dur("duration", time.Since(start)).
Int("results", len(relay.ResultMap)).
Msg("All engines finished")
break Outer
}
Expand Down

0 comments on commit beae7b2

Please sign in to comment.