diff --git a/.gitignore b/.gitignore index fb8e7cee..ca329bdc 100644 --- a/.gitignore +++ b/.gitignore @@ -27,8 +27,9 @@ hearchco.* .vscode/* test.go -src/engines/*/site/* -!src/engines/_engines_test +src/search/engines/*/site/* +!src/search/engines/_engines_test +!src/search/engines/_sedefaults log/ database/ diff --git a/Makefile b/Makefile index a51efab5..487e2698 100644 --- a/Makefile +++ b/Makefile @@ -1,34 +1,40 @@ run: go run ./src +run-cli: + go run ./src --cli debug: go run ./src -v +debug-cli: + go run ./src -v --cli trace: go run ./src -vv +trace-cli: + go run ./src -vv --cli setup: go get ./... go install github.com/dmarkham/enumer@latest go generate ./... +install: setup build: go build ./... test: sh ./scripts/test.sh - test-engines: sh ./scripts/test-engines.sh - test-redis: sh ./scripts/test-redis.sh - test-redis-podman: sh ./scripts/test-redis-podman.sh - test-redis-docker: sh ./scripts/test-redis-docker.sh +test-all: test test-redis test-engines +test-all-podman: test test-redis-podman test-engines +test-all-docker: test test-redis-docker test-engines update: go get -u ./... 
diff --git a/generate/searcher/searcher.go b/generate/searcher/searcher.go index 24d30b68..11f3ee37 100644 --- a/generate/searcher/searcher.go +++ b/generate/searcher/searcher.go @@ -22,7 +22,7 @@ var ( trimprefix = flag.String("trimprefix", "", "trim the `prefix` from the generated constant names") buildTags = flag.String("tags", "", "comma-separated list of build tags to apply") packageName = flag.String("packagename", "", "name of the package for generated code; default current package") - enginesImport = flag.String("enginesimport", "github.com/hearchco/hearchco/src/engines", "source of the engines import, which is prefixed to imports for consts; default github.com/hearchco/hearchco/src/engines") + enginesImport = flag.String("enginesimport", "github.com/hearchco/hearchco/src/search/engines", "source of the engines import, which is prefixed to imports for consts; default github.com/hearchco/hearchco/src/search/engines") linecomment = flag.Bool("linecomment", false, "use line comment text as printed text when present") ) diff --git a/go.mod b/go.mod index f2a4ee9d..6fb6e16b 100644 --- a/go.mod +++ b/go.mod @@ -90,7 +90,6 @@ require ( github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect - github.com/sourcegraph/conc v0.3.0 github.com/temoto/robotstxt v1.1.2 // indirect golang.org/x/net v0.21.0 golang.org/x/sys v0.17.0 // indirect diff --git a/go.sum b/go.sum index 2c7ca4ef..03b703a9 100644 --- a/go.sum +++ b/go.sum @@ -247,8 +247,6 @@ github.com/rs/zerolog v1.32.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWR github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA= github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod 
h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= -github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= -github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= diff --git a/src/cli/climode.go b/src/cli/climode.go index 0d9a4572..6f692048 100644 --- a/src/cli/climode.go +++ b/src/cli/climode.go @@ -5,12 +5,12 @@ import ( "time" "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket/result" "github.com/hearchco/hearchco/src/cache" - "github.com/hearchco/hearchco/src/category" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" "github.com/hearchco/hearchco/src/search" + "github.com/hearchco/hearchco/src/search/category" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/result" "github.com/rs/zerolog/log" ) diff --git a/src/cli/setup.go b/src/cli/setup.go index 049592cb..513e44db 100644 --- a/src/cli/setup.go +++ b/src/cli/setup.go @@ -4,8 +4,8 @@ import ( "fmt" "github.com/alecthomas/kong" - "github.com/hearchco/hearchco/src/category" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/category" + "github.com/hearchco/hearchco/src/search/engines" "github.com/rs/zerolog/log" ) diff --git a/src/config/defaults.go b/src/config/defaults.go index 56a10612..ecd5b60a 100644 --- a/src/config/defaults.go +++ b/src/config/defaults.go @@ -4,9 +4,9 @@ import ( "log" "time" - "github.com/hearchco/hearchco/src/category" - "github.com/hearchco/hearchco/src/engines" "github.com/hearchco/hearchco/src/moretime" + "github.com/hearchco/hearchco/src/search/category" + "github.com/hearchco/hearchco/src/search/engines" ) 
const DefaultLocale string = "en_US" diff --git a/src/config/load.go b/src/config/load.go index f82d6bbe..563f74b3 100644 --- a/src/config/load.go +++ b/src/config/load.go @@ -6,9 +6,9 @@ import ( "strings" "time" - "github.com/hearchco/hearchco/src/category" - "github.com/hearchco/hearchco/src/engines" "github.com/hearchco/hearchco/src/moretime" + "github.com/hearchco/hearchco/src/search/category" + "github.com/hearchco/hearchco/src/search/engines" "github.com/knadh/koanf/parsers/yaml" "github.com/knadh/koanf/providers/env" "github.com/knadh/koanf/providers/file" diff --git a/src/config/structs.go b/src/config/structs.go index de30ea71..bb48079a 100644 --- a/src/config/structs.go +++ b/src/config/structs.go @@ -3,8 +3,8 @@ package config import ( "time" - "github.com/hearchco/hearchco/src/category" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/category" + "github.com/hearchco/hearchco/src/search/engines" ) type EngineRanking struct { diff --git a/src/router/search.go b/src/router/search.go index ac9ad080..d88e8e56 100644 --- a/src/router/search.go +++ b/src/router/search.go @@ -8,12 +8,12 @@ import ( "github.com/gin-gonic/gin" "github.com/goccy/go-json" - "github.com/hearchco/hearchco/src/bucket/result" "github.com/hearchco/hearchco/src/cache" - "github.com/hearchco/hearchco/src/category" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" "github.com/hearchco/hearchco/src/search" + "github.com/hearchco/hearchco/src/search/category" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/result" ) func Search(c *gin.Context, conf *config.Config, db cache.DB) error { diff --git a/src/bucket/bucket.go b/src/search/bucket/addresult.go similarity index 50% rename from src/bucket/bucket.go rename to src/search/bucket/addresult.go index fd8a8463..442ee88b 100644 --- a/src/bucket/bucket.go +++ b/src/search/bucket/addresult.go @@ -1,22 +1,14 @@ package 
bucket import ( - "fmt" - "sync" - "github.com/gocolly/colly/v2" - "github.com/hearchco/hearchco/src/bucket/result" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/result" "github.com/rs/zerolog/log" ) -type Relay struct { - ResultMap map[string]*result.Result - Mutex sync.RWMutex -} - -func AddSEResult(seResult *engines.RetrievedResult, seName engines.Name, relay *Relay, options *engines.Options, pagesCol *colly.Collector) { +func AddSEResult(seResult *result.RetrievedResult, seName engines.Name, relay *Relay, options *engines.Options, pagesCol *colly.Collector) { log.Trace(). Str("engine", seName.String()). Str("title", seResult.Title). @@ -28,7 +20,7 @@ func AddSEResult(seResult *engines.RetrievedResult, seName engines.Name, relay * relay.Mutex.RUnlock() if !exists { - engineRanks := make([]engines.RetrievedRank, len(config.EnabledEngines)) + engineRanks := make([]result.RetrievedRank, len(config.EnabledEngines)) engineRanks[0] = seResult.Rank result := result.Result{ URL: seResult.URL, @@ -74,42 +66,3 @@ func AddSEResult(seResult *engines.RetrievedResult, seName engines.Name, relay * } } } - -func SetResultResponse(link string, response *colly.Response, relay *Relay, seName engines.Name) error { - log.Trace(). - Str("engine", seName.String()). - Str("link", link). - Msg("Got response") - - relay.Mutex.Lock() - mapRes, exists := relay.ResultMap[link] - - if !exists { - relay.Mutex.Unlock() - relay.Mutex.RLock() - err := fmt.Errorf("bucket.SetResultResponse(): URL not in map when adding response, should not be possible. 
URL: %v.\nRelay: %v", link, relay) - relay.Mutex.RUnlock() - return err - } else { - mapRes.Response = response - relay.Mutex.Unlock() - } - - return nil -} - -func MakeSEResult(urll string, title string, description string, searchEngineName engines.Name, sePage int, seOnPageRank int) *engines.RetrievedResult { - ser := engines.RetrievedRank{ - SearchEngine: searchEngineName, - Rank: 0, - Page: uint(sePage), - OnPageRank: uint(seOnPageRank), - } - res := engines.RetrievedResult{ - URL: urll, - Title: title, - Description: description, - Rank: ser, - } - return &res -} diff --git a/src/search/bucket/makeresult.go b/src/search/bucket/makeresult.go new file mode 100644 index 00000000..ceca5038 --- /dev/null +++ b/src/search/bucket/makeresult.go @@ -0,0 +1,22 @@ +package bucket + +import ( + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/result" +) + +func MakeSEResult(urll string, title string, description string, searchEngineName engines.Name, sePage int, seOnPageRank int) *result.RetrievedResult { + ser := result.RetrievedRank{ + SearchEngine: searchEngineName, + Rank: 0, + Page: uint(sePage), + OnPageRank: uint(seOnPageRank), + } + res := result.RetrievedResult{ + URL: urll, + Title: title, + Description: description, + Rank: ser, + } + return &res +} diff --git a/src/search/bucket/relay.go b/src/search/bucket/relay.go new file mode 100644 index 00000000..0d69d74a --- /dev/null +++ b/src/search/bucket/relay.go @@ -0,0 +1,12 @@ +package bucket + +import ( + "sync" + + "github.com/hearchco/hearchco/src/search/result" +) + +type Relay struct { + ResultMap map[string]*result.Result + Mutex sync.RWMutex +} diff --git a/src/search/bucket/setresponse.go b/src/search/bucket/setresponse.go new file mode 100644 index 00000000..64c703e3 --- /dev/null +++ b/src/search/bucket/setresponse.go @@ -0,0 +1,32 @@ +package bucket + +import ( + "fmt" + + "github.com/gocolly/colly/v2" + "github.com/hearchco/hearchco/src/search/engines" + 
"github.com/rs/zerolog/log" +) + +func SetResultResponse(link string, response *colly.Response, relay *Relay, seName engines.Name) error { + log.Trace(). + Str("engine", seName.String()). + Str("link", link). + Msg("Got response") + + relay.Mutex.Lock() + mapRes, exists := relay.ResultMap[link] + + if !exists { + relay.Mutex.Unlock() + relay.Mutex.RLock() + err := fmt.Errorf("bucket.SetResultResponse(): URL not in map when adding response, should not be possible. URL: %v.\nRelay: %v", link, relay) + relay.Mutex.RUnlock() + return err + } else { + mapRes.Response = response + relay.Mutex.Unlock() + } + + return nil +} diff --git a/src/search/cache.go b/src/search/cache.go index ed393c17..007f6989 100644 --- a/src/search/cache.go +++ b/src/search/cache.go @@ -2,10 +2,10 @@ package search import ( "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket/result" "github.com/hearchco/hearchco/src/cache" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/result" "github.com/rs/zerolog/log" ) diff --git a/src/category/category.go b/src/search/category/category.go similarity index 100% rename from src/category/category.go rename to src/search/category/category.go diff --git a/src/category/name.go b/src/search/category/name.go similarity index 100% rename from src/category/name.go rename to src/search/category/name.go diff --git a/src/engines/_engines_test/structs.go b/src/search/engines/_engines_test/structs.go similarity index 88% rename from src/engines/_engines_test/structs.go rename to src/search/engines/_engines_test/structs.go index 63eed460..24ec18bd 100644 --- a/src/engines/_engines_test/structs.go +++ b/src/search/engines/_engines_test/structs.go @@ -3,9 +3,9 @@ package _engines_test import ( "time" - "github.com/hearchco/hearchco/src/category" "github.com/hearchco/hearchco/src/config" - 
"github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/category" + "github.com/hearchco/hearchco/src/search/engines" ) type TestCaseHasAnyResults struct { diff --git a/src/engines/_engines_test/tester.go b/src/search/engines/_engines_test/tester.go similarity index 100% rename from src/engines/_engines_test/tester.go rename to src/search/engines/_engines_test/tester.go diff --git a/src/search/engines/_sedefaults/colly.go b/src/search/engines/_sedefaults/colly.go new file mode 100644 index 00000000..439397fb --- /dev/null +++ b/src/search/engines/_sedefaults/colly.go @@ -0,0 +1,68 @@ +package _sedefaults + +import ( + "context" + "fmt" + "os" + + "github.com/gocolly/colly/v2" + "github.com/hearchco/hearchco/src/config" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" +) + +func ColRequest(seName engines.Name, col *colly.Collector, ctx context.Context) { + col.OnRequest(func(r *colly.Request) { + if err := ctx.Err(); err != nil { + if engines.IsTimeoutError(err) { + log.Trace(). + Err(err). + Str("engine", seName.String()). + Msg("_sedefaults.ColRequest() -> col.OnRequest(): context timeout error") + } else { + log.Error(). + Err(err). + Str("engine", seName.String()). + Msg("_sedefaults.ColRequest() -> col.OnRequest(): context error") + } + r.Abort() + return + } + }) +} + +func ColError(seName engines.Name, col *colly.Collector) { + col.OnError(func(r *colly.Response, err error) { + if engines.IsTimeoutError(err) { + log.Trace(). + // Err(err). // timeout error produces Get "url" error with the query + Str("engine", seName.String()). + // Str("url", urll). // can't reliably anonymize it (because it's engine dependent and query isn't passed to this function) + Msg("_sedefaults.ColError() -> col.OnError(): request timeout error for url") + } else { + log.Error(). + Err(err). + Str("engine", seName.String()). + // Str("url", urll). 
// can't reliably anonymize it (because it's engine dependent and query isn't passed to this function) + Int("statusCode", r.StatusCode). + Str("response", string(r.Body)). // query can be present, depending on the response from the engine (Google has the query in 3 places) + Msg("_sedefaults.ColError() -> col.OnError(): request error for url") + + dumpPath := fmt.Sprintf("%v%v_col.log.html", config.LogDumpLocation, seName.String()) + log.Debug(). + Str("engine", seName.String()). + Str("responsePath", dumpPath). + Func(func(e *zerolog.Event) { + bodyWriteErr := os.WriteFile(dumpPath, r.Body, 0644) + if bodyWriteErr != nil { + log.Error(). + Err(bodyWriteErr). + Str("engine", seName.String()). + Msg("_sedefaults.ColError() -> col.OnError(): error writing html response body to file") + } + }). + Msg("_sedefaults.ColError() -> col.OnError(): html response written") + } + }) +} diff --git a/src/search/engines/_sedefaults/collypages.go b/src/search/engines/_sedefaults/collypages.go new file mode 100644 index 00000000..177cea98 --- /dev/null +++ b/src/search/engines/_sedefaults/collypages.go @@ -0,0 +1,61 @@ +package _sedefaults + +import ( + "context" + + "github.com/gocolly/colly/v2" + "github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/rs/zerolog/log" +) + +func PagesColRequest(seName engines.Name, pagesCol *colly.Collector, ctx context.Context) { + pagesCol.OnRequest(func(r *colly.Request) { + if err := ctx.Err(); err != nil { + if engines.IsTimeoutError(err) { + log.Trace(). + Err(err). + Str("engine", seName.String()). + Msg("_sedefaults.PagesColRequest() -> pagesCol.OnRequest(): context timeout error") + } else { + log.Error(). + Err(err). + Str("engine", seName.String()). 
+ Msg("_sedefaults.PagesColRequest() -> pagesCol.OnRequest(): context error") + } + r.Abort() + return + } + r.Ctx.Put("originalURL", r.URL.String()) + }) +} + +func PagesColError(seName engines.Name, pagesCol *colly.Collector) { + pagesCol.OnError(func(r *colly.Response, err error) { + urll := r.Ctx.Get("originalURL") + if engines.IsTimeoutError(err) { + log.Trace(). + Err(err). + Str("engine", seName.String()). + Str("url", urll). + Msg("_sedefaults.PagesColError() -> pagesCol.OnError(): request timeout error for url") + } else { + log.Trace(). + Err(err). + Str("engine", seName.String()). + Str("url", urll). + Str("response", string(r.Body)). + Msg("_sedefaults.PagesColError() -> pagesCol.OnError(): request error for url") + } + }) +} + +func PagesColResponse(seName engines.Name, pagesCol *colly.Collector, relay *bucket.Relay) { + pagesCol.OnResponse(func(r *colly.Response) { + urll := r.Ctx.Get("originalURL") + err := bucket.SetResultResponse(urll, r, relay, seName) + if err != nil { + log.Error().Err(err).Msg("_sedefaults.PagesColResponse(): error setting result") + } + }) +} diff --git a/src/search/engines/_sedefaults/init.go b/src/search/engines/_sedefaults/init.go new file mode 100644 index 00000000..24d07d2a --- /dev/null +++ b/src/search/engines/_sedefaults/init.go @@ -0,0 +1,58 @@ +package _sedefaults + +import ( + "fmt" + + "github.com/gocolly/colly/v2" + "github.com/gocolly/colly/v2/proxy" + "github.com/hearchco/hearchco/src/config" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/rs/zerolog/log" +) + +func InitializeCollectors(colPtr **colly.Collector, pagesColPtr **colly.Collector, settings *config.Settings, options *engines.Options, timings *config.Timings) { + *colPtr = colly.NewCollector(colly.MaxDepth(1), colly.UserAgent(options.UserAgent), colly.Async()) + *pagesColPtr = colly.NewCollector(colly.MaxDepth(1), colly.UserAgent(options.UserAgent), colly.Async()) + + if timings != nil { + var limitRule *colly.LimitRule = 
&colly.LimitRule{ + DomainGlob: "*", + Delay: timings.Delay, + RandomDelay: timings.RandomDelay, + Parallelism: timings.Parallelism, + } + + if err := (*colPtr).Limit(limitRule); err != nil { + log.Error(). + Err(err). + Str("limitRule", fmt.Sprintf("%v", limitRule)). + Msg("_sedefaults.InitializeCollectors(): failed adding new limit rule") + } + + if timings.Timeout != 0 { + (*colPtr).SetRequestTimeout(timings.Timeout) + } + + if timings.PageTimeout != 0 { + (*pagesColPtr).SetRequestTimeout(timings.PageTimeout) + } + } + + if settings.Proxies != nil { + log.Debug(). + Strs("proxies", settings.Proxies). + Msg("Using proxies") + + // Rotate proxies + rp, err := proxy.RoundRobinProxySwitcher(settings.Proxies...) + if err != nil { + log.Fatal(). + Err(err). + Strs("proxies", settings.Proxies). + Msg("_sedefaults.InitializeCollectors(): failed creating proxy switcher") + } + + (*colPtr).SetProxyFunc(rp) + (*pagesColPtr).SetProxyFunc(rp) + } +} diff --git a/src/search/engines/_sedefaults/pagecontext.go b/src/search/engines/_sedefaults/pagecontext.go new file mode 100644 index 00000000..8370c898 --- /dev/null +++ b/src/search/engines/_sedefaults/pagecontext.go @@ -0,0 +1,23 @@ +package _sedefaults + +import ( + "strconv" + + "github.com/gocolly/colly/v2" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/rs/zerolog/log" +) + +func PageFromContext(ctx *colly.Context, seName engines.Name) int { + var pageStr string = ctx.Get("page") + page, converr := strconv.Atoi(pageStr) + if converr != nil { + log.Panic(). + Err(converr). + Str("engine", seName.String()). + Str("page", pageStr). 
+ Msg("_sedefaults.PageFromContext(): failed to convert page number to int") + // ^PANIC + } + return page +} diff --git a/src/search/engines/_sedefaults/prepare.go b/src/search/engines/_sedefaults/prepare.go new file mode 100644 index 00000000..2662cb02 --- /dev/null +++ b/src/search/engines/_sedefaults/prepare.go @@ -0,0 +1,67 @@ +package _sedefaults + +import ( + "context" + + "github.com/hearchco/hearchco/src/config" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/useragent" + "github.com/rs/zerolog/log" +) + +func Prepare(seName engines.Name, options *engines.Options, settings *config.Settings, support *engines.SupportedSettings, info *engines.Info, ctx *context.Context) error { + if ctx != nil && *ctx == nil { + *ctx = context.Background() + } + + if options.UserAgent == "" { + options.UserAgent = useragent.RandomUserAgent() + } + log.Trace(). + Str("engine", seName.String()). + Str("userAgent", options.UserAgent). + Msg("Prepare") + + // TODO: move to config.SetupConfig + if settings.RequestedResultsPerPage != 0 && !support.RequestedResultsPerPage { + log.Panic(). + Str("engine", seName.String()). + Int("requestedResultsPerPage", settings.RequestedResultsPerPage). + Msg("_sedefaults.Prepare(): setting not supported by engine") + // ^PANIC + } + if settings.RequestedResultsPerPage == 0 && support.RequestedResultsPerPage { + // if its used in the code but not set, give it the default value + settings.RequestedResultsPerPage = info.ResultsPerPage + } + + if options.Mobile && !support.Mobile { + options.Mobile = false // this line shouldn't matter [1] + log.Debug(). + Str("engine", seName.String()). + Bool("mobile", options.Mobile). + Msg("Mobile set but not supported") + } + + if options.Locale != "" && !support.Locale { + options.Locale = config.DefaultLocale // [1] + log.Debug(). + Str("engine", seName.String()). + Str("locale", options.Locale). 
+ Msg("Locale set but not supported") + } + + if options.Locale == "" && support.Locale { + options.Locale = config.DefaultLocale + } + + if options.SafeSearch && !support.SafeSearch { + options.SafeSearch = false // [1] + log.Debug(). + Str("engine", seName.String()). + Bool("safeSearch", options.SafeSearch). + Msg("SafeSearch set but not supported") + } + + return nil +} diff --git a/src/search/engines/_sedefaults/requests.go b/src/search/engines/_sedefaults/requests.go new file mode 100644 index 00000000..c15d2b87 --- /dev/null +++ b/src/search/engines/_sedefaults/requests.go @@ -0,0 +1,32 @@ +package _sedefaults + +import ( + "fmt" + "io" + + "github.com/gocolly/colly/v2" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/rs/zerolog/log" +) + +func DoGetRequest(urll string, anonurll string, colCtx *colly.Context, collector *colly.Collector, packageName engines.Name, retError *error) { + log.Trace(). + Str("engine", packageName.String()). + Str("url", anonurll). + Msg("GET") + err := collector.Request("GET", urll, nil, colCtx, nil) + if err != nil { + *retError = fmt.Errorf("%v.Search(): failed GET request to %v with %w", packageName.ToLower(), urll, err) + } +} + +func DoPostRequest(urll string, requestData io.Reader, colCtx *colly.Context, collector *colly.Collector, packageName engines.Name, retError *error) { + log.Trace(). + Str("engine", packageName.String()). + Str("url", urll). + Msg("POST") + err := collector.Request("POST", urll, requestData, colCtx, nil) + if err != nil { + *retError = fmt.Errorf("%v.Search(): failed POST request to %v and body %v. 
error %w", packageName.ToLower(), urll, requestData, err) + } +} diff --git a/src/engines/bing/bing.go b/src/search/engines/bing/bing.go similarity index 78% rename from src/engines/bing/bing.go rename to src/search/engines/bing/bing.go index f0dc2454..e4cd7380 100644 --- a/src/engines/bing/bing.go +++ b/src/search/engines/bing/bing.go @@ -9,16 +9,16 @@ import ( "github.com/gocolly/colly/v2" "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_sedefaults" "github.com/hearchco/hearchco/src/search/parse" - "github.com/hearchco/hearchco/src/sedefaults" "github.com/rs/zerolog/log" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { - if err := sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { + if err := _sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { return err } @@ -26,14 +26,14 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi var pagesCol *colly.Collector var retError error - sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) + _sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) - sedefaults.PagesColError(Info.Name, pagesCol) - sedefaults.PagesColResponse(Info.Name, pagesCol, relay) + _sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) + _sedefaults.PagesColError(Info.Name, pagesCol) + _sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, ctx) - sedefaults.ColError(Info.Name, col) + _sedefaults.ColRequest(Info.Name, col, ctx) + 
_sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) @@ -55,7 +55,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi descText = strings.Split(descText, "Web")[1] } - page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name) + page := _sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) @@ -77,7 +77,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query + localeParam anonUrll := Info.URL + anonymize.String(query) + localeParam - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() @@ -85,7 +85,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query + "&first=" + strconv.Itoa(i*10+1) + localeParam anonUrll := Info.URL + anonymize.String(query) + "&first=" + strconv.Itoa(i*10+1) + localeParam - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/bing/bing.md b/src/search/engines/bing/bing.md similarity index 100% rename from src/engines/bing/bing.md rename to src/search/engines/bing/bing.md diff --git a/src/engines/bing/bing_test.go b/src/search/engines/bing/bing_test.go similarity index 87% rename from src/engines/bing/bing_test.go rename to src/search/engines/bing/bing_test.go index b8158557..feed8ec4 100644 --- a/src/engines/bing/bing_test.go +++ b/src/search/engines/bing/bing_test.go @@ -3,8 +3,8 @@ package bing_test import ( "testing" - "github.com/hearchco/hearchco/src/engines" - 
"github.com/hearchco/hearchco/src/engines/_engines_test" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { diff --git a/src/engines/bing/options.go b/src/search/engines/bing/options.go similarity index 87% rename from src/engines/bing/options.go rename to src/search/engines/bing/options.go index 723c0cfc..b495c8e3 100644 --- a/src/engines/bing/options.go +++ b/src/search/engines/bing/options.go @@ -1,6 +1,8 @@ package bing -import "github.com/hearchco/hearchco/src/engines" +import ( + "github.com/hearchco/hearchco/src/search/engines" +) var Info engines.Info = engines.Info{ Domain: "www.bing.com", diff --git a/src/engines/brave/brave.go b/src/search/engines/brave/brave.go similarity index 75% rename from src/engines/brave/brave.go rename to src/search/engines/brave/brave.go index 58196f8d..e0289954 100644 --- a/src/engines/brave/brave.go +++ b/src/search/engines/brave/brave.go @@ -7,15 +7,15 @@ import ( "github.com/gocolly/colly/v2" "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_sedefaults" "github.com/hearchco/hearchco/src/search/parse" - "github.com/hearchco/hearchco/src/sedefaults" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { - if err := sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { + if err := _sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { return err } @@ -23,14 +23,14 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi var pagesCol *colly.Collector var retError 
error - sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) + _sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) - sedefaults.PagesColError(Info.Name, pagesCol) - sedefaults.PagesColResponse(Info.Name, pagesCol, relay) + _sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) + _sedefaults.PagesColError(Info.Name, pagesCol) + _sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, ctx) - sedefaults.ColError(Info.Name, col) + _sedefaults.ColRequest(Info.Name, col, ctx) + _sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) @@ -58,7 +58,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi descText = strings.TrimSpace(dom.Find("p.snippet-description").Text()) } - page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name) + page := _sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) @@ -71,7 +71,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query + "&source=web" anonUrll := Info.URL + anonymize.String(query) + "&source=web" - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() @@ -79,7 +79,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query + "&spellcheck=0&offset=" + strconv.Itoa(i) anonUrll := Info.URL + anonymize.String(query) + "&spellcheck=0&offset=" + strconv.Itoa(i) - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, 
anonUrll, colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/brave/brave_test.go b/src/search/engines/brave/brave_test.go similarity index 87% rename from src/engines/brave/brave_test.go rename to src/search/engines/brave/brave_test.go index b528a4cc..9157745e 100644 --- a/src/engines/brave/brave_test.go +++ b/src/search/engines/brave/brave_test.go @@ -3,8 +3,8 @@ package brave_test import ( "testing" - "github.com/hearchco/hearchco/src/engines" - "github.com/hearchco/hearchco/src/engines/_engines_test" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { diff --git a/src/engines/brave/options.go b/src/search/engines/brave/options.go similarity index 88% rename from src/engines/brave/options.go rename to src/search/engines/brave/options.go index 7dabd01f..b7af318a 100644 --- a/src/engines/brave/options.go +++ b/src/search/engines/brave/options.go @@ -1,6 +1,8 @@ package brave -import "github.com/hearchco/hearchco/src/engines" +import ( + "github.com/hearchco/hearchco/src/search/engines" +) var Info engines.Info = engines.Info{ Domain: "search.brave.com", diff --git a/src/engines/duckduckgo/ddg.md b/src/search/engines/duckduckgo/ddg.md similarity index 100% rename from src/engines/duckduckgo/ddg.md rename to src/search/engines/duckduckgo/ddg.md diff --git a/src/engines/duckduckgo/duckduckgo.go b/src/search/engines/duckduckgo/duckduckgo.go similarity index 74% rename from src/engines/duckduckgo/duckduckgo.go rename to src/search/engines/duckduckgo/duckduckgo.go index 9b1c6c78..5d10f714 100644 --- a/src/engines/duckduckgo/duckduckgo.go +++ b/src/search/engines/duckduckgo/duckduckgo.go @@ -9,15 +9,15 @@ import ( "github.com/PuerkitoBio/goquery" "github.com/gocolly/colly/v2" "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + 
"github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_sedefaults" "github.com/hearchco/hearchco/src/search/parse" - "github.com/hearchco/hearchco/src/sedefaults" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { - if err := sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { + if err := _sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { return err } @@ -25,14 +25,14 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi var pagesCol *colly.Collector var retError error - sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) + _sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) - sedefaults.PagesColError(Info.Name, pagesCol) - sedefaults.PagesColResponse(Info.Name, pagesCol, relay) + _sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) + _sedefaults.PagesColError(Info.Name, pagesCol) + _sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, ctx) - sedefaults.ColError(Info.Name, col) + _sedefaults.ColRequest(Info.Name, col, ctx) + _sedefaults.ColError(Info.Name, col) localeCookie := getLocale(&options) @@ -80,13 +80,13 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + "?q=" + query anonUrll := Info.URL + "?q=" + anonymize.String(query) - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() colCtx.Put("page", strconv.Itoa(i+1)) - sedefaults.DoPostRequest(Info.URL, 
strings.NewReader("q="+query+"&dc="+strconv.Itoa(i*20)), colCtx, col, Info.Name, &retError) + _sedefaults.DoPostRequest(Info.URL, strings.NewReader("q="+query+"&dc="+strconv.Itoa(i*20)), colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/duckduckgo/duckduckgo_test.go b/src/search/engines/duckduckgo/duckduckgo_test.go similarity index 87% rename from src/engines/duckduckgo/duckduckgo_test.go rename to src/search/engines/duckduckgo/duckduckgo_test.go index 061e9037..1f78a3dc 100644 --- a/src/engines/duckduckgo/duckduckgo_test.go +++ b/src/search/engines/duckduckgo/duckduckgo_test.go @@ -3,8 +3,8 @@ package duckduckgo_test import ( "testing" - "github.com/hearchco/hearchco/src/engines" - "github.com/hearchco/hearchco/src/engines/_engines_test" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { diff --git a/src/engines/duckduckgo/options.go b/src/search/engines/duckduckgo/options.go similarity index 89% rename from src/engines/duckduckgo/options.go rename to src/search/engines/duckduckgo/options.go index bf044edc..af33d20c 100644 --- a/src/engines/duckduckgo/options.go +++ b/src/search/engines/duckduckgo/options.go @@ -1,6 +1,8 @@ package duckduckgo -import "github.com/hearchco/hearchco/src/engines" +import ( + "github.com/hearchco/hearchco/src/search/engines" +) var Info engines.Info = engines.Info{ Domain: "lite.duckduckgo.com", diff --git a/src/engines/etools/captcha.png b/src/search/engines/etools/captcha.png similarity index 100% rename from src/engines/etools/captcha.png rename to src/search/engines/etools/captcha.png diff --git a/src/engines/etools/etools.go b/src/search/engines/etools/etools.go similarity index 71% rename from src/engines/etools/etools.go rename to src/search/engines/etools/etools.go index 411cfed9..517ae878 100644 --- a/src/engines/etools/etools.go +++ b/src/search/engines/etools/etools.go @@ -6,16 +6,16 @@ import ( 
"strings" "github.com/gocolly/colly/v2" - "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_sedefaults" "github.com/hearchco/hearchco/src/search/parse" - "github.com/hearchco/hearchco/src/sedefaults" "github.com/rs/zerolog/log" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { - if err := sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { + if err := _sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { return err } @@ -23,14 +23,14 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi var pagesCol *colly.Collector var retError error - sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) + _sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) - sedefaults.PagesColError(Info.Name, pagesCol) - sedefaults.PagesColResponse(Info.Name, pagesCol, relay) + _sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) + _sedefaults.PagesColError(Info.Name, pagesCol) + _sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, ctx) - sedefaults.ColError(Info.Name, col) + _sedefaults.ColRequest(Info.Name, col, ctx) + _sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) @@ -53,7 +53,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi descText := strings.TrimSpace(dom.Find(dompaths.Description).Text()) if hrefExists && linkText != "" && linkText != "#" && titleText != "" { - page := 
sedefaults.PageFromContext(e.Request.Ctx, Info.Name) + page := _sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) @@ -74,7 +74,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi colCtx := colly.NewContext() colCtx.Put("page", strconv.Itoa(1)) - sedefaults.DoPostRequest(Info.URL, strings.NewReader("query="+query+"&country=web&language=all"+safeSearchParam), colCtx, col, Info.Name, &retError) + _sedefaults.DoPostRequest(Info.URL, strings.NewReader("query="+query+"&country=web&language=all"+safeSearchParam), colCtx, col, Info.Name, &retError) col.Wait() // wait so I can get the JSESSION cookie back for i := 1; i < options.MaxPages; i++ { @@ -83,7 +83,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi colCtx.Put("page", pageStr) // query not needed as its saved in the session - sedefaults.DoGetRequest(pageURL+pageStr, pageURL+pageStr, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(pageURL+pageStr, pageURL+pageStr, colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/etools/etools.md b/src/search/engines/etools/etools.md similarity index 100% rename from src/engines/etools/etools.md rename to src/search/engines/etools/etools.md diff --git a/src/engines/etools/etools_test.go b/src/search/engines/etools/etools_test.go similarity index 87% rename from src/engines/etools/etools_test.go rename to src/search/engines/etools/etools_test.go index 7e1f4013..5c60d541 100644 --- a/src/engines/etools/etools_test.go +++ b/src/search/engines/etools/etools_test.go @@ -3,8 +3,8 @@ package etools_test import ( "testing" - "github.com/hearchco/hearchco/src/engines" - "github.com/hearchco/hearchco/src/engines/_engines_test" + "github.com/hearchco/hearchco/src/search/engines" + 
"github.com/hearchco/hearchco/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { diff --git a/src/engines/etools/options.go b/src/search/engines/etools/options.go similarity index 89% rename from src/engines/etools/options.go rename to src/search/engines/etools/options.go index 48b7219b..b334e3e1 100644 --- a/src/engines/etools/options.go +++ b/src/search/engines/etools/options.go @@ -1,6 +1,8 @@ package etools -import "github.com/hearchco/hearchco/src/engines" +import ( + "github.com/hearchco/hearchco/src/search/engines" +) const pageURL string = "https://www.etools.ch/search.do?page=" diff --git a/src/engines/google/google.go b/src/search/engines/google/google.go similarity index 66% rename from src/engines/google/google.go rename to src/search/engines/google/google.go index c4991710..b34c304e 100644 --- a/src/engines/google/google.go +++ b/src/search/engines/google/google.go @@ -7,15 +7,15 @@ import ( "github.com/gocolly/colly/v2" "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_sedefaults" "github.com/hearchco/hearchco/src/search/parse" - "github.com/hearchco/hearchco/src/sedefaults" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { - if err := sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { + if err := _sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { return err } @@ -23,14 +23,14 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi var pagesCol *colly.Collector var retError error - sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, 
&timings) + _sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) - sedefaults.PagesColError(Info.Name, pagesCol) - sedefaults.PagesColResponse(Info.Name, pagesCol, relay) + _sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) + _sedefaults.PagesColError(Info.Name, pagesCol) + _sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, ctx) - sedefaults.ColError(Info.Name, col) + _sedefaults.ColRequest(Info.Name, col, ctx) + _sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) @@ -43,7 +43,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi descText := strings.TrimSpace(dom.Find(dompaths.Description).Text()) if hrefExists && linkText != "" && linkText != "#" && titleText != "" { - page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name) + page := _sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) @@ -56,7 +56,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query anonUrll := Info.URL + anonymize.String(query) - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() @@ -64,7 +64,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query + "&start=" + strconv.Itoa(i*10) anonUrll := Info.URL + anonymize.String(query) + "&start=" + strconv.Itoa(i*10) - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) } col.Wait() diff 
--git a/src/engines/google/google_test.go b/src/search/engines/google/google_test.go similarity index 87% rename from src/engines/google/google_test.go rename to src/search/engines/google/google_test.go index 5c466684..e19bde40 100644 --- a/src/engines/google/google_test.go +++ b/src/search/engines/google/google_test.go @@ -3,8 +3,8 @@ package google_test import ( "testing" - "github.com/hearchco/hearchco/src/engines" - "github.com/hearchco/hearchco/src/engines/_engines_test" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { diff --git a/src/engines/google/options.go b/src/search/engines/google/options.go similarity index 89% rename from src/engines/google/options.go rename to src/search/engines/google/options.go index 58d8d376..b7fa92a1 100644 --- a/src/engines/google/options.go +++ b/src/search/engines/google/options.go @@ -1,7 +1,7 @@ package google import ( - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/engines" ) var Info engines.Info = engines.Info{ diff --git a/src/engines/googlescholar/googlescholar.go b/src/search/engines/googlescholar/googlescholar.go similarity index 73% rename from src/engines/googlescholar/googlescholar.go rename to src/search/engines/googlescholar/googlescholar.go index 1d8e2600..5476526c 100644 --- a/src/engines/googlescholar/googlescholar.go +++ b/src/search/engines/googlescholar/googlescholar.go @@ -8,16 +8,16 @@ import ( "github.com/gocolly/colly/v2" "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_sedefaults" "github.com/hearchco/hearchco/src/search/parse" - "github.com/hearchco/hearchco/src/sedefaults" 
"github.com/rs/zerolog/log" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { - if err := sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { + if err := _sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { return err } @@ -25,14 +25,14 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi var pagesCol *colly.Collector var retError error - sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) + _sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) - sedefaults.PagesColError(Info.Name, pagesCol) - sedefaults.PagesColResponse(Info.Name, pagesCol, relay) + _sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) + _sedefaults.PagesColError(Info.Name, pagesCol) + _sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, ctx) - sedefaults.ColError(Info.Name, col) + _sedefaults.ColRequest(Info.Name, col, ctx) + _sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) @@ -48,7 +48,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi descText = citeInfo + " || " + descText if hrefExists && linkText != "" && linkText != "#" && titleText != "" { - page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name) + page := _sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) @@ -61,7 +61,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query anonUrll := Info.URL + anonymize.String(query) - sedefaults.DoGetRequest(urll, anonUrll, 
colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() @@ -69,7 +69,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query + "&start=" + strconv.Itoa(i*10) anonUrll := Info.URL + anonymize.String(query) + "&start=" + strconv.Itoa(i*10) - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/googlescholar/googlescholar_test.go b/src/search/engines/googlescholar/googlescholar_test.go similarity index 89% rename from src/engines/googlescholar/googlescholar_test.go rename to src/search/engines/googlescholar/googlescholar_test.go index 5d385b93..170303fd 100644 --- a/src/engines/googlescholar/googlescholar_test.go +++ b/src/search/engines/googlescholar/googlescholar_test.go @@ -3,8 +3,8 @@ package googlescholar_test import ( "testing" - "github.com/hearchco/hearchco/src/engines" - "github.com/hearchco/hearchco/src/engines/_engines_test" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { diff --git a/src/engines/googlescholar/options.go b/src/search/engines/googlescholar/options.go similarity index 88% rename from src/engines/googlescholar/options.go rename to src/search/engines/googlescholar/options.go index 68cb9474..14f4fcba 100644 --- a/src/engines/googlescholar/options.go +++ b/src/search/engines/googlescholar/options.go @@ -1,6 +1,8 @@ package googlescholar -import "github.com/hearchco/hearchco/src/engines" +import ( + "github.com/hearchco/hearchco/src/search/engines" +) var Info engines.Info = engines.Info{ Domain: "scholar.google.com", diff --git a/src/engines/mojeek/mojeek.go b/src/search/engines/mojeek/mojeek.go similarity index 72% rename from 
src/engines/mojeek/mojeek.go rename to src/search/engines/mojeek/mojeek.go index 8c42c573..7986d681 100644 --- a/src/engines/mojeek/mojeek.go +++ b/src/search/engines/mojeek/mojeek.go @@ -7,15 +7,15 @@ import ( "github.com/gocolly/colly/v2" "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_sedefaults" "github.com/hearchco/hearchco/src/search/parse" - "github.com/hearchco/hearchco/src/sedefaults" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { - if err := sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { + if err := _sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { return err } @@ -23,14 +23,14 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi var pagesCol *colly.Collector var retError error - sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) + _sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) - sedefaults.PagesColError(Info.Name, pagesCol) - sedefaults.PagesColResponse(Info.Name, pagesCol, relay) + _sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) + _sedefaults.PagesColError(Info.Name, pagesCol) + _sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, ctx) - sedefaults.ColError(Info.Name, col) + _sedefaults.ColRequest(Info.Name, col, ctx) + _sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) @@ -44,7 +44,7 @@ func Search(ctx context.Context, query string, 
relay *bucket.Relay, options engi descText := strings.TrimSpace(dom.Find(dompaths.Description).Text()) if hrefExists && linkText != "" && linkText != "#" && titleText != "" { - page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name) + page := _sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) @@ -60,7 +60,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query + localeParam + safeSearchParam anonUrll := Info.URL + anonymize.String(query) + localeParam + safeSearchParam - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() @@ -68,7 +68,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query + "&s=" + strconv.Itoa(i*10+1) + localeParam + safeSearchParam anonUrll := Info.URL + anonymize.String(query) + "&s=" + strconv.Itoa(i*10+1) + localeParam + safeSearchParam - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/mojeek/mojeek_test.go b/src/search/engines/mojeek/mojeek_test.go similarity index 87% rename from src/engines/mojeek/mojeek_test.go rename to src/search/engines/mojeek/mojeek_test.go index c26845ff..51504d8f 100644 --- a/src/engines/mojeek/mojeek_test.go +++ b/src/search/engines/mojeek/mojeek_test.go @@ -3,8 +3,8 @@ package mojeek_test import ( "testing" - "github.com/hearchco/hearchco/src/engines" - "github.com/hearchco/hearchco/src/engines/_engines_test" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_engines_test" ) func 
TestSearch(t *testing.T) { diff --git a/src/engines/mojeek/options.go b/src/search/engines/mojeek/options.go similarity index 88% rename from src/engines/mojeek/options.go rename to src/search/engines/mojeek/options.go index 57bcd621..5e16d75b 100644 --- a/src/engines/mojeek/options.go +++ b/src/search/engines/mojeek/options.go @@ -1,6 +1,6 @@ package mojeek -import "github.com/hearchco/hearchco/src/engines" +import "github.com/hearchco/hearchco/src/search/engines" var Info engines.Info = engines.Info{ Domain: "www.mojeek.com", diff --git a/src/engines/name.go b/src/search/engines/name.go similarity index 87% rename from src/engines/name.go rename to src/search/engines/name.go index 98c0b029..d90edd88 100644 --- a/src/engines/name.go +++ b/src/search/engines/name.go @@ -5,7 +5,7 @@ import "strings" type Name uint8 //go:generate enumer -type=Name -json -text -yaml -sql -//go:generate go run github.com/hearchco/hearchco/generate/searcher -type=Name -packagename search -output ../search/engine_searcher.go +//go:generate go run github.com/hearchco/hearchco/generate/searcher -type=Name -packagename search -output ../engine_searcher.go const ( UNDEFINED Name = iota BING diff --git a/src/engines/presearch/json_response.go b/src/search/engines/presearch/json_response.go similarity index 100% rename from src/engines/presearch/json_response.go rename to src/search/engines/presearch/json_response.go diff --git a/src/engines/presearch/options.go b/src/search/engines/presearch/options.go similarity index 91% rename from src/engines/presearch/options.go rename to src/search/engines/presearch/options.go index 48f0ec76..d39ebdf8 100644 --- a/src/engines/presearch/options.go +++ b/src/search/engines/presearch/options.go @@ -1,6 +1,6 @@ package presearch -import "github.com/hearchco/hearchco/src/engines" +import "github.com/hearchco/hearchco/src/search/engines" var Info engines.Info = engines.Info{ Domain: "presearch.com", diff --git a/src/engines/presearch/presearch.go 
b/src/search/engines/presearch/presearch.go similarity index 80% rename from src/engines/presearch/presearch.go rename to src/search/engines/presearch/presearch.go index 5bdff10d..8e1448e3 100644 --- a/src/engines/presearch/presearch.go +++ b/src/search/engines/presearch/presearch.go @@ -8,16 +8,16 @@ import ( "github.com/gocolly/colly/v2" "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_sedefaults" "github.com/hearchco/hearchco/src/search/parse" - "github.com/hearchco/hearchco/src/sedefaults" "github.com/rs/zerolog/log" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { - if err := sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { + if err := _sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { return err } @@ -25,14 +25,14 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi var pagesCol *colly.Collector var retError error - sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) + _sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) - sedefaults.PagesColError(Info.Name, pagesCol) - sedefaults.PagesColResponse(Info.Name, pagesCol, relay) + _sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) + _sedefaults.PagesColError(Info.Name, pagesCol) + _sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, ctx) - sedefaults.ColError(Info.Name, col) + _sedefaults.ColRequest(Info.Name, col, ctx) + _sedefaults.ColError(Info.Name, col) safeSearch := 
getSafeSearch(options.SafeSearch) @@ -100,7 +100,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query anonUrll := Info.URL + anonymize.String(query) - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() @@ -109,7 +109,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query + "&page=" + strconv.Itoa(i+1) anonUrll := Info.URL + anonymize.String(query) + "&page=" + strconv.Itoa(i+1) - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/presearch/presearch.md b/src/search/engines/presearch/presearch.md similarity index 100% rename from src/engines/presearch/presearch.md rename to src/search/engines/presearch/presearch.md diff --git a/src/engines/presearch/presearch_test.go b/src/search/engines/presearch/presearch_test.go similarity index 87% rename from src/engines/presearch/presearch_test.go rename to src/search/engines/presearch/presearch_test.go index 7c026283..81cfb966 100644 --- a/src/engines/presearch/presearch_test.go +++ b/src/search/engines/presearch/presearch_test.go @@ -3,8 +3,8 @@ package presearch_test import ( "testing" - "github.com/hearchco/hearchco/src/engines" - "github.com/hearchco/hearchco/src/engines/_engines_test" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { diff --git a/src/engines/qwant/json_response.go b/src/search/engines/qwant/json_response.go similarity index 100% rename from src/engines/qwant/json_response.go rename to src/search/engines/qwant/json_response.go diff --git a/src/engines/qwant/options.go 
b/src/search/engines/qwant/options.go similarity index 86% rename from src/engines/qwant/options.go rename to src/search/engines/qwant/options.go index b16b4eb7..da1c0ba2 100644 --- a/src/engines/qwant/options.go +++ b/src/search/engines/qwant/options.go @@ -1,6 +1,8 @@ package qwant -import "github.com/hearchco/hearchco/src/engines" +import ( + "github.com/hearchco/hearchco/src/search/engines" +) var Info engines.Info = engines.Info{ Domain: "www.qwant.com", diff --git a/src/engines/qwant/qwant.go b/src/search/engines/qwant/qwant.go similarity index 86% rename from src/engines/qwant/qwant.go rename to src/search/engines/qwant/qwant.go index a30b0933..5e6fe019 100644 --- a/src/engines/qwant/qwant.go +++ b/src/search/engines/qwant/qwant.go @@ -8,16 +8,16 @@ import ( "github.com/gocolly/colly/v2" "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_sedefaults" "github.com/hearchco/hearchco/src/search/parse" - "github.com/hearchco/hearchco/src/sedefaults" "github.com/rs/zerolog/log" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { - if err := sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { + if err := _sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { return err } @@ -25,14 +25,14 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi var pagesCol *colly.Collector var retError error - sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) + _sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) - sedefaults.PagesColRequest(Info.Name, 
pagesCol, ctx) - sedefaults.PagesColError(Info.Name, pagesCol) - sedefaults.PagesColResponse(Info.Name, pagesCol, relay) + _sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) + _sedefaults.PagesColError(Info.Name, pagesCol) + _sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, ctx) - sedefaults.ColError(Info.Name, col) + _sedefaults.ColRequest(Info.Name, col, ctx) + _sedefaults.ColError(Info.Name, col) col.OnResponse(func(r *colly.Response) { var pageStr string = r.Ctx.Get("page") @@ -80,7 +80,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query + "&count=" + strconv.Itoa(nRequested) + localeParam + "&offset=" + strconv.Itoa(i*nRequested) + deviceParam + safeSearchParam anonUrll := Info.URL + anonymize.String(query) + "&count=" + strconv.Itoa(nRequested) + localeParam + "&offset=" + strconv.Itoa(i*nRequested) + deviceParam + safeSearchParam - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/qwant/qwant.md b/src/search/engines/qwant/qwant.md similarity index 100% rename from src/engines/qwant/qwant.md rename to src/search/engines/qwant/qwant.md diff --git a/src/engines/qwant/qwant_test.go b/src/search/engines/qwant/qwant_test.go similarity index 87% rename from src/engines/qwant/qwant_test.go rename to src/search/engines/qwant/qwant_test.go index 9fab81ef..09dfdfae 100644 --- a/src/engines/qwant/qwant_test.go +++ b/src/search/engines/qwant/qwant_test.go @@ -3,8 +3,8 @@ package qwant_test import ( "testing" - "github.com/hearchco/hearchco/src/engines" - "github.com/hearchco/hearchco/src/engines/_engines_test" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { diff --git a/src/engines/startpage/image-1.png 
b/src/search/engines/startpage/image-1.png similarity index 100% rename from src/engines/startpage/image-1.png rename to src/search/engines/startpage/image-1.png diff --git a/src/engines/startpage/image-2.png b/src/search/engines/startpage/image-2.png similarity index 100% rename from src/engines/startpage/image-2.png rename to src/search/engines/startpage/image-2.png diff --git a/src/engines/startpage/image.png b/src/search/engines/startpage/image.png similarity index 100% rename from src/engines/startpage/image.png rename to src/search/engines/startpage/image.png diff --git a/src/engines/startpage/options.go b/src/search/engines/startpage/options.go similarity index 89% rename from src/engines/startpage/options.go rename to src/search/engines/startpage/options.go index 823a54bf..d64a3428 100644 --- a/src/engines/startpage/options.go +++ b/src/search/engines/startpage/options.go @@ -1,6 +1,6 @@ package startpage -import "github.com/hearchco/hearchco/src/engines" +import "github.com/hearchco/hearchco/src/search/engines" var Info engines.Info = engines.Info{ Domain: "www.startpage.com", diff --git a/src/engines/startpage/startpage.go b/src/search/engines/startpage/startpage.go similarity index 76% rename from src/engines/startpage/startpage.go rename to src/search/engines/startpage/startpage.go index 2dac3f26..7126f85d 100644 --- a/src/engines/startpage/startpage.go +++ b/src/search/engines/startpage/startpage.go @@ -7,16 +7,16 @@ import ( "github.com/gocolly/colly/v2" "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_sedefaults" "github.com/hearchco/hearchco/src/search/parse" - "github.com/hearchco/hearchco/src/sedefaults" "github.com/rs/zerolog/log" ) func Search(ctx context.Context, 
query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { - if err := sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { + if err := _sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { return err } @@ -24,14 +24,14 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi var pagesCol *colly.Collector var retError error - sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) + _sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) - sedefaults.PagesColError(Info.Name, pagesCol) - sedefaults.PagesColResponse(Info.Name, pagesCol, relay) + _sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) + _sedefaults.PagesColError(Info.Name, pagesCol) + _sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, ctx) - sedefaults.ColError(Info.Name, col) + _sedefaults.ColRequest(Info.Name, col, ctx) + _sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) @@ -44,7 +44,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi descText := strings.TrimSpace(dom.Find(dompaths.Description).Text()) if hrefExists && linkText != "" && linkText != "#" && titleText != "" { - page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name) + page := _sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) @@ -78,7 +78,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query + safeSearch anonUrll := Info.URL + anonymize.String(query) + safeSearch - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, 
Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() @@ -86,7 +86,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query + "&page=" + strconv.Itoa(i+1) + safeSearch anonUrll := Info.URL + anonymize.String(query) + "&page=" + strconv.Itoa(i+1) + safeSearch - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/startpage/startpage.md b/src/search/engines/startpage/startpage.md similarity index 100% rename from src/engines/startpage/startpage.md rename to src/search/engines/startpage/startpage.md diff --git a/src/engines/startpage/startpage_test.go b/src/search/engines/startpage/startpage_test.go similarity index 89% rename from src/engines/startpage/startpage_test.go rename to src/search/engines/startpage/startpage_test.go index 49db12f1..134d348e 100644 --- a/src/engines/startpage/startpage_test.go +++ b/src/search/engines/startpage/startpage_test.go @@ -3,8 +3,8 @@ package startpage_test import ( "testing" - "github.com/hearchco/hearchco/src/engines" - "github.com/hearchco/hearchco/src/engines/_engines_test" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { diff --git a/src/engines/structs.go b/src/search/engines/structs.go similarity index 76% rename from src/engines/structs.go rename to src/search/engines/structs.go index 5d64b1be..229e1144 100644 --- a/src/engines/structs.go +++ b/src/search/engines/structs.go @@ -3,26 +3,9 @@ package engines import ( "fmt" - "github.com/hearchco/hearchco/src/category" + "github.com/hearchco/hearchco/src/search/category" ) -// variables are 1-indexed -// Information about what Rank a result was on some Search Engine -type 
RetrievedRank struct { - SearchEngine Name - Rank uint - Page uint - OnPageRank uint -} - -// The info a Search Engine returned about some Result -type RetrievedResult struct { - URL string - Title string - Description string - Rank RetrievedRank -} - type SupportedSettings struct { Locale bool SafeSearch bool diff --git a/src/engines/swisscows/authenticator.go b/src/search/engines/swisscows/authenticator.go similarity index 100% rename from src/engines/swisscows/authenticator.go rename to src/search/engines/swisscows/authenticator.go diff --git a/src/engines/swisscows/image-1.png b/src/search/engines/swisscows/image-1.png similarity index 100% rename from src/engines/swisscows/image-1.png rename to src/search/engines/swisscows/image-1.png diff --git a/src/engines/swisscows/image.png b/src/search/engines/swisscows/image.png similarity index 100% rename from src/engines/swisscows/image.png rename to src/search/engines/swisscows/image.png diff --git a/src/engines/swisscows/json_response.go b/src/search/engines/swisscows/json_response.go similarity index 100% rename from src/engines/swisscows/json_response.go rename to src/search/engines/swisscows/json_response.go diff --git a/src/engines/swisscows/options.go b/src/search/engines/swisscows/options.go similarity index 82% rename from src/engines/swisscows/options.go rename to src/search/engines/swisscows/options.go index cde85d26..d6ed4710 100644 --- a/src/engines/swisscows/options.go +++ b/src/search/engines/swisscows/options.go @@ -1,6 +1,8 @@ package swisscows -import "github.com/hearchco/hearchco/src/engines" +import ( + "github.com/hearchco/hearchco/src/search/engines" +) var Info engines.Info = engines.Info{ Domain: "swisscows.com", diff --git a/src/engines/swisscows/swisscows.go b/src/search/engines/swisscows/swisscows.go similarity index 85% rename from src/engines/swisscows/swisscows.go rename to src/search/engines/swisscows/swisscows.go index eda92be9..a6db7e16 100644 --- a/src/engines/swisscows/swisscows.go 
+++ b/src/search/engines/swisscows/swisscows.go @@ -8,16 +8,16 @@ import ( "github.com/gocolly/colly/v2" "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_sedefaults" "github.com/hearchco/hearchco/src/search/parse" - "github.com/hearchco/hearchco/src/sedefaults" "github.com/rs/zerolog/log" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { - if err := sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { + if err := _sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { return err } @@ -25,14 +25,14 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi var pagesCol *colly.Collector var retError error - sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) + _sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) - sedefaults.PagesColError(Info.Name, pagesCol) - sedefaults.PagesColResponse(Info.Name, pagesCol, relay) + _sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) + _sedefaults.PagesColError(Info.Name, pagesCol) + _sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, ctx) - sedefaults.ColError(Info.Name, col) + _sedefaults.ColRequest(Info.Name, col, ctx) + _sedefaults.ColError(Info.Name, col) col.OnRequest(func(r *colly.Request) { if r.Method == "OPTIONS" { @@ -99,7 +99,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + "freshness=All&itemsCount=" + 
strconv.Itoa(settings.RequestedResultsPerPage) + "&offset=" + strconv.Itoa(i*10) + "&query=" + query + localeParam anonUrll := Info.URL + "freshness=All&itemsCount=" + strconv.Itoa(settings.RequestedResultsPerPage) + "&offset=" + strconv.Itoa(i*10) + "&query=" + anonymize.String(query) + localeParam - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/swisscows/swisscows.md b/src/search/engines/swisscows/swisscows.md similarity index 100% rename from src/engines/swisscows/swisscows.md rename to src/search/engines/swisscows/swisscows.md diff --git a/src/engines/swisscows/swisscows_test.go b/src/search/engines/swisscows/swisscows_test.go similarity index 87% rename from src/engines/swisscows/swisscows_test.go rename to src/search/engines/swisscows/swisscows_test.go index 53644114..8c529e40 100644 --- a/src/engines/swisscows/swisscows_test.go +++ b/src/search/engines/swisscows/swisscows_test.go @@ -3,8 +3,8 @@ package swisscows_test import ( "testing" - "github.com/hearchco/hearchco/src/engines" - "github.com/hearchco/hearchco/src/engines/_engines_test" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { diff --git a/src/engines/timeout.go b/src/search/engines/timeout.go similarity index 100% rename from src/engines/timeout.go rename to src/search/engines/timeout.go diff --git a/src/engines/yahoo/options.go b/src/search/engines/yahoo/options.go similarity index 91% rename from src/engines/yahoo/options.go rename to src/search/engines/yahoo/options.go index d1cb26df..ac68e32f 100644 --- a/src/engines/yahoo/options.go +++ b/src/search/engines/yahoo/options.go @@ -1,6 +1,8 @@ package yahoo -import "github.com/hearchco/hearchco/src/engines" +import ( + "github.com/hearchco/hearchco/src/search/engines" +) // doesn't catch the yt videos 
// the title cathes the link - e.g.: teentitans.fandom.com › wiki › Nya-NyaNya-Nya | Teen Titans Wiki | Fandom diff --git a/src/engines/yahoo/yahoo.go b/src/search/engines/yahoo/yahoo.go similarity index 75% rename from src/engines/yahoo/yahoo.go rename to src/search/engines/yahoo/yahoo.go index e2c3ebb8..2210c8b0 100644 --- a/src/engines/yahoo/yahoo.go +++ b/src/search/engines/yahoo/yahoo.go @@ -8,16 +8,16 @@ import ( "github.com/gocolly/colly/v2" "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_sedefaults" "github.com/hearchco/hearchco/src/search/parse" - "github.com/hearchco/hearchco/src/sedefaults" "github.com/rs/zerolog/log" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { - if err := sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { + if err := _sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { return err } @@ -25,14 +25,14 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi var pagesCol *colly.Collector var retError error - sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) + _sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) - sedefaults.PagesColError(Info.Name, pagesCol) - sedefaults.PagesColResponse(Info.Name, pagesCol, relay) + _sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) + _sedefaults.PagesColError(Info.Name, pagesCol) + _sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, ctx) - 
sedefaults.ColError(Info.Name, col) + _sedefaults.ColRequest(Info.Name, col, ctx) + _sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) @@ -53,7 +53,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi descText := strings.TrimSpace(dom.Find(dompaths.Description).Text()) if labelExists && hrefExists && linkText != "" && linkText != "#" && titleText != "" { - page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name) + page := _sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) @@ -66,7 +66,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query anonUrll := Info.URL + anonymize.String(query) - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() @@ -74,7 +74,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi urll := Info.URL + query + "&b=" + strconv.Itoa((i+1)*10) anonUrll := Info.URL + anonymize.String(query) + "&b=" + strconv.Itoa((i+1)*10) - sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/yahoo/yahoo_test.go b/src/search/engines/yahoo/yahoo_test.go similarity index 87% rename from src/engines/yahoo/yahoo_test.go rename to src/search/engines/yahoo/yahoo_test.go index dab5f7af..be5bb184 100644 --- a/src/engines/yahoo/yahoo_test.go +++ b/src/search/engines/yahoo/yahoo_test.go @@ -3,8 +3,8 @@ package yahoo_test import ( "testing" - "github.com/hearchco/hearchco/src/engines" - 
"github.com/hearchco/hearchco/src/engines/_engines_test" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { diff --git a/src/engines/yep/json_response.go b/src/search/engines/yep/json_response.go similarity index 100% rename from src/engines/yep/json_response.go rename to src/search/engines/yep/json_response.go diff --git a/src/engines/yep/options.go b/src/search/engines/yep/options.go similarity index 92% rename from src/engines/yep/options.go rename to src/search/engines/yep/options.go index 645d0f02..cf8c469a 100644 --- a/src/engines/yep/options.go +++ b/src/search/engines/yep/options.go @@ -1,7 +1,7 @@ package yep import ( - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/engines" ) var Info engines.Info = engines.Info{ diff --git a/src/engines/yep/yep.go b/src/search/engines/yep/yep.go similarity index 77% rename from src/engines/yep/yep.go rename to src/search/engines/yep/yep.go index 17e57361..d43fe885 100644 --- a/src/engines/yep/yep.go +++ b/src/search/engines/yep/yep.go @@ -7,15 +7,15 @@ import ( "github.com/gocolly/colly/v2" "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/engines/_sedefaults" "github.com/hearchco/hearchco/src/search/parse" - "github.com/hearchco/hearchco/src/sedefaults" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { - if err := sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { + if err := _sedefaults.Prepare(Info.Name, &options, &settings, &Support, &Info, &ctx); err != nil { return err } @@ 
-23,14 +23,14 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi var pagesCol *colly.Collector var retError error - sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) + _sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) - sedefaults.PagesColError(Info.Name, pagesCol) - sedefaults.PagesColResponse(Info.Name, pagesCol, relay) + _sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) + _sedefaults.PagesColError(Info.Name, pagesCol) + _sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, ctx) - sedefaults.ColError(Info.Name, col) + _sedefaults.ColRequest(Info.Name, col, ctx) + _sedefaults.ColError(Info.Name, col) col.OnRequest(func(r *colly.Request) { r.Headers.Del("Accept") @@ -72,7 +72,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi anonUrll = Info.URL + "client=web" + localeParam + "&limit=" + strconv.Itoa(nRequested) + "&no_correct=false&q=" + anonymize.String(query) + safeSearchParam + "&type=web" } - sedefaults.DoGetRequest(urll, anonUrll, nil, col, Info.Name, &retError) + _sedefaults.DoGetRequest(urll, anonUrll, nil, col, Info.Name, &retError) col.Wait() pagesCol.Wait() diff --git a/src/engines/yep/yep.md b/src/search/engines/yep/yep.md similarity index 100% rename from src/engines/yep/yep.md rename to src/search/engines/yep/yep.md diff --git a/src/engines/yep/yep_test.go b/src/search/engines/yep/yep_test.go similarity index 87% rename from src/engines/yep/yep_test.go rename to src/search/engines/yep/yep_test.go index c23f66e2..ccf82a2f 100644 --- a/src/engines/yep/yep_test.go +++ b/src/search/engines/yep/yep_test.go @@ -3,8 +3,8 @@ package yep_test import ( "testing" - "github.com/hearchco/hearchco/src/engines" - "github.com/hearchco/hearchco/src/engines/_engines_test" + "github.com/hearchco/hearchco/src/search/engines" + 
"github.com/hearchco/hearchco/src/search/engines/_engines_test" ) func TestSearch(t *testing.T) { diff --git a/src/search/perform.go b/src/search/perform.go index 26478c68..22a6ee6f 100644 --- a/src/search/perform.go +++ b/src/search/perform.go @@ -4,30 +4,27 @@ import ( "context" "fmt" "net/url" - "strings" + "sync" "time" "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket" - "github.com/hearchco/hearchco/src/bucket/result" - "github.com/hearchco/hearchco/src/category" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" - "github.com/hearchco/hearchco/src/rank" + "github.com/hearchco/hearchco/src/search/bucket" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/rank" + "github.com/hearchco/hearchco/src/search/result" "github.com/rs/zerolog/log" - "github.com/sourcegraph/conc" ) +// engine_searcher -> NewEngineStarter() uses this +type EngineSearch func(context.Context, string, *bucket.Relay, engines.Options, config.Settings, config.Timings) error + func PerformSearch(query string, options engines.Options, conf *config.Config) []result.Result { searchTimer := time.Now() - relay := bucket.Relay{ - ResultMap: make(map[string]*result.Result), - } - - query, timings, toRun := procBang(query, &options, conf) - + query, timings, enginesToRun := procBang(query, &options, conf) query = url.QueryEscape(query) + log.Debug(). Str("queryAnon", anonymize.String(query)). Str("queryHash", anonymize.HashToSHA256B64(query)). @@ -35,16 +32,18 @@ func PerformSearch(query string, options engines.Options, conf *config.Config) [ resTimer := time.Now() log.Debug().Msg("Waiting for results from engines...") - var worker conc.WaitGroup - runEngines(toRun, timings, conf.Settings, query, &worker, &relay, options) - worker.Wait() + + resultMap := runEngines(enginesToRun, query, options, conf.Settings, timings) + log.Debug(). Int64("ms", time.Since(resTimer).Milliseconds()). 
Msg("Got results") rankTimer := time.Now() log.Debug().Msg("Ranking...") - results := rank.Rank(relay.ResultMap, conf.Categories[options.Category].Ranking) // have to make copy, since its a map value + + results := rank.Rank(resultMap, conf.Categories[options.Category].Ranking) + rankTimeSince := time.Since(rankTimer) log.Debug(). Int64("ms", rankTimeSince.Milliseconds()). @@ -58,82 +57,37 @@ func PerformSearch(query string, options engines.Options, conf *config.Config) [ return results } -// engine_searcher, NewEngineStarter() use this. -type EngineSearch func(context.Context, string, *bucket.Relay, engines.Options, config.Settings, config.Timings) error - -func runEngines(engs []engines.Name, timings config.Timings, settings map[engines.Name]config.Settings, query string, worker *conc.WaitGroup, relay *bucket.Relay, options engines.Options) { +func runEngines(engs []engines.Name, query string, options engines.Options, settings map[engines.Name]config.Settings, timings config.Timings) map[string]*result.Result { config.EnabledEngines = engs log.Info(). Int("number", len(config.EnabledEngines)). Str("engines", fmt.Sprintf("%v", config.EnabledEngines)). Msg("Enabled engines") + relay := bucket.Relay{ + ResultMap: make(map[string]*result.Result), + } + + var wg sync.WaitGroup engineStarter := NewEngineStarter() + for i := range engs { + wg.Add(1) eng := engs[i] // dont change for to `for _, eng := range engs {`, eng retains the same address throughout the whole loop - worker.Go(func() { + go func() { + defer wg.Done() // if an error can be handled inside, it wont be returned // runs the Search function in the engine package - err := engineStarter[eng](context.Background(), query, relay, options, settings[eng], timings) + err := engineStarter[eng](context.Background(), query, &relay, options, settings[eng], timings) if err != nil { log.Error(). Err(err). Str("engine", eng.String()). 
Msg("search.runEngines(): error while searching") } - }) - } -} - -func procBang(query string, options *engines.Options, conf *config.Config) (string, config.Timings, []engines.Name) { - useSpec, specEng := procSpecificEngine(query, options, conf) - goodCat := procCategory(query, options) - if !goodCat && !useSpec && query[0] == '!' { - // options.category is set to GENERAL - log.Debug(). - Str("queryAnon", anonymize.String(query)). - Str("queryHash", anonymize.HashToSHA256B64(query)). - Msg("search.procBang(): invalid bang (not category or engine shortcut)") - } - - query = trimBang(query) - - if useSpec { - return query, conf.Categories[category.GENERAL].Timings, []engines.Name{specEng} - } else { - return query, conf.Categories[options.Category].Timings, conf.Categories[options.Category].Engines - } -} - -func trimBang(query string) string { - if (query)[0] == '!' { - return strings.SplitN(query, " ", 2)[1] - } - return query -} - -func procSpecificEngine(query string, options *engines.Options, conf *config.Config) (bool, engines.Name) { - if query[0] != '!' 
{ - return false, engines.UNDEFINED - } - sp := strings.SplitN(query, " ", 2) - bangWord := sp[0][1:] - for key, val := range conf.Settings { - if strings.EqualFold(bangWord, val.Shortcut) || strings.EqualFold(bangWord, key.String()) { - return true, key - } + }() } - return false, engines.UNDEFINED -} - -func procCategory(query string, options *engines.Options) bool { - cat := category.FromQuery(query) - if cat != "" { - options.Category = cat - } - if options.Category == "" { - options.Category = category.GENERAL - } - return cat != "" + wg.Wait() + return relay.ResultMap } diff --git a/src/rank/math.go b/src/search/rank/math.go similarity index 100% rename from src/rank/math.go rename to src/search/rank/math.go diff --git a/src/rank/rank.go b/src/search/rank/rank.go similarity index 93% rename from src/rank/rank.go rename to src/search/rank/rank.go index 456fa051..9ba0dddf 100644 --- a/src/rank/rank.go +++ b/src/search/rank/rank.go @@ -4,9 +4,9 @@ import ( "math" "sort" - "github.com/hearchco/hearchco/src/bucket/result" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/result" ) func GetScore(result *result.Result, rconf *config.Ranking) float64 { @@ -46,7 +46,7 @@ func Rank(resMap map[string]*result.Result, rconf config.Ranking) []result.Resul type RankFiller struct { ArrInd int - RetRank engines.RetrievedRank + RetRank result.RetrievedRank RRInd int } diff --git a/src/rank/sorting.go b/src/search/rank/sorting.go similarity index 94% rename from src/rank/sorting.go rename to src/search/rank/sorting.go index d35fb238..dbd85aec 100644 --- a/src/rank/sorting.go +++ b/src/search/rank/sorting.go @@ -3,7 +3,7 @@ package rank import ( "fmt" - "github.com/hearchco/hearchco/src/bucket/result" + "github.com/hearchco/hearchco/src/search/result" "github.com/rs/zerolog/log" ) diff --git a/src/bucket/result/result.go 
b/src/search/result/result.go similarity index 90% rename from src/bucket/result/result.go rename to src/search/result/result.go index ee60ee4f..36905f34 100644 --- a/src/bucket/result/result.go +++ b/src/search/result/result.go @@ -2,7 +2,6 @@ package result import ( "github.com/gocolly/colly/v2" - "github.com/hearchco/hearchco/src/engines" ) // Everything about some Result, calculated and compiled from multiple search engines @@ -13,7 +12,7 @@ type Result struct { Score float64 Title string Description string - EngineRanks []engines.RetrievedRank + EngineRanks []RetrievedRank TimesReturned uint8 Response *colly.Response } diff --git a/src/search/result/retrieved.go b/src/search/result/retrieved.go new file mode 100644 index 00000000..093814b1 --- /dev/null +++ b/src/search/result/retrieved.go @@ -0,0 +1,20 @@ +package result + +import "github.com/hearchco/hearchco/src/search/engines" + +// variables are 1-indexed +// Information about what Rank a result was on some Search Engine +type RetrievedRank struct { + SearchEngine engines.Name + Rank uint + Page uint + OnPageRank uint +} + +// The info a Search Engine returned about some Result +type RetrievedResult struct { + URL string + Title string + Description string + Rank RetrievedRank +} diff --git a/src/search/search.go b/src/search/search.go index 2bf519aa..d723702c 100644 --- a/src/search/search.go +++ b/src/search/search.go @@ -2,10 +2,10 @@ package search import ( "github.com/hearchco/hearchco/src/anonymize" - "github.com/hearchco/hearchco/src/bucket/result" "github.com/hearchco/hearchco/src/cache" "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/hearchco/hearchco/src/search/result" "github.com/rs/zerolog/log" ) diff --git a/src/search/utils.go b/src/search/utils.go new file mode 100644 index 00000000..bbe6d01a --- /dev/null +++ b/src/search/utils.go @@ -0,0 +1,64 @@ +package search + +import ( + "strings" 
+ + "github.com/hearchco/hearchco/src/anonymize" + "github.com/hearchco/hearchco/src/config" + "github.com/hearchco/hearchco/src/search/category" + "github.com/hearchco/hearchco/src/search/engines" + "github.com/rs/zerolog/log" +) + +func procBang(query string, options *engines.Options, conf *config.Config) (string, config.Timings, []engines.Name) { + useSpec, specEng := procSpecificEngine(query, options, conf) + goodCat := procCategory(query, options) + if !goodCat && !useSpec && query[0] == '!' { + // options.category is set to GENERAL + log.Debug(). + Str("queryAnon", anonymize.String(query)). + Str("queryHash", anonymize.HashToSHA256B64(query)). + Msg("search.procBang(): invalid bang (not category or engine shortcut)") + } + + query = trimBang(query) + + if useSpec { + return query, conf.Categories[category.GENERAL].Timings, []engines.Name{specEng} + } else { + return query, conf.Categories[options.Category].Timings, conf.Categories[options.Category].Engines + } +} + +func trimBang(query string) string { + if (query)[0] == '!' { + return strings.SplitN(query, " ", 2)[1] + } + return query +} + +func procSpecificEngine(query string, options *engines.Options, conf *config.Config) (bool, engines.Name) { + if query[0] != '!' 
{ + return false, engines.UNDEFINED + } + sp := strings.SplitN(query, " ", 2) + bangWord := sp[0][1:] + for key, val := range conf.Settings { + if strings.EqualFold(bangWord, val.Shortcut) || strings.EqualFold(bangWord, key.String()) { + return true, key + } + } + + return false, engines.UNDEFINED +} + +func procCategory(query string, options *engines.Options) bool { + cat := category.FromQuery(query) + if cat != "" { + options.Category = cat + } + if options.Category == "" { + options.Category = category.GENERAL + } + return cat != "" +} diff --git a/src/sedefaults/sedefaults.go b/src/sedefaults/sedefaults.go deleted file mode 100644 index b273a198..00000000 --- a/src/sedefaults/sedefaults.go +++ /dev/null @@ -1,264 +0,0 @@ -package sedefaults - -import ( - "context" - "fmt" - "io" - "os" - "strconv" - - "github.com/gocolly/colly/v2" - "github.com/gocolly/colly/v2/proxy" - "github.com/hearchco/hearchco/src/bucket" - "github.com/hearchco/hearchco/src/config" - "github.com/hearchco/hearchco/src/engines" - "github.com/hearchco/hearchco/src/search/useragent" - "github.com/rs/zerolog" - "github.com/rs/zerolog/log" -) - -func PagesColRequest(seName engines.Name, pagesCol *colly.Collector, ctx context.Context) { - pagesCol.OnRequest(func(r *colly.Request) { - if err := ctx.Err(); err != nil { - if engines.IsTimeoutError(err) { - log.Trace(). - Err(err). - Str("engine", seName.String()). - Msg("sedefaults.PagesColRequest() -> pagesCol.OnRequest(): context timeout error") - } else { - log.Error(). - Err(err). - Str("engine", seName.String()). - Msg("sedefaults.PagesColRequest() -> pagesCol.OnRequest(): context error") - } - r.Abort() - return - } - r.Ctx.Put("originalURL", r.URL.String()) - }) -} - -func PagesColError(seName engines.Name, pagesCol *colly.Collector) { - pagesCol.OnError(func(r *colly.Response, err error) { - urll := r.Ctx.Get("originalURL") - if engines.IsTimeoutError(err) { - log.Trace(). - Err(err). - Str("engine", seName.String()). - Str("url", urll). 
- Msg("sedefaults.PagesColError() -> pagesCol.OnError(): request timeout error for url") - } else { - log.Trace(). - Err(err). - Str("engine", seName.String()). - Str("url", urll). - Str("response", string(r.Body)). - Msg("sedefaults.PagesColError() -> pagesCol.OnError(): request error for url") - } - }) -} - -func PagesColResponse(seName engines.Name, pagesCol *colly.Collector, relay *bucket.Relay) { - pagesCol.OnResponse(func(r *colly.Response) { - urll := r.Ctx.Get("originalURL") - err := bucket.SetResultResponse(urll, r, relay, seName) - if err != nil { - log.Error().Err(err).Msg("sedefaults.PagesColResponse(): error setting result") - } - }) -} - -func ColRequest(seName engines.Name, col *colly.Collector, ctx context.Context) { - col.OnRequest(func(r *colly.Request) { - if err := ctx.Err(); err != nil { - if engines.IsTimeoutError(err) { - log.Trace(). - Err(err). - Str("engine", seName.String()). - Msg("sedefaults.ColRequest() -> col.OnRequest(): context timeout error") - } else { - log.Error(). - Err(err). - Str("engine", seName.String()). - Msg("sedefaults.ColRequest() -> col.OnRequest(): context error") - } - r.Abort() - return - } - }) -} - -func ColError(seName engines.Name, col *colly.Collector) { - col.OnError(func(r *colly.Response, err error) { - if engines.IsTimeoutError(err) { - log.Trace(). - // Err(err). // timeout error produces Get "url" error with the query - Str("engine", seName.String()). - // Str("url", urll). // can't reliably anonymize it (because it's engine dependent and query isn't passed to this function) - Msg("sedefaults.ColError() -> col.OnError(): request timeout error for url") - } else { - log.Error(). - Err(err). - Str("engine", seName.String()). - // Str("url", urll). // can't reliably anonymize it (because it's engine dependent and query isn't passed to this function) - Int("statusCode", r.StatusCode). - Str("response", string(r.Body)). 
// query can be present, depending on the response from the engine (Google has the query in 3 places) - Msg("sedefaults.ColError() -> col.OnError(): request error for url") - - dumpPath := fmt.Sprintf("%v%v_col.log.html", config.LogDumpLocation, seName.String()) - log.Debug(). - Str("engine", seName.String()). - Str("responsePath", dumpPath). - Func(func(e *zerolog.Event) { - bodyWriteErr := os.WriteFile(dumpPath, r.Body, 0644) - if bodyWriteErr != nil { - log.Error(). - Err(bodyWriteErr). - Str("engine", seName.String()). - Msg("sedefaults.ColError() -> col.OnError(): error writing html response body to file") - } - }). - Msg("sedefaults.ColError() -> col.OnError(): html response written") - } - }) -} - -func Prepare(seName engines.Name, options *engines.Options, settings *config.Settings, support *engines.SupportedSettings, info *engines.Info, ctx *context.Context) error { - if ctx == nil { - *ctx = context.Background() - } - - if options.UserAgent == "" { - options.UserAgent = useragent.RandomUserAgent() - } - log.Trace(). - Str("engine", seName.String()). - Str("userAgent", options.UserAgent). - Msg("Prepare") - - // TODO: move to config.SetupConfig - if settings.RequestedResultsPerPage != 0 && !support.RequestedResultsPerPage { - log.Panic(). - Str("engine", seName.String()). - Int("requestedResultsPerPage", settings.RequestedResultsPerPage). - Msg("sedefaults.Prepare(): setting not supported by engine") - // ^PANIC - } - if settings.RequestedResultsPerPage == 0 && support.RequestedResultsPerPage { - // if its used in the code but not set, give it the default value - settings.RequestedResultsPerPage = info.ResultsPerPage - } - - if options.Mobile && !support.Mobile { - options.Mobile = false // this line shouldn't matter [1] - log.Debug(). - Str("engine", seName.String()). - Bool("mobile", options.Mobile). - Msg("Mobile set but not supported") - } - - if options.Locale != "" && !support.Locale { - options.Locale = config.DefaultLocale // [1] - log.Debug(). 
- Str("engine", seName.String()). - Str("locale", options.Locale). - Msg("Locale set but not supported") - } - - if options.Locale == "" && support.Locale { - options.Locale = config.DefaultLocale - } - - if options.SafeSearch && !support.SafeSearch { - options.SafeSearch = false // [1] - log.Debug(). - Str("engine", seName.String()). - Bool("safeSearch", options.SafeSearch). - Msg("SafeSearch set but not supported") - } - - return nil -} - -func InitializeCollectors(colPtr **colly.Collector, pagesColPtr **colly.Collector, settings *config.Settings, options *engines.Options, timings *config.Timings) { - *colPtr = colly.NewCollector(colly.MaxDepth(1), colly.UserAgent(options.UserAgent), colly.Async()) - *pagesColPtr = colly.NewCollector(colly.MaxDepth(1), colly.UserAgent(options.UserAgent), colly.Async()) - - if timings != nil { - var limitRule *colly.LimitRule = &colly.LimitRule{ - DomainGlob: "*", - Delay: timings.Delay, - RandomDelay: timings.RandomDelay, - Parallelism: timings.Parallelism, - } - - if err := (*colPtr).Limit(limitRule); err != nil { - log.Error(). - Err(err). - Str("limitRule", fmt.Sprintf("%v", limitRule)). - Msg("sedefaults.InitializeCollectors(): failed adding new limit rule") - } - - if timings.Timeout != 0 { - (*colPtr).SetRequestTimeout(timings.Timeout) - } - - if timings.PageTimeout != 0 { - (*pagesColPtr).SetRequestTimeout(timings.PageTimeout) - } - } - - if settings.Proxies != nil { - log.Debug(). - Strs("proxies", settings.Proxies). - Msg("Using proxies") - - // Rotate proxies - rp, err := proxy.RoundRobinProxySwitcher(settings.Proxies...) - if err != nil { - log.Fatal(). - Err(err). - Strs("proxies", settings.Proxies). - Msg("sedefaults.InitializeCollectors(): failed creating proxy switcher") - } - - (*colPtr).SetProxyFunc(rp) - (*pagesColPtr).SetProxyFunc(rp) - } -} - -func DoGetRequest(urll string, anonurll string, colCtx *colly.Context, collector *colly.Collector, packageName engines.Name, retError *error) { - log.Trace(). 
- Str("engine", packageName.String()). - Str("url", anonurll). - Msg("GET") - err := collector.Request("GET", urll, nil, colCtx, nil) - if err != nil { - *retError = fmt.Errorf("%v.Search(): failed GET request to %v with %w", packageName.ToLower(), urll, err) - } -} - -func DoPostRequest(urll string, requestData io.Reader, colCtx *colly.Context, collector *colly.Collector, packageName engines.Name, retError *error) { - log.Trace(). - Str("engine", packageName.String()). - Str("url", urll). - Msg("POST") - err := collector.Request("POST", urll, requestData, colCtx, nil) - if err != nil { - *retError = fmt.Errorf("%v.Search(): failed POST request to %v and body %v. error %w", packageName.ToLower(), requestData, urll, err) - } -} - -func PageFromContext(ctx *colly.Context, seName engines.Name) int { - var pageStr string = ctx.Get("page") - page, converr := strconv.Atoi(pageStr) - if converr != nil { - log.Panic(). - Err(converr). - Str("engine", seName.String()). - Str("page", pageStr). - Msg("sedefaults.PageFromContext(): failed to convert page number to int") - // ^PANIC - } - return page -}