Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(search,bucket,sedefaults): better project structure #203

Merged
merged 13 commits into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,34 +1,40 @@
run:
go run ./src
run-cli:
go run ./src --cli

debug:
go run ./src -v
# Same as `debug`, but with the --cli flag added. Must point at ./src like
# every other run/debug/trace target in this Makefile.
debug-cli:
	go run ./src -v --cli

trace:
go run ./src -vv
trace-cli:
go run ./src -vv --cli

setup:
go get ./...
go install github.com/dmarkham/enumer@latest
go generate ./...
install: setup

build:
go build ./...

test:
sh ./scripts/test.sh

test-engines:
sh ./scripts/test-engines.sh

test-redis:
sh ./scripts/test-redis.sh

test-redis-podman:
sh ./scripts/test-redis-podman.sh

test-redis-docker:
sh ./scripts/test-redis-docker.sh
test-all: test test-redis test-engines
test-all-podman: test test-redis-podman test-engines
test-all-docker: test test-redis-docker test-engines

update:
go get -u ./...
Expand Down
2 changes: 1 addition & 1 deletion generate/searcher/searcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ var (
trimprefix = flag.String("trimprefix", "", "trim the `prefix` from the generated constant names")
buildTags = flag.String("tags", "", "comma-separated list of build tags to apply")
packageName = flag.String("packagename", "", "name of the package for generated code; default current package")
enginesImport = flag.String("enginesimport", "github.com/hearchco/hearchco/src/engines", "source of the engines import, which is prefixed to imports for consts; default github.com/hearchco/hearchco/src/engines")
enginesImport = flag.String("enginesimport", "github.com/hearchco/hearchco/src/search/engines", "source of the engines import, which is prefixed to imports for consts; default github.com/hearchco/hearchco/src/search/engines")
linecomment = flag.Bool("linecomment", false, "use line comment text as printed text when present")
)

Expand Down
6 changes: 3 additions & 3 deletions src/cli/climode.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ import (
"time"

"github.com/hearchco/hearchco/src/anonymize"
"github.com/hearchco/hearchco/src/bucket/result"
"github.com/hearchco/hearchco/src/cache"
"github.com/hearchco/hearchco/src/category"
"github.com/hearchco/hearchco/src/config"
"github.com/hearchco/hearchco/src/engines"
"github.com/hearchco/hearchco/src/search"
"github.com/hearchco/hearchco/src/search/category"
"github.com/hearchco/hearchco/src/search/engines"
"github.com/hearchco/hearchco/src/search/result"
"github.com/rs/zerolog/log"
)

Expand Down
4 changes: 2 additions & 2 deletions src/cli/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import (
"fmt"

"github.com/alecthomas/kong"
"github.com/hearchco/hearchco/src/category"
"github.com/hearchco/hearchco/src/engines"
"github.com/hearchco/hearchco/src/search/category"
"github.com/hearchco/hearchco/src/search/engines"
"github.com/rs/zerolog/log"
)

Expand Down
4 changes: 2 additions & 2 deletions src/config/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ import (
"log"
"time"

"github.com/hearchco/hearchco/src/category"
"github.com/hearchco/hearchco/src/engines"
"github.com/hearchco/hearchco/src/moretime"
"github.com/hearchco/hearchco/src/search/category"
"github.com/hearchco/hearchco/src/search/engines"
)

const DefaultLocale string = "en_US"
Expand Down
4 changes: 2 additions & 2 deletions src/config/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ import (
"strings"
"time"

"github.com/hearchco/hearchco/src/category"
"github.com/hearchco/hearchco/src/engines"
"github.com/hearchco/hearchco/src/moretime"
"github.com/hearchco/hearchco/src/search/category"
"github.com/hearchco/hearchco/src/search/engines"
"github.com/knadh/koanf/parsers/yaml"
"github.com/knadh/koanf/providers/env"
"github.com/knadh/koanf/providers/file"
Expand Down
4 changes: 2 additions & 2 deletions src/config/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ package config
import (
"time"

"github.com/hearchco/hearchco/src/category"
"github.com/hearchco/hearchco/src/engines"
"github.com/hearchco/hearchco/src/search/category"
"github.com/hearchco/hearchco/src/search/engines"
)

type EngineRanking struct {
Expand Down
6 changes: 3 additions & 3 deletions src/router/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ import (
"github.com/gin-gonic/gin"
"github.com/goccy/go-json"

"github.com/hearchco/hearchco/src/bucket/result"
"github.com/hearchco/hearchco/src/cache"
"github.com/hearchco/hearchco/src/category"
"github.com/hearchco/hearchco/src/config"
"github.com/hearchco/hearchco/src/engines"
"github.com/hearchco/hearchco/src/search"
"github.com/hearchco/hearchco/src/search/category"
"github.com/hearchco/hearchco/src/search/engines"
"github.com/hearchco/hearchco/src/search/result"
)

func Search(c *gin.Context, conf *config.Config, db cache.DB) error {
Expand Down
55 changes: 4 additions & 51 deletions src/bucket/bucket.go → src/search/bucket/addresult.go
Original file line number Diff line number Diff line change
@@ -1,22 +1,14 @@
package bucket

import (
"fmt"
"sync"

"github.com/gocolly/colly/v2"
"github.com/hearchco/hearchco/src/bucket/result"
"github.com/hearchco/hearchco/src/config"
"github.com/hearchco/hearchco/src/engines"
"github.com/hearchco/hearchco/src/search/engines"
"github.com/hearchco/hearchco/src/search/result"
"github.com/rs/zerolog/log"
)

// Relay bundles the map of collected results with the lock that protects it.
type Relay struct {
// ResultMap holds the collected results. SetResultResponse looks entries up
// by the result's URL.
ResultMap map[string]*result.Result
// Mutex is taken by the functions in this package around every access to
// ResultMap.
Mutex sync.RWMutex
}

func AddSEResult(seResult *engines.RetrievedResult, seName engines.Name, relay *Relay, options *engines.Options, pagesCol *colly.Collector) {
func AddSEResult(seResult *result.RetrievedResult, seName engines.Name, relay *Relay, options *engines.Options, pagesCol *colly.Collector) {
log.Trace().
Str("engine", seName.String()).
Str("title", seResult.Title).
Expand All @@ -28,7 +20,7 @@ func AddSEResult(seResult *engines.RetrievedResult, seName engines.Name, relay *
relay.Mutex.RUnlock()

if !exists {
engineRanks := make([]engines.RetrievedRank, len(config.EnabledEngines))
engineRanks := make([]result.RetrievedRank, len(config.EnabledEngines))
engineRanks[0] = seResult.Rank
result := result.Result{
URL: seResult.URL,
Expand Down Expand Up @@ -74,42 +66,3 @@ func AddSEResult(seResult *engines.RetrievedResult, seName engines.Name, relay *
}
}
}

// SetResultResponse attaches response to the result stored under link in
// relay.ResultMap.
//
// The relay's write lock is held for the whole lookup-and-assign sequence, so
// the error message built on failure describes the very same relay state in
// which the lookup failed (no unlock/re-lock window in between).
//
// seName is only used for the trace log line. An error is returned when link
// has no entry in relay.ResultMap; in that case nothing is modified.
func SetResultResponse(link string, response *colly.Response, relay *Relay, seName engines.Name) error {
	log.Trace().
		Str("engine", seName.String()).
		Str("link", link).
		Msg("Got response")

	relay.Mutex.Lock()
	defer relay.Mutex.Unlock()

	mapRes, exists := relay.ResultMap[link]
	if !exists {
		// The write lock already grants exclusive access, so the relay can be
		// formatted here without switching to a read lock.
		return fmt.Errorf("bucket.SetResultResponse(): URL not in map when adding response, should not be possible. URL: %v.\nRelay: %v", link, relay)
	}

	mapRes.Response = response
	return nil
}

// MakeSEResult builds a retrieved result for a single search engine hit.
//
// The URL, title and description are copied as given. The embedded rank
// records the engine name, the page the hit was found on and its position on
// that page (both converted to uint); the Rank field itself is set to 0.
func MakeSEResult(urll string, title string, description string, searchEngineName engines.Name, sePage int, seOnPageRank int) *engines.RetrievedResult {
	return &engines.RetrievedResult{
		URL:         urll,
		Title:       title,
		Description: description,
		Rank: engines.RetrievedRank{
			SearchEngine: searchEngineName,
			Rank:         0,
			Page:         uint(sePage),
			OnPageRank:   uint(seOnPageRank),
		},
	}
}
22 changes: 22 additions & 0 deletions src/search/bucket/makeresult.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package bucket

import (
"github.com/hearchco/hearchco/src/search/engines"
"github.com/hearchco/hearchco/src/search/result"
)

// MakeSEResult builds a retrieved result for a single search engine hit.
//
// The URL, title and description are copied as given. The embedded rank
// records the engine name, the page the hit was found on and its position on
// that page (both converted to uint); the Rank field itself is set to 0.
func MakeSEResult(urll string, title string, description string, searchEngineName engines.Name, sePage int, seOnPageRank int) *result.RetrievedResult {
	return &result.RetrievedResult{
		URL:         urll,
		Title:       title,
		Description: description,
		Rank: result.RetrievedRank{
			SearchEngine: searchEngineName,
			Rank:         0,
			Page:         uint(sePage),
			OnPageRank:   uint(seOnPageRank),
		},
	}
}
12 changes: 12 additions & 0 deletions src/search/bucket/relay.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package bucket

import (
"sync"

"github.com/hearchco/hearchco/src/search/result"
)

// Relay bundles the map of collected results with the lock that protects it.
type Relay struct {
// ResultMap holds the collected results, keyed by a string.
// NOTE(review): presumably the key is the result's URL — confirm against the
// code that fills the map.
ResultMap map[string]*result.Result
// Mutex is meant to be held while ResultMap is read or modified.
Mutex sync.RWMutex
}
32 changes: 32 additions & 0 deletions src/search/bucket/setresponse.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package bucket

import (
"fmt"

"github.com/gocolly/colly/v2"
"github.com/hearchco/hearchco/src/search/engines"
"github.com/rs/zerolog/log"
)

// SetResultResponse attaches response to the result stored under link in
// relay.ResultMap.
//
// The relay's write lock is held for the whole lookup-and-assign sequence, so
// the error message built on failure describes the very same relay state in
// which the lookup failed (no unlock/re-lock window in between).
//
// seName is only used for the trace log line. An error is returned when link
// has no entry in relay.ResultMap; in that case nothing is modified.
func SetResultResponse(link string, response *colly.Response, relay *Relay, seName engines.Name) error {
	log.Trace().
		Str("engine", seName.String()).
		Str("link", link).
		Msg("Got response")

	relay.Mutex.Lock()
	defer relay.Mutex.Unlock()

	mapRes, exists := relay.ResultMap[link]
	if !exists {
		// The write lock already grants exclusive access, so the relay can be
		// formatted here without switching to a read lock.
		return fmt.Errorf("bucket.SetResultResponse(): URL not in map when adding response, should not be possible. URL: %v.\nRelay: %v", link, relay)
	}

	mapRes.Response = response
	return nil
}
4 changes: 2 additions & 2 deletions src/search/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ package search

import (
"github.com/hearchco/hearchco/src/anonymize"
"github.com/hearchco/hearchco/src/bucket/result"
"github.com/hearchco/hearchco/src/cache"
"github.com/hearchco/hearchco/src/config"
"github.com/hearchco/hearchco/src/engines"
"github.com/hearchco/hearchco/src/search/engines"
"github.com/hearchco/hearchco/src/search/result"
"github.com/rs/zerolog/log"
)

Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ package _engines_test
import (
"time"

"github.com/hearchco/hearchco/src/category"
"github.com/hearchco/hearchco/src/config"
"github.com/hearchco/hearchco/src/engines"
"github.com/hearchco/hearchco/src/search/category"
"github.com/hearchco/hearchco/src/search/engines"
)

type TestCaseHasAnyResults struct {
Expand Down
68 changes: 68 additions & 0 deletions src/search/engines/_sedefaults/colly.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package _sedefaults

import (
"context"
"fmt"
"os"

"github.com/gocolly/colly/v2"
"github.com/hearchco/hearchco/src/config"
"github.com/hearchco/hearchco/src/search/engines"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)

// ColRequest registers an OnRequest callback on col that aborts the outgoing
// request whenever ctx already carries an error.
//
// A timeout error (as judged by engines.IsTimeoutError) is logged at trace
// level, any other context error at error level. Requests made while ctx has
// no error pass through untouched.
func ColRequest(seName engines.Name, col *colly.Collector, ctx context.Context) {
	col.OnRequest(func(r *colly.Request) {
		err := ctx.Err()
		if err == nil {
			// Context is still fine, let the request proceed.
			return
		}

		switch {
		case engines.IsTimeoutError(err):
			log.Trace().
				Err(err).
				Str("engine", seName.String()).
				Msg("_sedefaults.ColRequest() -> col.OnRequest(): context timeout error")
		default:
			log.Error().
				Err(err).
				Str("engine", seName.String()).
				Msg("_sedefaults.ColRequest() -> col.OnRequest(): context error")
		}

		r.Abort()
	})
}

// ColError registers an OnError callback on col that logs failed requests of
// the seName engine.
//
// Timeout errors produce a single trace-level line. Every other error is
// logged at error level together with the status code and the response body,
// after which a debug-level event writes the body to
// "<config.LogDumpLocation><engine>_col.log.html".
func ColError(seName engines.Name, col *colly.Collector) {
	col.OnError(func(r *colly.Response, err error) {
		engineName := seName.String()

		if engines.IsTimeoutError(err) {
			// Neither the error nor the URL is logged here: the timeout error
			// text is a `Get "url"` message that contains the query, and the
			// URL can't be reliably anonymized (it is engine dependent and the
			// query isn't passed to this function).
			log.Trace().
				Str("engine", engineName).
				Msg("_sedefaults.ColError() -> col.OnError(): request timeout error for url")
			return
		}

		// The URL is left out for the same anonymization reason. The response
		// body may still contain the query, depending on the engine (Google
		// has the query in 3 places).
		log.Error().
			Err(err).
			Str("engine", engineName).
			Int("statusCode", r.StatusCode).
			Str("response", string(r.Body)).
			Msg("_sedefaults.ColError() -> col.OnError(): request error for url")

		dumpPath := fmt.Sprintf("%v%v_col.log.html", config.LogDumpLocation, engineName)

		// Handed to the debug event below; it performs the actual dump and
		// reports a failed write as a separate error-level log line.
		// NOTE(review): zerolog presumably skips this callback when the debug
		// level is disabled, so no file would be written then — confirm.
		dumpBody := func(e *zerolog.Event) {
			if bodyWriteErr := os.WriteFile(dumpPath, r.Body, 0644); bodyWriteErr != nil {
				log.Error().
					Err(bodyWriteErr).
					Str("engine", engineName).
					Msg("_sedefaults.ColError() -> col.OnError(): error writing html response body to file")
			}
		}

		log.Debug().
			Str("engine", engineName).
			Str("responsePath", dumpPath).
			Func(dumpBody).
			Msg("_sedefaults.ColError() -> col.OnError(): html response written")
	})
}
Loading
Loading