Skip to content

Commit

Permalink
🐛 improve documentation/logs and minors
Browse files Browse the repository at this point in the history
  • Loading branch information
perebaj committed Jan 19, 2024
1 parent 8ed182d commit e555007
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 9 deletions.
4 changes: 2 additions & 2 deletions cmd/newsletter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ type Config struct {
func main() {

cfg := Config{
LogLevel: getEnvWithDefault("LOG_LEVEL", "INFO"),
LogType: getEnvWithDefault("LOG_TYPE", "json"),
LogLevel: getEnvWithDefault("LOG_LEVEL", ""),
LogType: getEnvWithDefault("LOG_TYPE", ""),
Mongo: mongodb.Config{
URI: getEnvWithDefault("NL_MONGO_URI", ""),
},
Expand Down
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ services:
GOLANGCI_LINT_VERSION: $GOLANGCI_LINT_VERSION
environment:
NL_MONGO_URI: "mongodb://root:root@mongodb:27017"
LOG_LEVEL: "DEBUG"
LOG_TYPE: "json"
depends_on:
- mongodb
volumes:
Expand Down
10 changes: 5 additions & 5 deletions scrape.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ type PageContent struct {
URL string
}

// Storage is the interface that wraps the basic methods to save data and get data from the database
// Storage is the interface that wraps the basic methods to save and get data from the database
type Storage interface {
SaveSite(ctx context.Context, site []mongodb.Site) error
DistinctEngineerURLs(ctx context.Context) ([]interface{}, error)
Expand Down Expand Up @@ -50,7 +50,7 @@ func NewCrawler(maxJobs int, s time.Duration, signalCh chan os.Signal) *Crawler
}
}

// Run starts the crawler, s is the storage to save the results, f is the function to get the content of a url
// Run starts the crawler, where s represents the storage and f the function to fetch the content of a website
func (c *Crawler) Run(ctx context.Context, s Storage, f func(string) (string, error)) {
c.wg.Add(c.MaxJobs)
for i := 0; i < c.MaxJobs; i++ {
Expand All @@ -60,14 +60,14 @@ func (c *Crawler) Run(ctx context.Context, s Storage, f func(string) (string, er
go func() {
defer close(c.URLch)
for range time.Tick(c.scheduler) {
slog.Info("fetching engineers")
slog.Debug("fetching engineers")
gotURLs, err := s.DistinctEngineerURLs(ctx)
if err != nil {
slog.Error("error getting engineers", "error", err)
c.signalCh <- syscall.SIGTERM
}

slog.Info("fetched engineers", "engineers", len(gotURLs))
slog.Debug("fetched engineers", "engineers", len(gotURLs))
for _, url := range gotURLs {
c.URLch <- url.(string)
}
Expand All @@ -81,7 +81,7 @@ func (c *Crawler) Run(ctx context.Context, s Storage, f func(string) (string, er

go func() {
for v := range c.resultCh {
slog.Info("saving fetched sites response")
slog.Debug("saving fetched sites response")
err := s.SaveSite(ctx, []mongodb.Site{
{
URL: v.URL,
Expand Down
7 changes: 5 additions & 2 deletions scrape_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import (

const fakeURL = "http://fakeurl.test"

// Even though it does not verify the result, this test is useful to check that the crawler runs properly, since it
// uses mocks for the Storage and the fetch function.
func TestCrawlerRun(t *testing.T) {
timeoutCh := time.After(time.Duration(150) * time.Millisecond)
ctx := context.Background()
Expand Down Expand Up @@ -73,6 +75,7 @@ func TestGetReferences_Status500(t *testing.T) {
}
}

// TODO: Move the StorageMock to a separate file, preferably in the same package (mongodb)
type StorageMock interface {
SaveSite(ctx context.Context, site []mongodb.Site) error
DistinctEngineerURLs(ctx context.Context) ([]interface{}, error)
Expand All @@ -85,10 +88,10 @@ func NewStorageMock() StorageMock {
return StorageMockImpl{}
}

func (s StorageMockImpl) SaveSite(ctx context.Context, site []mongodb.Site) error {
func (s StorageMockImpl) SaveSite(_ context.Context, _ []mongodb.Site) error {
return nil
}

func (s StorageMockImpl) DistinctEngineerURLs(ctx context.Context) ([]interface{}, error) {
func (s StorageMockImpl) DistinctEngineerURLs(_ context.Context) ([]interface{}, error) {
return []interface{}{fakeURL}, nil
}

0 comments on commit e555007

Please sign in to comment.