Skip to content

Commit

Permalink
Merge pull request #14 from perebaj/arq
Browse files Browse the repository at this point in the history
✨ readme + arq
  • Loading branch information
perebaj authored Jan 24, 2024
2 parents 21090b5 + c19fdcd commit 91e39d8
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 51 deletions.
24 changes: 12 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,9 @@ Some skilled engineers even have a blog site where they push some gold content,

![newsletter](./assets/newsletter.png)

## Architecture


# Roadmap

This program aims to create the following features:

- Given a list of websites, that are located in a MongoDB collection, scrape the content of each website and save it in another MongoDB collection. ✅
- After the scraping, calculate the similarity between the new content and the previous content of each website, and update the MongoDB collection
with this information. ✅
- All registered users will receive an email, according to the URLs they have registered, notifying them about news on their favorite engineers' websites. ✅

Note: All these flows will be triggered by a cron job. ✅

![architecture](./assets/newsletterarq.png)

## Environment Variables

Expand Down Expand Up @@ -49,4 +39,14 @@ Access the dev container and run the tests:
make integration-test
```

# Roadmap

This program aims to create the following features:

- Given a list of websites, that are located in a MongoDB collection, scrape the content of each website and save it in another MongoDB collection. ✅
- After the scraping, calculate the similarity between the new content and the previous content of each website, and update the MongoDB collection
with this information. ✅
- All registered users will receive an email, according to the URLs they have registered, notifying them about news on their favorite engineers' websites. ✅
- Create API routes to register new users and the websites that they want to follow. ⌛

Note: All these flows will be triggered by a cron job. ✅
Binary file added assets/newsletterarq.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ module github.com/perebaj/newsletter

go 1.21.5

require go.mongodb.org/mongo-driver v1.13.1

require (
github.com/golang/snappy v0.0.1 // indirect
github.com/klauspost/compress v1.13.6 // indirect
Expand All @@ -10,7 +12,6 @@ require (
github.com/xdg-go/scram v1.1.2 // indirect
github.com/xdg-go/stringprep v1.0.4 // indirect
github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect
go.mongodb.org/mongo-driver v1.13.1 // indirect
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d // indirect
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 // indirect
golang.org/x/text v0.7.0 // indirect
Expand Down
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.2 h1:X2ev0eStA3AbceY54o37/0PQ/UWqKEiiO2dKL5OPaFM=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
Expand Down Expand Up @@ -48,4 +50,5 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
20 changes: 20 additions & 0 deletions mail_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package newsletter

import (
"context"
"testing"
)

// MailClientMockImpl is a stateless mail client test double.
type MailClientMockImpl struct{}

// Send ignores both of its arguments and always returns nil.
func (MailClientMockImpl) Send(_ []string, _ string) error {
	return nil
}

// TestEmailTrigger runs EmailTrigger with a background context, the storage
// mock and the mail client mock, and reports a test error if the call returns
// a non-nil error.
func TestEmailTrigger(t *testing.T) {
	storage := NewStorageMock()
	mailer := MailClientMockImpl{}

	if err := EmailTrigger(context.Background(), storage, mailer); err != nil {
		t.Errorf("expected nil, got %v", err)
	}
}
File renamed without changes.
61 changes: 23 additions & 38 deletions scrape_test.go → scraper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,39 @@ import (
"github.com/perebaj/newsletter/mongodb"
)

const fakeURL = "http://fakeurl.test"
// StorageMockImpl is a stateless storage test double that returns canned data.
type StorageMockImpl struct{}

// FakeURL is the URL returned by the StorageMockImpl methods that hand out URLs.
const FakeURL = "http://fakeurl.test"

// NewStorageMock returns a StorageMockImpl value.
func NewStorageMock() StorageMockImpl {
	return StorageMockImpl{}
}

// SavePage ignores its arguments and always returns nil.
func (StorageMockImpl) SavePage(_ context.Context, _ []mongodb.Page) error {
	return nil
}

// DistinctEngineerURLs returns a single-element slice holding FakeURL.
func (StorageMockImpl) DistinctEngineerURLs(_ context.Context) ([]interface{}, error) {
	urls := []interface{}{FakeURL}
	return urls, nil
}

// Page returns an empty, non-nil slice of pages and a nil error.
func (StorageMockImpl) Page(_ context.Context, _ string) ([]mongodb.Page, error) {
	return []mongodb.Page{}, nil
}

// Newsletter returns one newsletter whose URLs list contains only FakeURL.
func (StorageMockImpl) Newsletter() ([]mongodb.Newsletter, error) {
	entry := mongodb.Newsletter{URLs: []string{FakeURL}}
	return []mongodb.Newsletter{entry}, nil
}

// PageIn returns two pages for FakeURL, both flagged as most recent, whose
// HashMD5 values are the MD5 sums of their respective Content strings.
func (StorageMockImpl) PageIn(_ context.Context, _ []string) ([]mongodb.Page, error) {
	const (
		first  = "Hello, World!"
		second = "Hello, World! 2"
	)
	pages := []mongodb.Page{
		{IsMostRecent: true, URL: FakeURL, Content: first, HashMD5: md5.Sum([]byte(first))},
		{IsMostRecent: true, URL: FakeURL, Content: second, HashMD5: md5.Sum([]byte(second))},
	}
	return pages, nil
}

func TestPageComparation(t *testing.T) {
recentScrapedPage := Page{
Content: "Hello, World!",
URL: fakeURL,
URL: FakeURL,
ScrapeDateTime: time.Now().UTC(),
}

lastScrapedPage := []mongodb.Page{
{
Content: "Hello, World!",
URL: fakeURL,
URL: FakeURL,
ScrapeDatetime: time.Now().UTC().Add(-time.Duration(1) * time.Hour),
HashMD5: md5.Sum([]byte("Hello, World!")),
},
Expand Down Expand Up @@ -115,38 +135,3 @@ func TestFetch_Status500(t *testing.T) {
t.Errorf("expected empty body, got %s", got)
}
}

// TestEmailTrigger calls EmailTrigger with a background context, the storage
// mock and the mail client mock, and reports a test error when the returned
// error is non-nil.
func TestEmailTrigger(t *testing.T) {
ctx := context.Background()
s := NewStorageMock()
e := MailClientMockImpl{}

err := EmailTrigger(ctx, s, e)
if err != nil {
t.Errorf("expected nil, got %v", err)
}
}

// MailClientMockImpl is a stateless mail client test double.
type MailClientMockImpl struct{}

// Send ignores both of its arguments and always returns nil.
func (m MailClientMockImpl) Send(_ []string, _ string) error { return nil }

// StorageMockImpl is a stateless storage test double that returns canned data.
type StorageMockImpl struct{}

// NewStorageMock returns a StorageMockImpl value.
func NewStorageMock() StorageMockImpl { return StorageMockImpl{} }

// SavePage ignores its arguments and always returns nil.
func (s StorageMockImpl) SavePage(_ context.Context, _ []mongodb.Page) error { return nil }

// DistinctEngineerURLs returns a single-element slice holding fakeURL.
func (s StorageMockImpl) DistinctEngineerURLs(_ context.Context) ([]interface{}, error) {
return []interface{}{fakeURL}, nil
}

// Page returns an empty, non-nil slice of pages and a nil error.
func (s StorageMockImpl) Page(_ context.Context, _ string) ([]mongodb.Page, error) {
return []mongodb.Page{}, nil
}

// Newsletter returns one newsletter whose URLs list contains only fakeURL.
func (s StorageMockImpl) Newsletter() ([]mongodb.Newsletter, error) {
return []mongodb.Newsletter{{URLs: []string{fakeURL}}}, nil
}

// PageIn returns two pages for fakeURL, both flagged as most recent, whose
// HashMD5 values are the MD5 sums of their respective Content strings.
func (s StorageMockImpl) PageIn(_ context.Context, _ []string) ([]mongodb.Page, error) {
return []mongodb.Page{
{IsMostRecent: true, URL: fakeURL, Content: "Hello, World!", HashMD5: md5.Sum([]byte("Hello, World!"))},
{IsMostRecent: true, URL: fakeURL, Content: "Hello, World! 2", HashMD5: md5.Sum([]byte("Hello, World! 2"))},
}, nil
}

0 comments on commit 91e39d8

Please sign in to comment.