diff --git a/.gitignore b/.gitignore
index 3b735ec..d16f074 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,3 +19,4 @@
 
 # Go workspace file
 go.work
+cmd/newsletter/newsletter
diff --git a/cmd/newsletter/newsletter b/cmd/newsletter/newsletter
deleted file mode 100755
index 6c984e4..0000000
Binary files a/cmd/newsletter/newsletter and /dev/null differ
diff --git a/mail.go b/mail.go
index 630d3f1..63f5054 100644
--- a/mail.go
+++ b/mail.go
@@ -1,31 +1,90 @@
 package newsletter
 
 import (
+	"context"
+	"errors"
 	"fmt"
+	"log/slog"
 	"net/smtp"
+	"strings"
 )
 
+// SMTPServer is the SMTP server of gmail
+const SMTPServer = "smtp.gmail.com"
+
 // EmailConfig contains the necessary information to authenticate in the SMTP server
 type EmailConfig struct {
 	Password string
 	UserName string
 }
 
-// SMTPServer is the SMTP server of gmail
-const SMTPServer = "smtp.gmail.com"
+// MailClient sends emails through SMTPServer using the credentials held in cfg.
+type MailClient struct {
+	cfg EmailConfig
+}
+
+// NewMailClient returns a MailClient that authenticates with cfg.
+func NewMailClient(cfg EmailConfig) *MailClient {
+	return &MailClient{
+		cfg: cfg,
+	}
+}
 
-// Send sends an email to the given destination
-func Send(dest []string, bodyMessage string, cfg EmailConfig) error {
-	auth := smtp.PlainAuth("", cfg.UserName, cfg.Password, SMTPServer)
+// Send emails bodyMessage to every address in dest, using dest[0] as the To
+// header. It returns an error when dest is empty or when the delivery fails.
+func (m MailClient) Send(dest []string, bodyMessage string) error {
+	if len(dest) == 0 {
+		return errors.New("error sending email: no destination address")
+	}
+
+	auth := smtp.PlainAuth("", m.cfg.UserName, m.cfg.Password, SMTPServer)
 
 	msg := []byte("To: " + dest[0] + "\r\n" +
 		"Subject: Newsletter\r\n" +
 		"\r\n" +
 		bodyMessage + "\r\n")
 
-	err := smtp.SendMail(SMTPServer+":587", auth, cfg.UserName, dest, []byte(msg))
+	err := smtp.SendMail(SMTPServer+":587", auth, m.cfg.UserName, dest, msg)
 	if err != nil {
-		return fmt.Errorf("error sending email: %v", err)
+		return fmt.Errorf("error sending email: %w", err)
 	}
 	return nil
 }
+
+// EmailTrigger emails every newsletter subscriber the urls they follow whose
+// page returned by PageIn is flagged as most recent. A failure for one
+// subscriber is logged and does not stop the remaining ones; only a failure
+// to load the newsletters is returned.
+func EmailTrigger(ctx context.Context, s Storage, e Email) error {
+	nl, err := s.Newsletter()
+	if err != nil {
+		return fmt.Errorf("error getting newsletter: %w", err)
+	}
+
+	for _, n := range nl {
+		pages, err := s.PageIn(ctx, n.URLs)
+		if err != nil {
+			slog.Error("error getting pages", "error", err)
+			continue
+		}
+
+		var validURLs []string
+		for _, p := range pages {
+			if p.IsMostRecent {
+				validURLs = append(validURLs, p.URL)
+			}
+		}
+		if len(validURLs) == 0 {
+			continue
+		}
+
+		// Join the urls one per line; formatting the slice itself with %s
+		// would render it as "[url1 url2]".
+		body := fmt.Sprintf("Hi %s, \n\nWe have found %d new articles for you: \n\n%s", n.UserEmail, len(validURLs), strings.Join(validURLs, "\n"))
+		if err := e.Send([]string{n.UserEmail}, body); err != nil {
+			slog.Error("error sending email", "error", err)
+		}
+	}
+
+	return nil
+}
diff --git a/mongodb/newsletter.go b/mongodb/newsletter.go
index cf4c5b2..02b785f 100644
--- a/mongodb/newsletter.go
+++ b/mongodb/newsletter.go
@@ -98,6 +98,55 @@ func (m *NLStorage) SavePage(ctx context.Context, pages []Page) error {
 	return nil
 }
 
+// PageIn returns the last scraped content of each of the given urls.
+// The order of the returned pages is not defined.
+func (m *NLStorage) PageIn(ctx context.Context, urls []string) ([]Page, error) {
+	database := m.client.Database(m.DBName)
+	collection := database.Collection("pages")
+
+	// Keep only the requested urls, put the newest scrape first and take the
+	// first document of every url.
+	pipeline := []bson.M{
+		{
+			"$match": bson.M{
+				"url": bson.M{
+					"$in": urls,
+				},
+			},
+		},
+		{
+			"$sort": bson.M{
+				"scrape_date": -1,
+			},
+		},
+		{
+			"$group": bson.M{
+				"_id": "$url",
+				"page": bson.M{
+					"$first": "$$ROOT",
+				},
+			},
+		},
+		{
+			"$replaceRoot": bson.M{
+				"newRoot": "$page",
+			},
+		},
+	}
+
+	cursor, err := collection.Aggregate(ctx, pipeline)
+	if err != nil {
+		return nil, fmt.Errorf("error getting page: %w", err)
+	}
+
+	var page []Page
+	if err = cursor.All(ctx, &page); err != nil {
+		return nil, fmt.Errorf("error decoding page: %w", err)
+	}
+
+	return page, nil
+}
+
 // Page returns the last scraped content of a given url
 func (m *NLStorage) Page(ctx context.Context, url string) ([]Page, error) {
 	var page []Page
diff --git a/mongodb/newsletter_test.go b/mongodb/newsletter_test.go
index 99fb0b3..118d860 100644
--- a/mongodb/newsletter_test.go
+++ b/mongodb/newsletter_test.go
@@ -129,6 +129,59 @@ func TestNLStorageSavePage(t *testing.T) {
 	t.Cleanup(teardown(ctx, client, DBName))
 }
 
+// TestNLStoragePageIn checks that PageIn returns exactly one page per requested
+// url and that it is the most recently scraped one.
+func TestNLStoragePageIn(t *testing.T) {
+	ctx := context.Background()
+	client, DBName := setup(ctx, t)
+	// Register the teardown first so it also runs when the test fails early.
+	t.Cleanup(teardown(ctx, client, DBName))
+
+	pages := []Page{
+		{URL: "https://www.google.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 13, 15, 30, 0, 0, time.UTC), IsMostRecent: true, HashMD5: md5.Sum([]byte("HTML"))},
+		{URL: "https://www.google.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 12, 15, 30, 0, 0, time.UTC), IsMostRecent: true, HashMD5: md5.Sum([]byte("HTML"))},
+		{URL: "https://facebook.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 11, 15, 30, 0, 0, time.UTC), IsMostRecent: true, HashMD5: md5.Sum([]byte("HTML"))},
+		{URL: "https://jj.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 15, 15, 30, 0, 0, time.UTC), IsMostRecent: true, HashMD5: md5.Sum([]byte("HTML"))},
+	}
+
+	storage := NewNLStorage(client, DBName)
+	err := storage.SavePage(ctx, pages)
+	if err != nil {
+		t.Fatal("error saving page", err)
+	}
+
+	got, err := storage.PageIn(ctx, []string{"https://www.google.com", "https://facebook.com", "https://jj.com"})
+	if err != nil {
+		t.Fatal("error getting page", err)
+	}
+
+	// Newest scrape of every url, keyed by url because the order of the pages
+	// returned by PageIn is not guaranteed.
+	want := map[string]Page{
+		pages[0].URL: pages[0],
+		pages[2].URL: pages[2],
+		pages[3].URL: pages[3],
+	}
+	if len(got) != len(want) {
+		t.Fatalf("expected %d pages, got %d", len(want), len(got))
+	}
+	for _, g := range got {
+		w, ok := want[g.URL]
+		if !ok {
+			t.Fatalf("unexpected or duplicated page for url %q", g.URL)
+		}
+		// Each expected url may be matched only once.
+		delete(want, g.URL)
+
+		if !g.ScrapeDatetime.Equal(w.ScrapeDatetime) {
+			t.Errorf("url %q: expected scrape date %v, got %v", g.URL, w.ScrapeDatetime, g.ScrapeDatetime)
+		}
+		if g.Content != w.Content {
+			t.Errorf("url %q: expected content %q, got %q", g.URL, w.Content, g.Content)
+		}
+	}
+}
+
 func TestNLStoragePage(t *testing.T) {
 	ctx := context.Background()
 	client, DBName := setup(ctx, t)
diff --git a/scrape.go b/scrape.go
index 33a2e75..43b2fd3 100644
--- a/scrape.go
+++ b/scrape.go
@@ -28,6 +28,13 @@ type Storage interface {
 	SavePage(ctx context.Context, site []mongodb.Page) error
 	DistinctEngineerURLs(ctx context.Context) ([]interface{}, error)
 	Page(ctx context.Context, url string) ([]mongodb.Page, error)
+	Newsletter() ([]mongodb.Newsletter, error)
+	PageIn(ctx context.Context, urls []string) ([]mongodb.Page, error)
+}
+
+// Email is the interface that wraps the Send method used to deliver a message to dest.
+type Email interface {
+	Send(dest []string, bodyMessage string) error
 }
 
 // Crawler contains the necessary information to run the crawler
diff --git a/scrape_test.go b/scrape_test.go
index a40dc3b..f232efb 100644
--- a/scrape_test.go
+++ b/scrape_test.go
@@ -126,3 +126,13 @@ func (s StorageMockImpl) DistinctEngineerURLs(_ context.Context) ([]interface{},
 func (s StorageMockImpl) Page(_ context.Context, _ string) ([]mongodb.Page, error) {
 	return []mongodb.Page{}, nil
 }
+
+// Newsletter returns one newsletter that follows fakeURL.
+func (s StorageMockImpl) Newsletter() ([]mongodb.Newsletter, error) {
+	return []mongodb.Newsletter{{URLs: []string{fakeURL}}}, nil
+}
+
+// PageIn returns no pages.
+func (s StorageMockImpl) PageIn(_ context.Context, _ []string) ([]mongodb.Page, error) {
+	return []mongodb.Page{}, nil
+}