Skip to content

Commit

Permalink
Merge pull request #8 from perebaj/timer
Browse files Browse the repository at this point in the history
Timer
  • Loading branch information
perebaj authored Jan 19, 2024
2 parents d4dbc6e + 13d1396 commit 1f7366d
Show file tree
Hide file tree
Showing 10 changed files with 277 additions and 45 deletions.
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# TOOLS VERSIONS
export GO_VERSION=1.21.5
export GOLANGCI_LINT_VERSION=v1.55.2

devimage=newsletter-dev
gopkg=$(devimage)-gopkg
gocache=$(devimage)-gocache
Expand Down Expand Up @@ -94,6 +95,11 @@ dev/logs:
dev/stop:
docker-compose stop

## Dev container cleanup (remove volumes and images)
.PHONY: dev/cleanup
dev/cleanup:
docker-compose down -v --remove-orphans --rmi all

## Access the container
dev:
@$(devrun) bash
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Newsletter

Some skilled engineers even have a blog where they publish golden content, but they don't yet have a way for their fan base to get recurring access to it. Newsletter tries to work around this by scraping their pages and sending e-mails to the people who are interested in those hidden gems.

![newsletter](./assets/newsletter.png)

Why don't the software magicians have a newsletter?


# Roadmap
Expand Down
109 changes: 94 additions & 15 deletions cmd/newsletter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,36 @@
package main

import (
"context"
"fmt"
"log/slog"
"os"
"os/signal"
"sync"
"syscall"
"time"

"github.com/perebaj/newsletter"
"github.com/perebaj/newsletter/mongodb"
)

// Config is the struct that contains the configuration for the service.
type Config struct {
LogLevel string
LogType string
LogLevel string
LogType string
LoopDurationMinutes time.Duration
Mongo mongodb.Config
}

func main() {

cfg := Config{
LogLevel: "INFO",
LogType: "json",
LogLevel: getEnvWithDefault("LOG_LEVEL", "INFO"),
LogType: getEnvWithDefault("LOG_TYPE", "json"),
Mongo: mongodb.Config{
URI: getEnvWithDefault("NL_MONGO_URI", ""),
},
LoopDurationMinutes: time.Duration(10) * time.Second,
}

signalCh := make(chan os.Signal, 1)
Expand All @@ -32,22 +42,83 @@ func main() {
signalCh <- syscall.SIGTERM
}

sSlice := []string{"http://www.google.com", "www.facebook.com", "www.x.com"}
jobs := make(chan string, len(sSlice))
result := make(chan string, len(sSlice))
for _, s := range sSlice {
jobs <- s
ctx := context.Background()

client, err := mongodb.OpenDB(ctx, cfg.Mongo)
if err != nil {
slog.Error("error connecting to MongoDB", "error", err)
signalCh <- syscall.SIGTERM
}

go newsletter.Worker(jobs, result, newsletter.GetReferences)
slog.Info("connected successfully to MongoDB instance")

for i := 0; i < len(sSlice); i++ {
r := <-result
if r != "" {
slog.Info(r)
}
storage := mongodb.NewNLStorage(client, "newsletter")

err = storage.SaveEngineer(ctx, mongodb.Engineer{
Name: "Paul Graham",
URL: "http://www.paulgraham.com/articles.html",
Description: "Paul Graham is an English-born computer scientist, entrepreneur, venture capitalist, author, and essayist. He is best known for his work on Lisp, his former startup Viaweb (later renamed \"Yahoo! Store\"), co-founding the influential startup accelerator and seed capital firm Y Combinator, his blog, and Hacker News.",
})
if err != nil {
slog.Error("error saving engineer", "error", err)
signalCh <- syscall.SIGTERM
}

err = storage.SaveEngineer(ctx, mongodb.Engineer{
Name: "Joel Spolsky",
URL: "https://www.joelonsoftware.com/",
Description: "Joel Spolsky is a software engineer and writer. He is the author of Joel on Software, a blog on software development, and the creator of the project management software Trello. He has previously worked as a programmer, software designer, and software consultant.",
})
if err != nil {
slog.Error("error saving engineer", "error", err)
signalCh <- syscall.SIGTERM
}

URLCh := make(chan string)
fetchResultCh := make(chan string)

var wg sync.WaitGroup
wg.Add(5)

for i := 0; i < 5; i++ {
go newsletter.Worker(&wg, URLCh, fetchResultCh, newsletter.Fetch)
}

go func() {
defer close(URLCh)
for range time.Tick(cfg.LoopDurationMinutes) {
slog.Info("fetching engineers")
gotURLs, err := storage.DistinctEngineerURLs(ctx)
if err != nil {
slog.Error("error getting engineers", "error", err)
signalCh <- syscall.SIGTERM
}

slog.Info("fetched engineers", "engineers", len(gotURLs))
for _, url := range gotURLs {
URLCh <- url.(string)
}
}
}()

go func() {
wg.Wait()
defer close(fetchResultCh)
}()

go func() {
for v := range fetchResultCh {
slog.Info("saving fetched sites response", "response", v[:10])
err := storage.SaveSite(ctx, []mongodb.Site{
{Content: v, ScrapeDatetime: time.Now().UTC()},
})
if err != nil {
slog.Error("error saving site result", "error", err)
signalCh <- syscall.SIGTERM
}
}
}()

<-signalCh
}

Expand Down Expand Up @@ -83,3 +154,11 @@ func setUpLog(cfg Config) error {
slog.SetDefault(logger)
return nil
}

// getEnvWithDefault returns the value of the environment variable named by
// key. When that variable is unset or set to the empty string, defaultValue is
// returned instead.
func getEnvWithDefault(key, defaultValue string) string {
	if v := os.Getenv(key); v != "" {
		return v
	}
	return defaultValue
}
Binary file modified cmd/newsletter/newsletter
Binary file not shown.
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ services:
args:
GO_VERSION: $GO_VERSION
GOLANGCI_LINT_VERSION: $GOLANGCI_LINT_VERSION
environment:
NL_MONGO_URI: "mongodb://root:root@mongodb:27017"
depends_on:
- mongodb
volumes:
Expand Down
5 changes: 5 additions & 0 deletions mongodb/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package mongodb

import (
"context"
"fmt"

"go.mongodb.org/mongo-driver/mongo"
"go.mongodb.org/mongo-driver/mongo/options"
Expand All @@ -15,6 +16,10 @@ type Config struct {

// OpenDB connects to the MongoDB instance.
func OpenDB(ctx context.Context, cfg Config) (*mongo.Client, error) {
if cfg.URI == "" {
return nil, fmt.Errorf("MongoDB URI is empty")
}

bsonOpts := &options.BSONOptions{
UseJSONStructTags: true,
NilSliceAsEmpty: true,
Expand Down
32 changes: 32 additions & 0 deletions mongodb/newsletter.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package mongodb

import (
"context"
"fmt"
"time"

"go.mongodb.org/mongo-driver/bson"
Expand All @@ -14,6 +15,13 @@ type Newsletter struct {
URLs []string `bson:"urls"`
}

// Engineer describes an engineer whose site is tracked by the newsletter.
// SaveEngineer stores values of this type as documents in the "engineers"
// collection.
type Engineer struct {
// Name is the engineer's name.
Name string `bson:"name"`
// Description is a free-text description of the engineer.
Description string `bson:"description"`
// URL is the address of the engineer's site; DistinctEngineerURLs queries
// the distinct values of this field.
URL string `bson:"url"`
}

// Site is the struct that gather the scraped content of a website
type Site struct {
UserEmail string `bson:"user_email"`
Expand All @@ -33,6 +41,30 @@ func (m *NLStorage) SaveNewsletter(ctx context.Context, newsletter Newsletter) e
return nil
}

// SaveEngineer inserts e as a new document in the "engineers" collection of
// the storage's database. It returns the error reported by the insert, if any.
func (m *NLStorage) SaveEngineer(ctx context.Context, e Engineer) error {
	engineers := m.client.Database(m.DBName).Collection("engineers")
	if _, err := engineers.InsertOne(ctx, e); err != nil {
		return err
	}
	return nil
}

// DistinctEngineerURLs returns the distinct values of the "url" field across
// all documents in the "engineers" collection. The values are returned as
// untyped interface values exactly as the driver produced them; on failure the
// result is nil and the driver error is wrapped.
func (m *NLStorage) DistinctEngineerURLs(ctx context.Context) ([]interface{}, error) {
	engineers := m.client.Database(m.DBName).Collection("engineers")

	// An empty filter matches every document in the collection.
	urls, err := engineers.Distinct(ctx, "url", bson.M{})
	if err != nil {
		return nil, fmt.Errorf("error getting engineers: %w", err)
	}

	return urls, nil
}

// Newsletter returns all the newsletters in the database
func (m *NLStorage) Newsletter() ([]Newsletter, error) {
var newsletters []Newsletter
Expand Down
100 changes: 98 additions & 2 deletions mongodb/newsletter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package mongodb
import (
"context"
"fmt"
"os"
"reflect"
"testing"
"time"
Expand Down Expand Up @@ -168,6 +169,102 @@ func TestNLStorageSites(t *testing.T) {
t.Cleanup(teardown(ctx, client, DBName))
}

// TestNLStorageSaveEngineer checks that every engineer passed to SaveEngineer
// ends up as a document in the "engineers" collection.
func TestNLStorageSaveEngineer(t *testing.T) {
	ctx := context.Background()
	client, DBName := setup(ctx, t)
	// Register the teardown immediately: t.Fatal stops the test function, so a
	// cleanup registered at the end would never run when an assertion fails.
	t.Cleanup(teardown(ctx, client, DBName))

	collection := client.Database(DBName).Collection("engineers")

	want := Engineer{
		Name: "John", URL: "https://www.1.com", Description: "John is a software engineer",
	}

	want2 := Engineer{
		Name: "John", URL: "https://www.2.com", Description: "John is a software engineer",
	}

	storage := NewNLStorage(client, DBName)
	if err := storage.SaveEngineer(ctx, want); err != nil {
		t.Fatal("error saving 1 engineer", err)
	}

	if err := storage.SaveEngineer(ctx, want2); err != nil {
		t.Fatal("error saving 2 engineer", err)
	}

	// Read the documents back straight from the collection so the assertion
	// does not depend on any other storage method.
	cursor, err := collection.Find(ctx, bson.M{})
	if err != nil {
		t.Fatal("error finding engineer", err)
	}

	var got []Engineer
	if err := cursor.All(ctx, &got); err != nil {
		t.Fatal("error decoding engineer", err)
	}

	if len(got) != 2 {
		t.Fatal("expected 2 engineers, got", len(got))
	}

	wantAll := []Engineer{want, want2}
	if !reflect.DeepEqual(got, wantAll) {
		t.Fatalf("got %v, want %v", got, wantAll)
	}
}

// TestNLStorageDistinctEngineerURLs checks that DistinctEngineerURLs returns
// each stored URL once, even when several engineers share the same URL.
func TestNLStorageDistinctEngineerURLs(t *testing.T) {
	ctx := context.Background()
	client, DBName := setup(ctx, t)
	// Register the teardown immediately: t.Fatal stops the test function, so a
	// cleanup registered at the end would never run when an assertion fails.
	t.Cleanup(teardown(ctx, client, DBName))

	want := Engineer{
		Name: "John", URL: "https://www.1.com", Description: "John is a software engineer",
	}

	want2 := Engineer{
		Name: "John", URL: "https://www.2.com", Description: "John is a software engineer",
	}

	// want3 deliberately repeats want2's URL so the result must be deduplicated.
	want3 := Engineer{
		Name: "John", URL: "https://www.2.com", Description: "John is a software engineer",
	}

	storage := NewNLStorage(client, DBName)

	if err := storage.SaveEngineer(ctx, want); err != nil {
		t.Fatal("error saving 1 engineer", err)
	}

	if err := storage.SaveEngineer(ctx, want2); err != nil {
		t.Fatal("error saving 2 engineer", err)
	}

	if err := storage.SaveEngineer(ctx, want3); err != nil {
		t.Fatal("error saving 3 engineer", err)
	}

	got, err := storage.DistinctEngineerURLs(ctx)
	if err != nil {
		t.Fatal("error getting engineers", err)
	}

	if len(got) != 2 {
		t.Fatal("expected 2 engineers, got", len(got))
	}

	wantURLs := []interface{}{want.URL, want2.URL}
	if !reflect.DeepEqual(got, wantURLs) {
		t.Fatalf("got %v, want %v", got, wantURLs)
	}
}

func assert(t testing.TB, got, want interface{}) {
t.Helper()
if got != want {
Expand All @@ -184,8 +281,7 @@ func teardown(ctx context.Context, client *mongo.Client, DBName string) func() {
}

func setup(ctx context.Context, t testing.TB) (*mongo.Client, string) {
// TODO: Receive the URI from the environment variable
URI := "mongodb://root:root@mongodb:27017/"
URI := os.Getenv("NL_MONGO_URI")
client, err := OpenDB(ctx, Config{
URI: URI,
})
Expand Down
Loading

0 comments on commit 1f7366d

Please sign in to comment.