Merge pull request #5 from perebaj/save_site_structure
✨ save/get site structure
perebaj authored Jan 5, 2024
2 parents dab8450 + ec896f3 commit 11219a8
Showing 3 changed files with 159 additions and 39 deletions.
14 changes: 14 additions & 0 deletions mongodb/db.go
@@ -35,3 +35,17 @@ func OpenDB(ctx context.Context, cfg Config) (*mongo.Client, error) {

return client, nil
}

// NLStorage groups the Mongo operations for the Newsletter collection
type NLStorage struct {
client *mongo.Client
DBName string
}

// NewNLStorage initializes a new NLStorage
func NewNLStorage(client *mongo.Client, DBName string) *NLStorage {
return &NLStorage{
client: client,
DBName: DBName,
}
}
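
For context, a minimal wiring sketch of how OpenDB and NewNLStorage might be used together. The module path and the Config URI field do not appear in this diff and are purely illustrative assumptions:

package main

import (
	"context"
	"log"

	"github.com/perebaj/newsletter/mongodb" // module path assumed, not shown in this diff
)

func main() {
	ctx := context.Background()

	// Config fields are not part of this change; URI is a hypothetical field name.
	cfg := mongodb.Config{URI: "mongodb://localhost:27017"}

	client, err := mongodb.OpenDB(ctx, cfg)
	if err != nil {
		log.Fatal(err)
	}
	defer func() { _ = client.Disconnect(ctx) }()

	// Hand the shared client to the storage layer; "newsletter" is an example database name.
	storage := mongodb.NewNLStorage(client, "newsletter")
	_ = storage // ready for SaveNewsletter/Newsletter/SaveSite/Sites calls
}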
64 changes: 45 additions & 19 deletions mongodb/newsletter.go
@@ -5,7 +5,7 @@ import (
"time"

"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/mongo"
"go.mongodb.org/mongo-driver/mongo/options"
)

// Newsletter is the struct that gathers which websites to scrape for a user email
@@ -16,24 +16,10 @@ type Newsletter struct {

// Site is the struct that gathers the scraped content of a website
type Site struct {
UserEmail string `bson:"user_email"`
URL string `bson:"url"`
Content string `bson:"content"`
ScrapeDate time.Time `bson:"scrape_date"`
}

// NLStorage joins the Mongo operations for the Newsletter collection
type NLStorage struct {
client *mongo.Client
DBName string
}

// NewNLStorage initializes a new NLStorage
func NewNLStorage(client *mongo.Client, DBName string) *NLStorage {
return &NLStorage{
client: client,
DBName: DBName,
}
UserEmail string `bson:"user_email"`
URL string `bson:"url"`
Content string `bson:"content"`
ScrapeDatetime time.Time `bson:"scrape_date"`
}

// SaveNewsletter saves a newsletter in the database
@@ -63,3 +49,43 @@ func (m *NLStorage) Newsletter() ([]Newsletter, error) {

return newsletters, nil
}

// SaveSite saves a site in the database
func (m *NLStorage) SaveSite(ctx context.Context, sites []Site) error {
database := m.client.Database(m.DBName)
collection := database.Collection("sites")

// convert sites to []interface{} so they can be passed to InsertMany
var docs []interface{}
for _, site := range sites {
docs = append(docs, site)
}
_, err := collection.InsertMany(ctx, docs)
if err != nil {
return err
}
return nil
}
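
One thing the new SaveSite does not do is guard against an empty slice; the Go mongo driver returns an error when InsertMany is called with no documents. A defensive wrapper, sketched as if it sat in the same package — the helper name and the early return are suggestions, not part of this commit:

// saveSiteIfAny is a hypothetical wrapper around SaveSite; it skips the call
// entirely when there is nothing to insert, avoiding the driver error that
// InsertMany raises for an empty documents slice.
func (m *NLStorage) saveSiteIfAny(ctx context.Context, sites []Site) error {
	if len(sites) == 0 {
		return nil
	}
	return m.SaveSite(ctx, sites)
}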

// Sites returns, given a user email and a URL, the most recently scraped content of that URL
func (m *NLStorage) Sites(usrEmail, URL string) ([]Site, error) {
database := m.client.Database(m.DBName)
collection := database.Collection("sites")
max := int64(2)

filter := bson.M{"user_email": usrEmail, "url": URL}
sort := bson.D{{Key: "scrape_date", Value: -1}}
opts := options.Find().SetSort(sort)
opts.Limit = &max

cursor, err := collection.Find(context.Background(), filter, opts)
if err != nil {
return nil, err
}

var sites []Site
if err = cursor.All(context.Background(), &sites); err != nil {
return nil, err
}
return sites, nil
}
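
Taken together, a caller might persist a batch of snapshots and then read back the newest ones for a user/URL pair. A small illustrative helper, written as if it lived in the same package; the function name and sample values are made up for the example:

// latestSnapshots is a hypothetical helper: it stores one freshly scraped Site
// and then asks Sites for the newest snapshots of that user/URL pair. Sites
// sorts by scrape_date descending and caps the result at two documents.
func latestSnapshots(ctx context.Context, storage *NLStorage) ([]Site, error) {
	site := Site{
		UserEmail:      "someone@example.com",
		URL:            "https://www.google.com",
		Content:        "<html>...</html>",
		ScrapeDatetime: time.Now().UTC(),
	}
	if err := storage.SaveSite(ctx, []Site{site}); err != nil {
		return nil, err
	}
	return storage.Sites(site.UserEmail, site.URL)
}

Because the query filters on user_email and url and sorts on scrape_date, a compound index on those three fields would presumably keep it fast as the sites collection grows; the commit does not create one.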
120 changes: 100 additions & 20 deletions mongodb/newsletter_test.go
@@ -21,34 +21,35 @@ func TestNLStorageSaveNewsletter(t *testing.T) {
database := client.Database(DBName)
collection := database.Collection("newsletter")

NLStorage := NewNLStorage(client, DBName)
err := NLStorage.SaveNewsletter(ctx, Newsletter{
want := Newsletter{
UserEmail: "[email protected]",
URLs: []string{"https://www.google.com"},
})
}

NLStorage := NewNLStorage(client, DBName)
err := NLStorage.SaveNewsletter(ctx, want)

if err != nil {
t.Fatal("error saving newsletter", err)
}

var nls []Newsletter
var got []Newsletter
cursor, err := collection.Find(context.Background(), bson.M{})

if err != nil {
t.Fatal("error finding newsletter", err)
}

if err := cursor.All(ctx, &nls); err != nil {
if err := cursor.All(ctx, &got); err != nil {
t.Fatal("error decoding newsletter", err)
}

if len(nls) == 1 {
reflect.DeepEqual(nls[0], Newsletter{
UserEmail: "[email protected]",
URLs: []string{"https://www.google.com"},
})
if len(got) == 1 {
if !reflect.DeepEqual(got[0], want) {
t.Fatalf("got %v, want %v", got[0], want)
}
} else {
t.Fatal("expected 1 newsletter, got", len(nls))
t.Fatal("expected 1 newsletter, got", len(got))
}

t.Cleanup(teardown(ctx, client, DBName))
@@ -61,28 +62,107 @@ func TestNLStorageNewsletter(t *testing.T) {
database := client.Database(DBName)
collection := database.Collection("newsletter")

_, err := collection.InsertOne(ctx, Newsletter{
want := Newsletter{
UserEmail: "[email protected]",
URLs: []string{"https://www.google.com"},
})
}
_, err := collection.InsertOne(ctx, want)

if err != nil {
t.Fatal("error saving newsletter", err)
}

NLStorage := NewNLStorage(client, DBName)
nls, err := NLStorage.Newsletter()
got, err := NLStorage.Newsletter()
if err != nil {
t.Fatal("error getting newsletter", err)
}

if len(nls) == 1 {
reflect.DeepEqual(nls[0], Newsletter{
UserEmail: "[email protected]",
URLs: []string{"https://www.google.com"},
})
if len(got) == 1 {
if !reflect.DeepEqual(got[0], want) {
t.Fatalf("got %v, want %v", got[0], want)
}
} else {
t.Fatal("expected 1 newsletter, got", len(got))
}

t.Cleanup(teardown(ctx, client, DBName))
}

func TestNLStorageSaveSite(t *testing.T) {
ctx := context.Background()
client, DBName := setup(ctx, t)

database := client.Database(DBName)
collection := database.Collection("sites")

want := []Site{
{UserEmail: "[email protected]", URL: "https://www.google.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 14, 15, 30, 0, 0, time.UTC)},
{UserEmail: "[email protected]", URL: "https://www.google.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 14, 15, 30, 0, 0, time.UTC)},
{UserEmail: "[email protected]", URL: "https://www.jj.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 14, 15, 30, 0, 0, time.UTC)},
}

NLStorage := NewNLStorage(client, DBName)
err := NLStorage.SaveSite(ctx, want)

if err != nil {
t.Fatal("error saving site", err)
}

var got []Site
cursor, err := collection.Find(context.Background(), bson.M{})
if err != nil {
t.Fatal("error finding site", err)
}

if err := cursor.All(ctx, &got); err != nil {
t.Fatal("error decoding site", err)
}

if len(got) == 3 {
if !reflect.DeepEqual(got, want) {
t.Fatalf("got %v, want %v", got, want)
}
} else {
t.Fatal("expected 2 sites, got", len(got))
}

t.Cleanup(teardown(ctx, client, DBName))
}

func TestNLStorageSites(t *testing.T) {
ctx := context.Background()
client, DBName := setup(ctx, t)

want := []Site{
{UserEmail: "[email protected]", URL: "https://www.google.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 13, 15, 30, 0, 0, time.UTC)},
{UserEmail: "[email protected]", URL: "https://www.google.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 12, 15, 30, 0, 0, time.UTC)},
{UserEmail: "[email protected]", URL: "https://www.google.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 11, 15, 30, 0, 0, time.UTC)},
}

NLStorage := NewNLStorage(client, DBName)
err := NLStorage.SaveSite(ctx, want)
if err != nil {
t.Fatal("error saving site", err)
}

got, err := NLStorage.Sites("[email protected]", "https://www.google.com")
if err != nil {
t.Fatal("error getting site", err)
}

if len(got) == 2 {
assert(t, got[0].UserEmail, want[0].UserEmail)
assert(t, got[0].URL, want[0].URL)
assert(t, got[0].Content, want[0].Content)
assert(t, got[0].ScrapeDatetime, want[0].ScrapeDatetime)

assert(t, got[1].UserEmail, want[1].UserEmail)
assert(t, got[1].URL, want[1].URL)
assert(t, got[1].Content, want[1].Content)
assert(t, got[1].ScrapeDatetime, want[1].ScrapeDatetime)
} else {
t.Fatal("expected 1 newsletter, got", len(nls))
t.Fatal("expected 2 sites, got", len(got))
}

t.Cleanup(teardown(ctx, client, DBName))
