From ec896f330171093df83fb579c46989557f83a2ea Mon Sep 17 00:00:00 2001
From: jojo
Date: Fri, 5 Jan 2024 14:27:38 -0300
Subject: [PATCH] :sparkles: save/get site structure

---
 mongodb/db.go              |  14 +++++
 mongodb/newsletter.go      |  64 ++++++++++++++------
 mongodb/newsletter_test.go | 120 ++++++++++++++++++++++++++++++-------
 3 files changed, 159 insertions(+), 39 deletions(-)

diff --git a/mongodb/db.go b/mongodb/db.go
index 27bc986..2ff8731 100644
--- a/mongodb/db.go
+++ b/mongodb/db.go
@@ -35,3 +35,17 @@ func OpenDB(ctx context.Context, cfg Config) (*mongo.Client, error) {
 
 	return client, nil
 }
+
+// NLStorage joins the Mongo operations for the Newsletter collection
+type NLStorage struct {
+	client *mongo.Client
+	DBName string
+}
+
+// NewNLStorage initializes a new NLStorage
+func NewNLStorage(client *mongo.Client, DBName string) *NLStorage {
+	return &NLStorage{
+		client: client,
+		DBName: DBName,
+	}
+}
diff --git a/mongodb/newsletter.go b/mongodb/newsletter.go
index acea5f7..ed73e08 100644
--- a/mongodb/newsletter.go
+++ b/mongodb/newsletter.go
@@ -5,7 +5,7 @@ import (
 	"time"
 
 	"go.mongodb.org/mongo-driver/bson"
-	"go.mongodb.org/mongo-driver/mongo"
+	"go.mongodb.org/mongo-driver/mongo/options"
 )
 
 // Newsletter is the struct that gather what websites to scrape for an user email
@@ -16,24 +16,10 @@ type Newsletter struct {
 
 // Site is the struct that gather the scraped content of a website
 type Site struct {
-	UserEmail  string    `bson:"user_email"`
-	URL        string    `bson:"url"`
-	Content    string    `bson:"content"`
-	ScrapeDate time.Time `bson:"scrape_date"`
-}
-
-// NLStorage joins the Mongo operations for the Newsletter collection
-type NLStorage struct {
-	client *mongo.Client
-	DBName string
-}
-
-// NewNLStorage initializes a new NLStorage
-func NewNLStorage(client *mongo.Client, DBName string) *NLStorage {
-	return &NLStorage{
-		client: client,
-		DBName: DBName,
-	}
+	UserEmail      string    `bson:"user_email"`
+	URL            string    `bson:"url"`
+	Content        string    `bson:"content"`
+	ScrapeDatetime time.Time `bson:"scrape_date"`
 }
 
 // SaveNewsletter saves a newsletter in the database
@@ -63,3 +49,43 @@ func (m *NLStorage) Newsletter() ([]Newsletter, error) {
 
 	return newsletters, nil
 }
+
+// SaveSite saves the given sites in the database
+func (m *NLStorage) SaveSite(ctx context.Context, sites []Site) error {
+	database := m.client.Database(m.DBName)
+	collection := database.Collection("sites")
+
+	// convert sites to []interface{} to use InsertMany
+	var docs []interface{}
+	for _, site := range sites {
+		docs = append(docs, site)
+	}
+	_, err := collection.InsertMany(ctx, docs)
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+// Sites returns, for a given user email and URL, the two most recently scraped contents of that URL
+func (m *NLStorage) Sites(usrEmail, URL string) ([]Site, error) {
+	database := m.client.Database(m.DBName)
+	collection := database.Collection("sites")
+	max := int64(2)
+
+	filter := bson.M{"user_email": usrEmail, "url": URL}
+	sort := bson.D{{Key: "scrape_date", Value: -1}}
+	opts := options.Find().SetSort(sort)
+	opts.Limit = &max
+
+	cursor, err := collection.Find(context.Background(), filter, opts)
+	if err != nil {
+		return nil, err
+	}
+
+	var sites []Site
+	if err = cursor.All(context.Background(), &sites); err != nil {
+		return nil, err
+	}
+	return sites, nil
+}
diff --git a/mongodb/newsletter_test.go b/mongodb/newsletter_test.go
index d158c36..90f094d 100644
--- a/mongodb/newsletter_test.go
+++ b/mongodb/newsletter_test.go
@@ -21,34 +21,35 @@ func TestNLStorageSaveNewsletter(t *testing.T) {
 	database := client.Database(DBName)
 	collection := database.Collection("newsletter")
 
-	NLStorage := NewNLStorage(client, DBName)
-	err := NLStorage.SaveNewsletter(ctx, Newsletter{
+	want := Newsletter{
 		UserEmail: "j@gmail.com",
 		URLs:      []string{"https://www.google.com"},
-	})
+	}
+
+	NLStorage := NewNLStorage(client, DBName)
+	err := NLStorage.SaveNewsletter(ctx, want)
 	if err != nil {
 		t.Fatal("error saving newsletter", err)
 	}
 
-	var nls []Newsletter
+	var got []Newsletter
 	cursor, err := collection.Find(context.Background(), bson.M{})
 	if err != nil {
 		t.Fatal("error finding newsletter", err)
 	}
 
-	if err := cursor.All(ctx, &nls); err != nil {
+	if err := cursor.All(ctx, &got); err != nil {
 		t.Fatal("error decoding newsletter", err)
 	}
 
-	if len(nls) == 1 {
-		reflect.DeepEqual(nls[0], Newsletter{
-			UserEmail: "j@gmail.com",
-			URLs:      []string{"https://www.google.com"},
-		})
+	if len(got) == 1 {
+		if !reflect.DeepEqual(got[0], want) {
+			t.Fatalf("got %v, want %v", got[0], want)
+		}
 	} else {
-		t.Fatal("expected 1 newsletter, got", len(nls))
+		t.Fatal("expected 1 newsletter, got", len(got))
 	}
 
 	t.Cleanup(teardown(ctx, client, DBName))
@@ -61,28 +62,107 @@ func TestNLStorageNewsletter(t *testing.T) {
 	database := client.Database(DBName)
 	collection := database.Collection("newsletter")
 
-	_, err := collection.InsertOne(ctx, Newsletter{
+	want := Newsletter{
 		UserEmail: "j@gmail.com",
 		URLs:      []string{"https://www.google.com"},
-	})
+	}
+	_, err := collection.InsertOne(ctx, want)
 	if err != nil {
 		t.Fatal("error saving newsletter", err)
 	}
 
 	NLStorage := NewNLStorage(client, DBName)
-	nls, err := NLStorage.Newsletter()
+	got, err := NLStorage.Newsletter()
 	if err != nil {
 		t.Fatal("error getting newsletter", err)
 	}
 
-	if len(nls) == 1 {
-		reflect.DeepEqual(nls[0], Newsletter{
-			UserEmail: "j@gmail.com",
-			URLs:      []string{"https://www.google.com"},
-		})
+	if len(got) == 1 {
+		if !reflect.DeepEqual(got[0], want) {
+			t.Fatalf("got %v, want %v", got[0], want)
+		}
+	} else {
+		t.Fatal("expected 1 newsletter, got", len(got))
+	}
+
+	t.Cleanup(teardown(ctx, client, DBName))
+}
+
+func TestNLStorageSaveSite(t *testing.T) {
+	ctx := context.Background()
+	client, DBName := setup(ctx, t)
+
+	database := client.Database(DBName)
+	collection := database.Collection("sites")
+
+	want := []Site{
+		{UserEmail: "j@gmail.com", URL: "https://www.google.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 14, 15, 30, 0, 0, time.UTC)},
+		{UserEmail: "j@gmail.com", URL: "https://www.google.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 14, 15, 30, 0, 0, time.UTC)},
+		{UserEmail: "jj@gmail.com", URL: "https://www.jj.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 14, 15, 30, 0, 0, time.UTC)},
+	}
+
+	NLStorage := NewNLStorage(client, DBName)
+	err := NLStorage.SaveSite(ctx, want)
+
+	if err != nil {
+		t.Fatal("error saving site", err)
+	}
+
+	var got []Site
+	cursor, err := collection.Find(context.Background(), bson.M{})
+	if err != nil {
+		t.Fatal("error finding site", err)
+	}
+
+	if err := cursor.All(ctx, &got); err != nil {
+		t.Fatal("error decoding site", err)
+	}
+
+	if len(got) == 3 {
+		if !reflect.DeepEqual(got, want) {
+			t.Fatalf("got %v, want %v", got, want)
+		}
+	} else {
+		t.Fatal("expected 3 sites, got", len(got))
+	}
+
+	t.Cleanup(teardown(ctx, client, DBName))
+}
+
+func TestNLStorageSites(t *testing.T) {
+	ctx := context.Background()
+	client, DBName := setup(ctx, t)
+
+	want := []Site{
+		{UserEmail: "j@gmail.com", URL: "https://www.google.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 13, 15, 30, 0, 0, time.UTC)},
+		{UserEmail: "j@gmail.com", URL: "https://www.google.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 12, 15, 30, 0, 0, time.UTC)},
+		{UserEmail: "j@gmail.com", URL: "https://www.google.com", Content: "HTML", ScrapeDatetime: time.Date(2023, time.August, 11, 15, 30, 0, 0, time.UTC)},
+	}
+
+	NLStorage := NewNLStorage(client, DBName)
+	err := NLStorage.SaveSite(ctx, want)
+	if err != nil {
+		t.Fatal("error saving site", err)
+	}
+
+	got, err := NLStorage.Sites("j@gmail.com", "https://www.google.com")
+	if err != nil {
+		t.Fatal("error getting site", err)
+	}
+
+	if len(got) == 2 {
+		assert(t, got[0].UserEmail, want[0].UserEmail)
+		assert(t, got[0].URL, want[0].URL)
+		assert(t, got[0].Content, want[0].Content)
+		assert(t, got[0].ScrapeDatetime, want[0].ScrapeDatetime)
+
+		assert(t, got[1].UserEmail, want[1].UserEmail)
+		assert(t, got[1].URL, want[1].URL)
+		assert(t, got[1].Content, want[1].Content)
+		assert(t, got[1].ScrapeDatetime, want[1].ScrapeDatetime)
 	} else {
-		t.Fatal("expected 1 newsletter, got", len(nls))
+		t.Fatal("expected 2 sites, got", len(got))
 	}
 
 	t.Cleanup(teardown(ctx, client, DBName))
 }
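
For reviewers, a quick usage sketch of the API this patch introduces. This is
hypothetical code assumed to live alongside the mongodb package; the connection
URI, database name, and sample values are illustrative only:

    package mongodb

    import (
    	"context"
    	"fmt"
    	"log"
    	"time"

    	"go.mongodb.org/mongo-driver/mongo"
    	"go.mongodb.org/mongo-driver/mongo/options"
    )

    // ExampleNLStorage sketches the save/read round trip: SaveSite inserts
    // scraped snapshots, and Sites reads back at most the two newest ones for
    // a user/URL pair (sorted by scrape_date descending, limited to 2).
    func ExampleNLStorage() {
    	ctx := context.Background()

    	// Illustrative URI; OpenDB from db.go could be used here instead.
    	client, err := mongo.Connect(ctx, options.Client().ApplyURI("mongodb://localhost:27017"))
    	if err != nil {
    		log.Fatal(err)
    	}
    	defer client.Disconnect(ctx)

    	storage := NewNLStorage(client, "newsletter")

    	// Persist one scraped snapshot of a page.
    	err = storage.SaveSite(ctx, []Site{{
    		UserEmail:      "j@gmail.com",
    		URL:            "https://www.google.com",
    		Content:        "<html>...</html>",
    		ScrapeDatetime: time.Now().UTC(),
    	}})
    	if err != nil {
    		log.Fatal(err)
    	}

    	// Fetch the two most recent snapshots for that user/URL pair.
    	sites, err := storage.Sites("j@gmail.com", "https://www.google.com")
    	if err != nil {
    		log.Fatal(err)
    	}
    	fmt.Println(len(sites))
    }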