-
Notifications
You must be signed in to change notification settings - Fork 2
/
shopify.go
99 lines (76 loc) · 3.09 KB
/
shopify.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
package shopify
import (
"fmt"
"log"
"github.com/gocolly/colly"
"github.com/gocolly/colly/extensions"
//"go.mongodb.org/mongo-driver/bson"
"strconv"
"strings"
"time"
"go.mongodb.org/mongo-driver/bson/primitive"
)
// DateFormat is the date layout used by shopify, written in Go's
// reference-time notation. Parse passes it to time.Parse for both the review
// date and the reply date.
const DateFormat = "January 2, 2006"
// Review represents the structure of a single review on apps.shopify.com; compatible with json and bson.
// Every field is tagged omitempty, so zero values are left out when encoding.
type Review struct {
// ID is the document identifier; Parse in this file never sets it.
ID primitive.ObjectID `json:"_id,omitempty" bson:"_id,omitempty"`
// Username is the text of the review header's h3 element.
Username string `json:"username,omitempty" bson:"username,omitempty"`
// Rating is the integer value of the review's data-rating attribute (0 if it could not be converted).
Rating int `json:"rating,omitempty" bson:"rating,omitempty"`
// Date is the review date as Unix seconds; note that it is serialized under the key "time".
Date int64 `json:"time,omitempty" bson:"time,omitempty"`
// Content is the whitespace-trimmed review text.
Content string `json:"content,omitempty" bson:"content,omitempty"`
// Edited is true when the scraped date label contained the "Edited " marker.
Edited bool `json:"edited,omitempty" bson:"edited,omitempty"`
// Helpful is the integer value of the helpful-count element (0 if it could not be converted).
Helpful int `json:"helpful,omitempty" bson:"helpful,omitempty"`
// Reply is the whitespace-trimmed text of the reply attached to the review, if any.
Reply string `json:"reply,omitempty" bson:"reply,omitempty"`
// ReplyDate is the reply date as Unix seconds; it stays 0 when no reply date could be parsed.
ReplyDate int64 `json:"replydate,omitempty" bson:"replydate,omitempty"`
}
// Parse scrapes the reviews reachable from url and returns them.
//
// It visits url, turns every "div.review-listing" element into a Review, and
// follows the "a.search-pagination__next-page-text" link on each page until no
// such link is found. Requests are rate limited to 4 in parallel with a random
// delay of up to one second.
//
// A basic progress bar is written to standard output: "[" before the crawl,
// one "=" per pagination link followed, and "]" once the crawl has finished.
//
// Parse never terminates the process. Request errors are printed, and a review
// whose date cannot be parsed is logged and kept with Date left at zero.
//
// The returned pointer is never nil; it points to an empty slice when no
// review was collected.
func Parse(url string) *[]Review {
	reviews := []Review{}

	c := colly.NewCollector(
		colly.Async(true),
	)
	extensions.RandomUserAgent(c)
	if err := c.Limit(&colly.LimitRule{
		DomainGlob:  "*",
		Parallelism: 4,
		RandomDelay: time.Second,
	}); err != nil {
		log.Printf("shopify: applying limit rule: %v", err)
	}

	// NOTE(review): reviews is appended to without a lock. This looks safe only
	// because each page is requested from the previous page's pagination
	// callback; add a mutex if several pages are ever queued up front.
	c.OnHTML("div.review-listing", func(e *colly.HTMLElement) {
		reviews = append(reviews, extractReview(e))
	})

	c.OnError(func(resp *colly.Response, err error) {
		fmt.Println(err)
	})

	c.OnHTML("a.search-pagination__next-page-text", func(e *colly.HTMLElement) {
		fmt.Printf("=")
		next := e.Request.AbsoluteURL(e.Attr("href"))
		if err := e.Request.Visit(next); err != nil {
			log.Printf("shopify: visiting next page %q: %v", next, err)
		}
	})

	// Basic loading bar to understand that the process is going
	fmt.Printf("[")
	if err := c.Visit(url); err != nil {
		log.Printf("shopify: visiting %q: %v", url, err)
	}
	c.Wait()
	fmt.Printf("]")

	return &reviews
}

// extractReview builds a Review from a single "div.review-listing" element.
func extractReview(e *colly.HTMLElement) Review {
	var review Review

	review.Username = e.ChildText("div[data-review-id] div.review-listing-header h3")
	review.Content = strings.TrimSpace(e.ChildText("div[data-review-id] div.review-content div.truncate-content-copy"))
	review.Reply = strings.TrimSpace(e.ChildText("div.review-reply div.review-content div.truncate-content-copy p"))

	// Conversion errors are deliberately ignored: when the value is absent or
	// not numeric, Atoi yields 0 and the field is simply left unset.
	review.Rating, _ = strconv.Atoi(e.ChildAttr("div[data-review-id] div.review-metadata div:nth-child(1) div.review-metadata__item-value div[data-rating]", "data-rating"))
	review.Helpful, _ = strconv.Atoi(e.ChildText("div.review-footer div.review-helpfulness form button span.review-helpfulness__helpful-count"))

	// The date label may carry an "Edited " marker next to the date itself;
	// strip it before parsing and remember that the review was edited.
	dateStr := strings.TrimSpace(e.ChildText("div[data-review-id] div.review-metadata div.review-metadata__item-label"))
	if strings.Contains(dateStr, "Edited ") {
		dateStr = strings.ReplaceAll(dateStr, "Edited ", "")
		review.Edited = true
	}
	if date, err := time.Parse(DateFormat, dateStr); err != nil {
		// One malformed label must not abort the whole crawl (or the host
		// program), so report it and keep the rest of the review.
		log.Printf("shopify: parsing review date %q: %v", dateStr, err)
	} else {
		review.Date = date.Unix()
	}

	// The reply date is best-effort: a parse failure leaves ReplyDate at zero.
	replyDateStr := strings.TrimSpace(e.ChildText("div.review-reply div.review-reply__header div.review-reply__header-item"))
	if replyDate, err := time.Parse(DateFormat, replyDateStr); err == nil {
		review.ReplyDate = replyDate.Unix()
	}

	return review
}