-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
99 lines (87 loc) · 2.14 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
package main
import (
	//"encoding/json"
	"fmt"
	"net/url"
	"os"
	"strconv"
	"sync"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/codegangsta/cli"
	r "github.com/mohakkataria/kfit-scraper/retriever"
	s "github.com/mohakkataria/kfit-scraper/scraper"
	w "github.com/mohakkataria/kfit-scraper/writer"
)
const (
	// gateway is the base URL of the partner listing; process appends a
	// "page" query parameter to it for every listing page it requests.
	gateway = "https://access.kfit.com/partners?city=kuala-lampur"

	// maxPages is the upper bound on the number of listing pages that
	// process will request.
	maxPages = 10000
)
// main configures the "KFit Scraper" command-line application (name, version,
// author, usage text, default action and unknown-command handler) and runs it
// with the process's command-line arguments.
func main() {
	app := cli.NewApp()
	app.Name = "KFit Scraper"
	app.Version = "0.0.1"
	app.Author = "Mohak Kataria"
	app.Usage = "CLI tool for scraping contents of partners from KFit Kuala Lampur page"
	app.Action = process
	app.CommandNotFound = commandNotFound

	// Do not drop the error returned by Run: report it and exit non-zero so
	// that a failed invocation is distinguishable from a successful one.
	if err := app.Run(os.Args); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
// process is the CLI action. It requests the paginated partner listing,
// passes every non-empty page of links to s.Scrape, and passes the scraped
// partners to w.Write.
//
// Listing pages 0..maxPages-1 are requested concurrently, with a new request
// started roughly every 500ms. The first empty page marks the end of the
// listing and stops any further requests from being started. Scraping and
// writing happen on the calling goroutine, one page at a time, and process
// returns only after every request that was started has finished and every
// non-empty page has been handled.
//
// A retrieval error is printed to stderr and terminates the program with
// exit status 1. The context argument is unused; it is present because the
// function is installed as the cli application's Action.
func process(c *cli.Context) {
	const fetchInterval = 500 * time.Millisecond

	var (
		pages    = make(chan r.Collection) // non-empty listing pages awaiting scraping
		done     = make(chan struct{})     // closed once an empty listing page is seen
		stopOnce sync.Once                 // several in-flight fetches may all hit empty pages
		fetchers sync.WaitGroup            // tracks page fetches that are still running
	)

	go func() {
		// However the launch loop ends, wait for the fetches already in
		// flight before closing pages, so no result is lost and no fetcher
		// is left blocked on a send.
		defer func() {
			fetchers.Wait()
			close(pages)
		}()

		for page := 0; page < maxPages; page++ {
			select {
			case <-done:
				return
			case <-time.After(fetchInterval):
			}

			fetchers.Add(1)
			go func(page int) {
				defer fetchers.Done()

				query := url.Values{}
				query.Set("page", strconv.Itoa(page))
				coll, err := r.RetrievePartnerLinks(gateway+"&"+query.Encode(), goquery.NewDocument)
				if err != nil {
					fmt.Fprintf(os.Stderr, "There was an issue retrieving links from the page: %s", err.Error())
					os.Exit(1)
				}

				if len(coll) == 0 {
					// End of the listing: tell the launcher to stop. Once
					// guards against closing done more than once.
					stopOnce.Do(func() { close(done) })
					return
				}
				pages <- coll
			}(page)
		}
	}()

	// pages is closed only after all fetchers have finished, so this loop
	// ends exactly when every fetched page has been scraped and written.
	for coll := range pages {
		scraped := s.Scrape(coll)
		w.Write(scraped.Partners)
	}
}
// commandNotFound is installed as the application's CommandNotFound handler.
// It writes a message naming the unrecognised command and pointing at --help
// to stderr, then terminates the program with exit status 2.
func commandNotFound(c *cli.Context, command string) {
	appName := c.App.Name
	fmt.Fprintf(
		os.Stderr,
		"%s: '%s' is not a %s command. See '%s --help'.",
		appName, command, appName, appName,
	)
	os.Exit(2)
}