-
Notifications
You must be signed in to change notification settings - Fork 36
/
Copy pathmain.go
executable file
·79 lines (64 loc) · 1.49 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
package main
import (
"log"
"fmt"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/go-crawler/car-prices/downloader"
"github.com/go-crawler/car-prices/scheduler"
"github.com/go-crawler/car-prices/spiders"
"github.com/go-crawler/car-prices/model"
)
var (
	// StartUrl is the used-car listing path template; %s is filled with a city's pinyin name.
	StartUrl = "/2sc/%s/a0_0msdgscncgpi1ltocsp1exb4/"
	// BaseUrl is the autohome host prepended to every scheduled path.
	BaseUrl = "https://car.autohome.com.cn"
	// maxPage caps how many listing pages are followed per city.
	maxPage int = 99
	// cars accumulates every scraped car until main persists them via model.AddCars.
	cars []spiders.QcCar
)
func Start(url string, ch chan []spiders.QcCar) {
body := downloader.Get(BaseUrl + url)
doc, err := goquery.NewDocumentFromReader(body)
if err != nil {
log.Printf("Downloader.Get err: %v", err)
}
currentPage := spiders.GetCurrentPage(doc)
nextPageUrl, _ := spiders.GetNextPageUrl(doc)
if currentPage > 0 && currentPage <= maxPage {
cars := spiders.GetCars(doc)
log.Println(cars)
ch <- cars
if url := nextPageUrl; url != "" {
scheduler.AppendUrl(url)
}
log.Println(url)
} else {
log.Println("Max page !!!")
}
}
// main seeds the scheduler with one start URL per city, then drives the
// crawl loop: every batch of cars received on ch triggers the next fetch.
// The loop ends once no results arrive for delayTime, after which the
// collected cars are persisted.
func main() {
	citys := spiders.GetCitys()
	for _, v := range citys {
		scheduler.AppendUrl(fmt.Sprintf(StartUrl, v.Pinyin))
	}
	start := time.Now()
	delayTime := time.Second * 6
	ch := make(chan []spiders.QcCar)
	// Reuse a single timer instead of allocating one per iteration via time.After.
	timer := time.NewTimer(delayTime)
	defer timer.Stop()
L:
	for {
		if url := scheduler.PopUrl(); url != "" {
			go Start(url, ch)
		}
		select {
		case r := <-ch:
			cars = append(cars, r...)
			// Only spawn a follow-up fetch when the scheduler still has work;
			// launching Start("") would pointlessly fetch the bare base URL.
			if url := scheduler.PopUrl(); url != "" {
				go Start(url, ch)
			}
			// Restart the idle timeout, draining the channel if it already fired.
			if !timer.Stop() {
				<-timer.C
			}
			timer.Reset(delayTime)
		case <-timer.C:
			log.Println("Timeout...")
			break L
		}
	}
	if len(cars) > 0 {
		model.AddCars(cars)
	}
	// Subtract the final idle wait so the reported time reflects actual crawling.
	log.Printf("Time: %s", time.Since(start)-delayTime)
}