diff --git a/pkg/scrape/wetvr.go b/pkg/scrape/wetvr.go
index 4de99ce98..6abccfa7c 100644
--- a/pkg/scrape/wetvr.go
+++ b/pkg/scrape/wetvr.go
@@ -2,6 +2,7 @@ package scrape
 
 import (
 	"encoding/json"
+	"fmt"
 	"strings"
 	"sync"
 	"time"
@@ -21,6 +22,7 @@ func WetVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-
 	sceneCollector := createCollector("wetvr.com")
 	siteCollector := createCollector("wetvr.com")
+	pageCnt := 1
 
 	sceneCollector.OnHTML(`div#trailer_player`, func(e *colly.HTMLElement) {
 		sc := models.ScrapedScene{}
@@ -89,10 +91,17 @@ func WetVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-
 		out <- sc
 	})
 
-	siteCollector.OnHTML(`ul a.page-link`, func(e *colly.HTMLElement) {
+	// The <p> containing "Latest" only exists on index pages that list scene URLs.
+	// If a page doesn't have it, we have reached the end, so stop searching for more site URLs.
+	siteCollector.OnHTML(`div:has(p:contains("Latest"))`, func(e *colly.HTMLElement) {
+		// Guard against multiple firings from a page whose count we have already incremented
 		if !limitScraping {
-			pageURL := e.Request.AbsoluteURL(e.Attr("href"))
-			siteCollector.Visit(pageURL)
+			if e.Request.URL.String() == `https://wetvr.com/?page=`+fmt.Sprint(pageCnt) {
+				pageCnt++
+
+				pageURL := e.Request.AbsoluteURL(`https://wetvr.com/?page=` + fmt.Sprint(pageCnt))
+				siteCollector.Visit(pageURL)
+			}
 		}
 	})
@@ -133,8 +142,9 @@ func WetVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-
 			ctx.Put("scene-date", "")
 		}
 		sceneCollector.Request("GET", singleSceneURL, nil, ctx, nil)
-	} else {
-		siteCollector.Visit("https://wetvr.com/")
+	} else if pageCnt == 1 {
+		// Only visit page 1 on startup of the scraper
+		siteCollector.Visit("https://wetvr.com/?page=1")
 	}
 
 	if updateSite {
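For readers unfamiliar with the pattern, here is a minimal, self-contained sketch of the marker-driven sequential pagination this patch switches to. It uses colly v2 directly rather than the repo's createCollector helper, and the standalone collector setup and logging are assumptions for illustration, not the project's actual code:

```go
package main

import (
	"fmt"
	"log"

	"github.com/gocolly/colly/v2"
)

func main() {
	// Illustrative standalone collector; the real scraper builds its
	// collectors via createCollector.
	c := colly.NewCollector(colly.AllowedDomains("wetvr.com"))

	pageCnt := 1

	// The marker element only appears on index pages that still list scenes;
	// once it disappears, pagination stops naturally because no further
	// Visit is queued.
	c.OnHTML(`div:has(p:contains("Latest"))`, func(e *colly.HTMLElement) {
		// The callback can fire more than once per page (the selector may
		// match several divs), so only advance when the current URL matches
		// the page we expect to be on.
		if e.Request.URL.String() == "https://wetvr.com/?page="+fmt.Sprint(pageCnt) {
			pageCnt++
			next := fmt.Sprintf("https://wetvr.com/?page=%d", pageCnt)
			if err := c.Visit(next); err != nil {
				log.Println("pagination stopped:", err)
			}
		}
	})

	// Seed the crawl at page 1; subsequent pages are queued by the
	// callback above.
	if err := c.Visit("https://wetvr.com/?page=1"); err != nil {
		log.Fatal(err)
	}
}
```

Compared with following `ul a.page-link` hrefs, driving the page counter explicitly means the crawl terminates as soon as an index page lacks the "Latest" marker, and the URL equality check keeps the counter from being bumped more than once per page.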