From 1d6c08a79813d0bc31bc0407d0fb1b5982b97f71 Mon Sep 17 00:00:00 2001 From: crwxaj Date: Wed, 28 Aug 2024 20:35:17 +0200 Subject: [PATCH] Revert "feat: Optimize concurrent scraping (#1828)" This reverts commit d648c4cdb173545dde09055a93cca96efd126cf3. --- pkg/models/model_scraper.go | 30 ++---------------------------- pkg/scrape/baberoticavr.go | 3 ++- pkg/scrape/badoink.go | 13 +++++++------ pkg/scrape/caribbeancom.go | 3 ++- pkg/scrape/czechvr.go | 7 ++++--- pkg/scrape/darkroomvr.go | 3 ++- pkg/scrape/fuckpassvr.go | 3 ++- pkg/scrape/groobyvr.go | 3 ++- pkg/scrape/hologirlsvr.go | 3 ++- pkg/scrape/lethalhardcorevr.go | 7 ++++--- pkg/scrape/littlecaprice.go | 3 ++- pkg/scrape/navr.go | 3 ++- pkg/scrape/povr.go | 9 +++++---- pkg/scrape/realitylovers.go | 7 ++++--- pkg/scrape/realjamvr.go | 7 ++++--- pkg/scrape/sexbabesvr.go | 3 ++- pkg/scrape/sinsvr.go | 3 ++- pkg/scrape/slrstudios.go | 9 +++++---- pkg/scrape/stasyqvr.go | 3 ++- pkg/scrape/swallowbay.go | 3 ++- pkg/scrape/tmwvrnet.go | 3 ++- pkg/scrape/tngf.go | 3 ++- pkg/scrape/transvr.go | 3 ++- pkg/scrape/virtualpee.go | 3 ++- pkg/scrape/virtualporn.go | 3 ++- pkg/scrape/virtualrealporn.go | 13 +++++++------ pkg/scrape/virtualtaboo.go | 3 ++- pkg/scrape/vr3000.go | 3 ++- pkg/scrape/vrallure.go | 3 ++- pkg/scrape/vrbangers.go | 13 +++++++------ pkg/scrape/vrhush.go | 3 ++- pkg/scrape/vrlatina.go | 3 ++- pkg/scrape/vrphub.go | 7 ++++--- pkg/scrape/vrporn.go | 9 +++++---- pkg/scrape/vrsexygirlz.go | 3 ++- pkg/scrape/vrspy.go | 3 ++- pkg/scrape/wetvr.go | 3 ++- pkg/scrape/zexywankitnow.go | 7 ++++--- pkg/tasks/content.go | 13 +++++++------ 39 files changed, 119 insertions(+), 107 deletions(-) mode change 100644 => 100755 pkg/scrape/vrbangers.go mode change 100644 => 100755 pkg/scrape/vrspy.go diff --git a/pkg/models/model_scraper.go b/pkg/models/model_scraper.go index 317e747c0..6d5bf44ec 100644 --- a/pkg/models/model_scraper.go +++ b/pkg/models/model_scraper.go @@ -2,12 +2,12 @@ package models import ( "encoding/json" - "sync/atomic" + "sync" ) var scrapers []Scraper -type ScraperFunc func(*ScrapeWG, bool, []string, chan<- ScrapedScene, string, string, bool) error +type ScraperFunc func(*sync.WaitGroup, bool, []string, chan<- ScrapedScene, string, string, bool) error type Scraper struct { ID string `json:"id"` @@ -90,29 +90,3 @@ func RegisterScraper(id string, name string, avatarURL string, domain string, f s.MasterSiteId = masterSiteId scrapers = append(scrapers, s) } - -// Custom wg functions, to allow access to the current count of waitgroups. This allows running scrapers at max count always -type ScrapeWG struct { - count int64 -} - -func (wg *ScrapeWG) Add(n int64) { - atomic.AddInt64(&wg.count, n) -} - -func (wg *ScrapeWG) Done() { - wg.Add(-1) - if atomic.LoadInt64(&wg.count) < 0 { - panic("negative wait group counter") - } -} - -func (wg *ScrapeWG) Wait(n int64) { - for atomic.LoadInt64(&wg.count) >= n && atomic.LoadInt64(&wg.count) != 0 { - continue - } -} - -func (wg *ScrapeWG) Count() int64 { - return atomic.LoadInt64(&wg.count) -} diff --git a/pkg/scrape/baberoticavr.go b/pkg/scrape/baberoticavr.go index fe76386a9..08ce27227 100644 --- a/pkg/scrape/baberoticavr.go +++ b/pkg/scrape/baberoticavr.go @@ -7,6 +7,7 @@ import ( "regexp" "strconv" "strings" + "sync" "github.com/go-resty/resty/v2" "github.com/gocolly/colly/v2" @@ -15,7 +16,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func BaberoticaVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func BaberoticaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "baberoticavr" siteID := "BaberoticaVR" diff --git a/pkg/scrape/badoink.go b/pkg/scrape/badoink.go index 6c18805bf..c547da02b 100644 --- a/pkg/scrape/badoink.go +++ b/pkg/scrape/badoink.go @@ -8,6 +8,7 @@ import ( "regexp" "strconv" "strings" + "sync" "time" "github.com/go-resty/resty/v2" @@ -22,7 +23,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func BadoinkSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func BadoinkSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -267,23 +268,23 @@ func BadoinkSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out return nil } -func BadoinkVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func BadoinkVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "badoinkvr", "BadoinkVR", "https://badoinkvr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping) } -func B18VR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func B18VR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "18vr", "18VR", "https://18vr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping) } -func VRCosplayX(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRCosplayX(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrcosplayx", "VRCosplayX", "https://vrcosplayx.com/cosplaypornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping) } -func BabeVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func BabeVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "babevr", "BabeVR", "https://babevr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping) } -func KinkVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func KinkVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "kinkvr", "KinkVR", "https://kinkvr.com/bdsm-vr-videos?order=newest", singeScrapeAdditionalInfo, limitScraping) } diff --git a/pkg/scrape/caribbeancom.go b/pkg/scrape/caribbeancom.go index f2de10531..82152d4c9 100644 --- a/pkg/scrape/caribbeancom.go +++ b/pkg/scrape/caribbeancom.go @@ -4,6 +4,7 @@ import ( "encoding/json" "strconv" "strings" + "sync" "github.com/bregydoc/gtranslate" "github.com/gocolly/colly/v2" @@ -14,7 +15,7 @@ import ( "golang.org/x/text/language" ) -func CariVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func CariVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "caribbeancomvr" siteID := "CaribbeanCom VR" diff --git a/pkg/scrape/czechvr.go b/pkg/scrape/czechvr.go index bdb17b074..b470feb31 100644 --- a/pkg/scrape/czechvr.go +++ b/pkg/scrape/czechvr.go @@ -4,6 +4,7 @@ import ( "regexp" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -13,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func CzechVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, nwID string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func CzechVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, nwID string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) commonDb, _ := models.GetCommonDB() @@ -198,14 +199,14 @@ func CzechVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out cha } func addCZVRScraper(id string, name string, nwid string, avatarURL string) { - registerScraper(id, name, avatarURL, "czechvrnetwork.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper(id, name, avatarURL, "czechvrnetwork.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return CzechVR(wg, updateSite, knownScenes, out, singleSceneURL, id, name, nwid, singeScrapeAdditionalInfo, limitScraping) }) } func init() { // scraper for scraping single scenes where only the url is provided - registerScraper("czechvr-single_scene", "Czech VR - Other Studios", "", "czechvrnetwork.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper("czechvr-single_scene", "Czech VR - Other Studios", "", "czechvrnetwork.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return CzechVR(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", limitScraping) }) addCZVRScraper("czechvr", "Czech VR", "15", "https://www.czechvr.com/images/favicon/android-chrome-256x256.png") diff --git a/pkg/scrape/darkroomvr.go b/pkg/scrape/darkroomvr.go index ec9e7fbee..a00d19bd9 100644 --- a/pkg/scrape/darkroomvr.go +++ b/pkg/scrape/darkroomvr.go @@ -5,6 +5,7 @@ import ( "fmt" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -12,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func DarkRoomVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func DarkRoomVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "darkroomvr" siteID := "DarkRoomVR" diff --git a/pkg/scrape/fuckpassvr.go b/pkg/scrape/fuckpassvr.go index 34963bf86..a0ff8229f 100644 --- a/pkg/scrape/fuckpassvr.go +++ b/pkg/scrape/fuckpassvr.go @@ -5,6 +5,7 @@ import ( "net/url" "regexp" "strings" + "sync" "github.com/go-resty/resty/v2" "github.com/gocolly/colly/v2" @@ -13,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func FuckPassVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func FuckPassVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "fuckpassvr-native" siteID := "FuckPassVR" diff --git a/pkg/scrape/groobyvr.go b/pkg/scrape/groobyvr.go index b10e5041e..62d7f467c 100644 --- a/pkg/scrape/groobyvr.go +++ b/pkg/scrape/groobyvr.go @@ -5,6 +5,7 @@ import ( "regexp" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -13,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func GroobyVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func GroobyVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "groobyvr" siteID := "GroobyVR" diff --git a/pkg/scrape/hologirlsvr.go b/pkg/scrape/hologirlsvr.go index 0ce59d9e2..fba258474 100644 --- a/pkg/scrape/hologirlsvr.go +++ b/pkg/scrape/hologirlsvr.go @@ -3,6 +3,7 @@ package scrape import ( "regexp" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -10,7 +11,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func HoloGirlsVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func HoloGirlsVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "hologirlsvr" siteID := "HoloGirlsVR" diff --git a/pkg/scrape/lethalhardcorevr.go b/pkg/scrape/lethalhardcorevr.go index 41ade50c7..95cdce180 100644 --- a/pkg/scrape/lethalhardcorevr.go +++ b/pkg/scrape/lethalhardcorevr.go @@ -3,6 +3,7 @@ package scrape import ( "regexp" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -25,7 +26,7 @@ func isGoodTag(lookup string) bool { return true } -func LethalHardcoreSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func LethalHardcoreSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -175,11 +176,11 @@ func LethalHardcoreSite(wg *models.ScrapeWG, updateSite bool, knownScenes []stri return nil } -func LethalHardcoreVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func LethalHardcoreVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return LethalHardcoreSite(wg, updateSite, knownScenes, out, singleSceneURL, "lethalhardcorevr", "LethalHardcoreVR", "https://lethalhardcorevr.com/lethal-hardcore-vr-scenes.html?studio=95595&sort=released", singeScrapeAdditionalInfo, limitScraping) } -func WhorecraftVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func WhorecraftVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return LethalHardcoreSite(wg, updateSite, knownScenes, out, singleSceneURL, "whorecraftvr", "WhorecraftVR", "https://lethalhardcorevr.com/lethal-hardcore-vr-scenes.html?studio=95347&sort=released", singeScrapeAdditionalInfo, limitScraping) } diff --git a/pkg/scrape/littlecaprice.go b/pkg/scrape/littlecaprice.go index e2cc05a1a..a0387df0e 100644 --- a/pkg/scrape/littlecaprice.go +++ b/pkg/scrape/littlecaprice.go @@ -3,6 +3,7 @@ package scrape import ( "net/url" "strings" + "sync" "time" "github.com/gocolly/colly/v2" @@ -11,7 +12,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func LittleCaprice(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func LittleCaprice(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "littlecaprice" siteID := "Little Caprice Dreams" diff --git a/pkg/scrape/navr.go b/pkg/scrape/navr.go index 4ee8f4aba..532dbb4ec 100644 --- a/pkg/scrape/navr.go +++ b/pkg/scrape/navr.go @@ -4,6 +4,7 @@ import ( "html" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -13,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func NaughtyAmericaVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func NaughtyAmericaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "naughtyamericavr" siteID := "NaughtyAmerica VR" diff --git a/pkg/scrape/povr.go b/pkg/scrape/povr.go index 0368294aa..4d75e247f 100644 --- a/pkg/scrape/povr.go +++ b/pkg/scrape/povr.go @@ -6,6 +6,7 @@ import ( "regexp" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -14,7 +15,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func POVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, masterSiteId string) error { +func POVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, masterSiteId string) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -163,18 +164,18 @@ func addPOVRScraper(id string, name string, company string, avatarURL string, cu } if masterSiteId == "" { - registerScraper(id, suffixedName, avatarURL, "povr.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper(id, suffixedName, avatarURL, "povr.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return POVR(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, "") }) } else { - registerAlternateScraper(id, suffixedName, avatarURL, "povr.com", masterSiteId, func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerAlternateScraper(id, suffixedName, avatarURL, "povr.com", masterSiteId, func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return POVR(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, masterSiteId) }) } } func init() { - registerScraper("povr-single_scene", "POVR - Other Studios", "", "povr.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper("povr-single_scene", "POVR - Other Studios", "", "povr.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return POVR(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo, limitScraping, "") }) var scrapers config.ScraperList diff --git a/pkg/scrape/realitylovers.go b/pkg/scrape/realitylovers.go index ba4029cae..23a0d5bfd 100644 --- a/pkg/scrape/realitylovers.go +++ b/pkg/scrape/realitylovers.go @@ -4,6 +4,7 @@ import ( "fmt" "regexp" "strings" + "sync" "time" "github.com/go-resty/resty/v2" @@ -14,7 +15,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func RealityLoversSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, domain string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func RealityLoversSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, domain string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -150,11 +151,11 @@ func RealityLoversSite(wg *models.ScrapeWG, updateSite bool, knownScenes []strin return nil } -func RealityLovers(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func RealityLovers(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return RealityLoversSite(wg, updateSite, knownScenes, out, singleSceneURL, "realitylovers", "RealityLovers", "realitylovers.com", singeScrapeAdditionalInfo, limitScraping) } -func TSVirtualLovers(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func TSVirtualLovers(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return RealityLoversSite(wg, updateSite, knownScenes, out, singleSceneURL, "tsvirtuallovers", "TSVirtualLovers", "tsvirtuallovers.com", singeScrapeAdditionalInfo, limitScraping) } diff --git a/pkg/scrape/realjamvr.go b/pkg/scrape/realjamvr.go index 1c98e67da..d2ea92306 100644 --- a/pkg/scrape/realjamvr.go +++ b/pkg/scrape/realjamvr.go @@ -6,6 +6,7 @@ import ( "regexp" "strconv" "strings" + "sync" "time" "github.com/gocolly/colly/v2" @@ -16,7 +17,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func RealJamSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, domain string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func RealJamSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, domain string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -192,10 +193,10 @@ func RealJamSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out return nil } -func RealJamVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func RealJamVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return RealJamSite(wg, updateSite, knownScenes, out, singleSceneURL, "realjamvr", "RealJam VR", "realjamvr.com", singeScrapeAdditionalInfo, limitScraping) } -func PornCornVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func PornCornVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return RealJamSite(wg, updateSite, knownScenes, out, singleSceneURL, "porncornvr", "PornCorn VR", "porncornvr.com", singeScrapeAdditionalInfo, limitScraping) } diff --git a/pkg/scrape/sexbabesvr.go b/pkg/scrape/sexbabesvr.go index 16bb81e22..edf92dbe7 100644 --- a/pkg/scrape/sexbabesvr.go +++ b/pkg/scrape/sexbabesvr.go @@ -4,6 +4,7 @@ import ( "encoding/json" "net/url" "strings" + "sync" "time" "github.com/gocolly/colly/v2" @@ -13,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func SexBabesVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func SexBabesVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "sexbabesvr" siteID := "SexBabesVR" diff --git a/pkg/scrape/sinsvr.go b/pkg/scrape/sinsvr.go index 145bd63f6..433aada96 100644 --- a/pkg/scrape/sinsvr.go +++ b/pkg/scrape/sinsvr.go @@ -6,6 +6,7 @@ import ( "regexp" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -15,7 +16,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func SinsVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func SinsVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "sinsvr" siteID := "SinsVR" diff --git a/pkg/scrape/slrstudios.go b/pkg/scrape/slrstudios.go index d02eff5f2..b21e7c252 100644 --- a/pkg/scrape/slrstudios.go +++ b/pkg/scrape/slrstudios.go @@ -7,6 +7,7 @@ import ( "regexp" "strconv" "strings" + "sync" "github.com/go-resty/resty/v2" "github.com/gocolly/colly/v2" @@ -25,7 +26,7 @@ func absolutegallery(match string) string { return submatches[1] + submatches[3] + "_o.jpg" // construct new string with desired format } -func SexLikeReal(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, masterSiteId string) error { +func SexLikeReal(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, masterSiteId string) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -537,11 +538,11 @@ func addSLRScraper(id string, name string, company string, avatarURL string, cus } if masterSiteId == "" { - registerScraper(id, suffixedName, avatarURL, "sexlikereal.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper(id, suffixedName, avatarURL, "sexlikereal.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return SexLikeReal(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, "") }) } else { - registerAlternateScraper(id, suffixedName, avatarURL, "sexlikereal.com", masterSiteId, func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerAlternateScraper(id, suffixedName, avatarURL, "sexlikereal.com", masterSiteId, func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return SexLikeReal(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, masterSiteId) }) } @@ -550,7 +551,7 @@ func addSLRScraper(id string, name string, company string, avatarURL string, cus func init() { var scrapers config.ScraperList // scraper for single scenes with no existing scraper for the studio - registerScraper("slr-single_scene", "SLR - Other Studios", "", "sexlikereal.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper("slr-single_scene", "SLR - Other Studios", "", "sexlikereal.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return SexLikeReal(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo, limitScraping, "") }) diff --git a/pkg/scrape/stasyqvr.go b/pkg/scrape/stasyqvr.go index e55708dbd..e131badb1 100644 --- a/pkg/scrape/stasyqvr.go +++ b/pkg/scrape/stasyqvr.go @@ -4,6 +4,7 @@ import ( "net/url" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -12,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func StasyQVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func StasyQVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "stasyqvr" siteID := "StasyQVR" diff --git a/pkg/scrape/swallowbay.go b/pkg/scrape/swallowbay.go index dfa9a5bce..99627beb1 100644 --- a/pkg/scrape/swallowbay.go +++ b/pkg/scrape/swallowbay.go @@ -6,6 +6,7 @@ import ( "regexp" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -13,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func SwallowBay(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func SwallowBay(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "swallowbay" siteID := "SwallowBay" diff --git a/pkg/scrape/tmwvrnet.go b/pkg/scrape/tmwvrnet.go index 0d925c3fd..17f826d59 100644 --- a/pkg/scrape/tmwvrnet.go +++ b/pkg/scrape/tmwvrnet.go @@ -4,6 +4,7 @@ import ( "encoding/json" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -13,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func TmwVRnet(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func TmwVRnet(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "tmwvrnet" siteID := "TmwVRnet" diff --git a/pkg/scrape/tngf.go b/pkg/scrape/tngf.go index d7b13182e..47273b6f4 100644 --- a/pkg/scrape/tngf.go +++ b/pkg/scrape/tngf.go @@ -4,6 +4,7 @@ import ( "encoding/json" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -12,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func TNGFVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func TNGFVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "tonightsgirlfriend" siteID := "Tonight's Girlfriend VR" diff --git a/pkg/scrape/transvr.go b/pkg/scrape/transvr.go index 623dcc08a..7ba7d0faa 100644 --- a/pkg/scrape/transvr.go +++ b/pkg/scrape/transvr.go @@ -5,6 +5,7 @@ import ( "regexp" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -13,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func TransVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func TransVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "transvr" siteID := "TransVR" diff --git a/pkg/scrape/virtualpee.go b/pkg/scrape/virtualpee.go index f33e504bf..5731a30e3 100644 --- a/pkg/scrape/virtualpee.go +++ b/pkg/scrape/virtualpee.go @@ -3,6 +3,7 @@ package scrape import ( "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -11,7 +12,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VirtualPee(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualPee(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "virtualpee" siteID := "VirtualPee" diff --git a/pkg/scrape/virtualporn.go b/pkg/scrape/virtualporn.go index 351db81f4..7f44d766b 100644 --- a/pkg/scrape/virtualporn.go +++ b/pkg/scrape/virtualporn.go @@ -5,6 +5,7 @@ import ( "regexp" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -13,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VirtualPorn(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { // this scraper is non-standard in that it gathers info via an api rather than scraping html pages defer wg.Done() scraperID := "bvr" diff --git a/pkg/scrape/virtualrealporn.go b/pkg/scrape/virtualrealporn.go index 44d85e324..ecab19f92 100644 --- a/pkg/scrape/virtualrealporn.go +++ b/pkg/scrape/virtualrealporn.go @@ -8,6 +8,7 @@ import ( "image" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -16,7 +17,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VirtualRealPornSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualRealPornSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) page := 1 @@ -279,19 +280,19 @@ func VirtualRealPornSite(wg *models.ScrapeWG, updateSite bool, knownScenes []str return nil } -func VirtualRealPorn(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualRealPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealporn", "VirtualRealPorn", "https://virtualrealporn.com/", singeScrapeAdditionalInfo, limitScraping) } -func VirtualRealTrans(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualRealTrans(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealtrans", "VirtualRealTrans", "https://virtualrealtrans.com/", singeScrapeAdditionalInfo, limitScraping) } -func VirtualRealAmateur(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualRealAmateur(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealamateur", "VirtualRealAmateurPorn", "https://virtualrealamateurporn.com/", singeScrapeAdditionalInfo, limitScraping) } -func VirtualRealGay(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualRealGay(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealgay", "VirtualRealGay", "https://virtualrealgay.com/", singeScrapeAdditionalInfo, limitScraping) } -func VirtualRealPassion(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualRealPassion(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealpassion", "VirtualRealPassion", "https://virtualrealpassion.com/", singeScrapeAdditionalInfo, limitScraping) } diff --git a/pkg/scrape/virtualtaboo.go b/pkg/scrape/virtualtaboo.go index 12e3a5f98..c997d1918 100644 --- a/pkg/scrape/virtualtaboo.go +++ b/pkg/scrape/virtualtaboo.go @@ -5,6 +5,7 @@ import ( "regexp" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -13,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VirtualTaboo(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualTaboo(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "virtualtaboo" siteID := "VirtualTaboo" diff --git a/pkg/scrape/vr3000.go b/pkg/scrape/vr3000.go index d027d5f02..52a32fb44 100644 --- a/pkg/scrape/vr3000.go +++ b/pkg/scrape/vr3000.go @@ -5,6 +5,7 @@ import ( "regexp" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -13,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VR3000(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VR3000(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "vr3000" siteID := "VR3000" diff --git a/pkg/scrape/vrallure.go b/pkg/scrape/vrallure.go index 543538d50..13b3f8520 100644 --- a/pkg/scrape/vrallure.go +++ b/pkg/scrape/vrallure.go @@ -6,6 +6,7 @@ import ( "regexp" "strconv" "strings" + "sync" "golang.org/x/net/html" @@ -17,7 +18,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRAllure(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRAllure(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "vrallure" siteID := "VRAllure" diff --git a/pkg/scrape/vrbangers.go b/pkg/scrape/vrbangers.go old mode 100644 new mode 100755 index 71bbb96b9..72d273001 --- a/pkg/scrape/vrbangers.go +++ b/pkg/scrape/vrbangers.go @@ -4,6 +4,7 @@ import ( "encoding/json" "strconv" "strings" + "sync" "github.com/go-resty/resty/v2" "github.com/gocolly/colly/v2" @@ -14,7 +15,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRBangersSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, limitScraping bool) error { +func VRBangersSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -174,19 +175,19 @@ func VRBangersSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, o return nil } -func VRBangers(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRBangers(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrbangers", "VRBangers", "https://vrbangers.com/", limitScraping) } -func VRBTrans(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRBTrans(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrbtrans", "VRBTrans", "https://vrbtrans.com/", limitScraping) } -func VRBGay(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRBGay(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrbgay", "VRBGay", "https://vrbgay.com/", limitScraping) } -func VRConk(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRConk(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrconk", "VRCONK", "https://vrconk.com/", limitScraping) } -func BlowVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func BlowVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "blowvr", "BlowVR", "https://blowvr.com/", limitScraping) } diff --git a/pkg/scrape/vrhush.go b/pkg/scrape/vrhush.go index 5976b138d..564801e82 100644 --- a/pkg/scrape/vrhush.go +++ b/pkg/scrape/vrhush.go @@ -7,6 +7,7 @@ import ( "path" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -15,7 +16,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRHush(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRHush(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "vrhush" siteID := "VRHush" diff --git a/pkg/scrape/vrlatina.go b/pkg/scrape/vrlatina.go index c9e6778df..8dcf3c112 100644 --- a/pkg/scrape/vrlatina.go +++ b/pkg/scrape/vrlatina.go @@ -6,6 +6,7 @@ import ( "regexp" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -13,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRLatina(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRLatina(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "vrlatina" siteID := "VRLatina" diff --git a/pkg/scrape/vrphub.go b/pkg/scrape/vrphub.go index e461fbb7e..1b22b92ec 100644 --- a/pkg/scrape/vrphub.go +++ b/pkg/scrape/vrphub.go @@ -7,6 +7,7 @@ import ( "path" "regexp" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -28,7 +29,7 @@ func getVideoName(fileUrl string) (string, error) { return filename, nil } -func VRPHub(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, callback func(e *colly.HTMLElement, sc *models.ScrapedScene)) error { +func VRPHub(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, callback func(e *colly.HTMLElement, sc *models.ScrapedScene)) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -260,13 +261,13 @@ func addVRPHubScraper(id string, name string, company string, avatarURL string, avatarURL = "https://cdn.vrphub.com/wp-content/uploads/2016/08/vrphubnew.png" } - registerScraper(id, suffixedName, avatarURL, "vrphub.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper(id, suffixedName, avatarURL, "vrphub.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRPHub(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, callback) }) } func init() { - registerScraper("vrphub-single_scene", "VRPHub - Other Studios", "", "vrphub.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper("vrphub-single_scene", "VRPHub - Other Studios", "", "vrphub.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRPHub(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo, limitScraping, noop) }) var scrapers config.ScraperList diff --git a/pkg/scrape/vrporn.go b/pkg/scrape/vrporn.go index 8876d87d8..858611369 100644 --- a/pkg/scrape/vrporn.go +++ b/pkg/scrape/vrporn.go @@ -5,6 +5,7 @@ import ( "regexp" "strconv" "strings" + "sync" "time" "github.com/gocolly/colly/v2" @@ -13,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRPorn(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, masterSiteId string) error { +func VRPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, masterSiteId string) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -183,18 +184,18 @@ func addVRPornScraper(id string, name string, company string, avatarURL string, } if masterSiteId == "" { - registerScraper(id, suffixedName, avatarURL, "vrporn.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper(id, suffixedName, avatarURL, "vrporn.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRPorn(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, "") }) } else { - registerAlternateScraper(id, suffixedName, avatarURL, "vrporn.com", masterSiteId, func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerAlternateScraper(id, suffixedName, avatarURL, "vrporn.com", masterSiteId, func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRPorn(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, masterSiteId) }) } } func init() { - registerScraper("vrporn-single_scene", "VRPorn - Other Studios", "", "vrporn.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper("vrporn-single_scene", "VRPorn - Other Studios", "", "vrporn.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRPorn(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo, limitScraping, "") }) diff --git a/pkg/scrape/vrsexygirlz.go b/pkg/scrape/vrsexygirlz.go index 2b8050921..03019986d 100644 --- a/pkg/scrape/vrsexygirlz.go +++ b/pkg/scrape/vrsexygirlz.go @@ -4,6 +4,7 @@ import ( "fmt" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -11,7 +12,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRSexygirlz(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRSexygirlz(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "vrsexygirlz" diff --git a/pkg/scrape/vrspy.go b/pkg/scrape/vrspy.go old mode 100644 new mode 100755 index 2a816ca55..22ab5a183 --- a/pkg/scrape/vrspy.go +++ b/pkg/scrape/vrspy.go @@ -7,6 +7,7 @@ import ( "regexp" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -22,7 +23,7 @@ const ( baseURL = "https://" + domain ) -func VRSpy(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singleScrapeAdditionalInfo string, limitScraping bool) error { +func VRSpy(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singleScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) diff --git a/pkg/scrape/wetvr.go b/pkg/scrape/wetvr.go index 8cfe1a266..6abccfa7c 100644 --- a/pkg/scrape/wetvr.go +++ b/pkg/scrape/wetvr.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "strings" + "sync" "time" "github.com/gocolly/colly/v2" @@ -13,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func WetVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func WetVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "wetvr" siteID := "WetVR" diff --git a/pkg/scrape/zexywankitnow.go b/pkg/scrape/zexywankitnow.go index 7aa1e8790..e442f0c95 100644 --- a/pkg/scrape/zexywankitnow.go +++ b/pkg/scrape/zexywankitnow.go @@ -4,6 +4,7 @@ import ( "regexp" "strconv" "strings" + "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -12,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func TwoWebMediaSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, limitScraping bool) error { +func TwoWebMediaSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -188,11 +189,11 @@ func TwoWebMediaSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, return nil } -func WankitNowVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func WankitNowVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return TwoWebMediaSite(wg, updateSite, knownScenes, out, singleSceneURL, "wankitnowvr", "WankitNowVR", "https://wankitnowvr.com/videos/", limitScraping) } -func ZexyVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func ZexyVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return TwoWebMediaSite(wg, updateSite, knownScenes, out, singleSceneURL, "zexyvr", "ZexyVR", "https://zexyvr.com/videos/", limitScraping) } diff --git a/pkg/tasks/content.go b/pkg/tasks/content.go index d9cdf4e64..59d358d2f 100644 --- a/pkg/tasks/content.go +++ b/pkg/tasks/content.go @@ -115,9 +115,10 @@ func runScrapers(knownScenes []string, toScrape string, updateSite bool, collect commonDb.Where(&models.Site{ID: toScrape}).Find(&sites) } - var wg models.ScrapeWG + var wg sync.WaitGroup - concurrent_scrapers := int64(common.ConcurrentScrapers) + sitecnt := 1 + concurrent_scrapers := common.ConcurrentScrapers if concurrent_scrapers == 0 { concurrent_scrapers = 99999 } @@ -137,10 +138,10 @@ func runScrapers(knownScenes []string, toScrape string, updateSite bool, collect site.Save() }(scraper) - if wg.Count() >= concurrent_scrapers { // processing batches of 35 sites - wg.Wait(concurrent_scrapers) + if sitecnt%concurrent_scrapers == 0 { // processing batches of 35 sites + wg.Wait() } - + sitecnt++ } } } @@ -157,7 +158,7 @@ func runScrapers(knownScenes []string, toScrape string, updateSite bool, collect } } - wg.Wait(0) + wg.Wait() return nil }