From d648c4cdb173545dde09055a93cca96efd126cf3 Mon Sep 17 00:00:00 2001 From: pops64 Date: Wed, 28 Aug 2024 10:36:20 -0400 Subject: [PATCH] feat: Optimize concurrent scraping (#1828) * Concurrent Scraper Upgrade Will now no longer wait for the entire wait group to finish before adding more workers. It will add them one at a time when below the concurrent scraper count * Formatting * Tweak to WG functions Switch to using a function to retrieve the count instead of direct access. Something this low level probably should be protected. * go fmt --------- Co-authored-by: crwxaj <52156245+crwxaj@users.noreply.github.com> --- pkg/models/model_scraper.go | 30 ++++++++++++++++++++++++++++-- pkg/scrape/baberoticavr.go | 3 +-- pkg/scrape/badoink.go | 13 ++++++------- pkg/scrape/caribbeancom.go | 3 +-- pkg/scrape/czechvr.go | 7 +++---- pkg/scrape/darkroomvr.go | 3 +-- pkg/scrape/fuckpassvr.go | 3 +-- pkg/scrape/groobyvr.go | 3 +-- pkg/scrape/hologirlsvr.go | 3 +-- pkg/scrape/lethalhardcorevr.go | 8 ++++---- pkg/scrape/littlecaprice.go | 3 +-- pkg/scrape/navr.go | 3 +-- pkg/scrape/povr.go | 9 ++++----- pkg/scrape/realitylovers.go | 7 +++---- pkg/scrape/realjamvr.go | 7 +++---- pkg/scrape/sexbabesvr.go | 3 +-- pkg/scrape/sinsvr.go | 3 +-- pkg/scrape/slrstudios.go | 9 ++++----- pkg/scrape/stasyqvr.go | 3 +-- pkg/scrape/swallowbay.go | 3 +-- pkg/scrape/tmwvrnet.go | 3 +-- pkg/scrape/tngf.go | 3 +-- pkg/scrape/transvr.go | 3 +-- pkg/scrape/virtualpee.go | 3 +-- pkg/scrape/virtualporn.go | 3 +-- pkg/scrape/virtualrealporn.go | 13 ++++++------- pkg/scrape/virtualtaboo.go | 3 +-- pkg/scrape/vr3000.go | 3 +-- pkg/scrape/vrallure.go | 3 +-- pkg/scrape/vrbangers.go | 13 ++++++------- pkg/scrape/vrhush.go | 3 +-- pkg/scrape/vrlatina.go | 3 +-- pkg/scrape/vrphub.go | 7 +++---- pkg/scrape/vrporn.go | 9 ++++----- pkg/scrape/vrsexygirlz.go | 3 +-- pkg/scrape/vrspy.go | 3 +-- pkg/scrape/wetvr.go | 3 +-- pkg/scrape/zexywankitnow.go | 7 +++---- pkg/tasks/content.go | 13 ++++++------- 39 files changed, 108 
insertions(+), 119 deletions(-) mode change 100755 => 100644 pkg/scrape/vrbangers.go mode change 100755 => 100644 pkg/scrape/vrspy.go diff --git a/pkg/models/model_scraper.go b/pkg/models/model_scraper.go index 6d5bf44ec..317e747c0 100644 --- a/pkg/models/model_scraper.go +++ b/pkg/models/model_scraper.go @@ -2,12 +2,12 @@ package models import ( "encoding/json" - "sync" + "sync/atomic" ) var scrapers []Scraper -type ScraperFunc func(*sync.WaitGroup, bool, []string, chan<- ScrapedScene, string, string, bool) error +type ScraperFunc func(*ScrapeWG, bool, []string, chan<- ScrapedScene, string, string, bool) error type Scraper struct { ID string `json:"id"` @@ -90,3 +90,29 @@ func RegisterScraper(id string, name string, avatarURL string, domain string, f s.MasterSiteId = masterSiteId scrapers = append(scrapers, s) } + +// Custom wg functions, to allow access to the current count of waitgroups. This allows running scrapers at max count always +type ScrapeWG struct { + count int64 +} + +func (wg *ScrapeWG) Add(n int64) { + atomic.AddInt64(&wg.count, n) +} + +func (wg *ScrapeWG) Done() { + wg.Add(-1) + if atomic.LoadInt64(&wg.count) < 0 { + panic("negative wait group counter") + } +} + +func (wg *ScrapeWG) Wait(n int64) { + for atomic.LoadInt64(&wg.count) >= n && atomic.LoadInt64(&wg.count) != 0 { + continue + } +} + +func (wg *ScrapeWG) Count() int64 { + return atomic.LoadInt64(&wg.count) +} diff --git a/pkg/scrape/baberoticavr.go b/pkg/scrape/baberoticavr.go index 08ce27227..fe76386a9 100644 --- a/pkg/scrape/baberoticavr.go +++ b/pkg/scrape/baberoticavr.go @@ -7,7 +7,6 @@ import ( "regexp" "strconv" "strings" - "sync" "github.com/go-resty/resty/v2" "github.com/gocolly/colly/v2" @@ -16,7 +15,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func BaberoticaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func BaberoticaVR(wg 
*models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "baberoticavr" siteID := "BaberoticaVR" diff --git a/pkg/scrape/badoink.go b/pkg/scrape/badoink.go index c547da02b..6c18805bf 100644 --- a/pkg/scrape/badoink.go +++ b/pkg/scrape/badoink.go @@ -8,7 +8,6 @@ import ( "regexp" "strconv" "strings" - "sync" "time" "github.com/go-resty/resty/v2" @@ -23,7 +22,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func BadoinkSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func BadoinkSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -268,23 +267,23 @@ func BadoinkSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out return nil } -func BadoinkVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func BadoinkVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "badoinkvr", "BadoinkVR", "https://badoinkvr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping) } -func B18VR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func B18VR(wg *models.ScrapeWG, 
updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "18vr", "18VR", "https://18vr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping) } -func VRCosplayX(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRCosplayX(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrcosplayx", "VRCosplayX", "https://vrcosplayx.com/cosplaypornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping) } -func BabeVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func BabeVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "babevr", "BabeVR", "https://babevr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping) } -func KinkVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func KinkVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "kinkvr", "KinkVR", "https://kinkvr.com/bdsm-vr-videos?order=newest", 
singeScrapeAdditionalInfo, limitScraping) } diff --git a/pkg/scrape/caribbeancom.go b/pkg/scrape/caribbeancom.go index 82152d4c9..f2de10531 100644 --- a/pkg/scrape/caribbeancom.go +++ b/pkg/scrape/caribbeancom.go @@ -4,7 +4,6 @@ import ( "encoding/json" "strconv" "strings" - "sync" "github.com/bregydoc/gtranslate" "github.com/gocolly/colly/v2" @@ -15,7 +14,7 @@ import ( "golang.org/x/text/language" ) -func CariVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func CariVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "caribbeancomvr" siteID := "CaribbeanCom VR" diff --git a/pkg/scrape/czechvr.go b/pkg/scrape/czechvr.go index b470feb31..bdb17b074 100644 --- a/pkg/scrape/czechvr.go +++ b/pkg/scrape/czechvr.go @@ -4,7 +4,6 @@ import ( "regexp" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -14,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func CzechVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, nwID string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func CzechVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, nwID string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) commonDb, _ := models.GetCommonDB() @@ -199,14 +198,14 @@ func CzechVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan } func addCZVRScraper(id string, name string, nwid string, avatarURL string) { - registerScraper(id, name, avatarURL, "czechvrnetwork.com", 
func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper(id, name, avatarURL, "czechvrnetwork.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return CzechVR(wg, updateSite, knownScenes, out, singleSceneURL, id, name, nwid, singeScrapeAdditionalInfo, limitScraping) }) } func init() { // scraper for scraping single scenes where only the url is provided - registerScraper("czechvr-single_scene", "Czech VR - Other Studios", "", "czechvrnetwork.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper("czechvr-single_scene", "Czech VR - Other Studios", "", "czechvrnetwork.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return CzechVR(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", limitScraping) }) addCZVRScraper("czechvr", "Czech VR", "15", "https://www.czechvr.com/images/favicon/android-chrome-256x256.png") diff --git a/pkg/scrape/darkroomvr.go b/pkg/scrape/darkroomvr.go index a00d19bd9..ec9e7fbee 100644 --- a/pkg/scrape/darkroomvr.go +++ b/pkg/scrape/darkroomvr.go @@ -5,7 +5,6 @@ import ( "fmt" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -13,7 +12,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func DarkRoomVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func DarkRoomVR(wg *models.ScrapeWG, updateSite bool, 
knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "darkroomvr" siteID := "DarkRoomVR" diff --git a/pkg/scrape/fuckpassvr.go b/pkg/scrape/fuckpassvr.go index a0ff8229f..34963bf86 100644 --- a/pkg/scrape/fuckpassvr.go +++ b/pkg/scrape/fuckpassvr.go @@ -5,7 +5,6 @@ import ( "net/url" "regexp" "strings" - "sync" "github.com/go-resty/resty/v2" "github.com/gocolly/colly/v2" @@ -14,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func FuckPassVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func FuckPassVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "fuckpassvr-native" siteID := "FuckPassVR" diff --git a/pkg/scrape/groobyvr.go b/pkg/scrape/groobyvr.go index 62d7f467c..b10e5041e 100644 --- a/pkg/scrape/groobyvr.go +++ b/pkg/scrape/groobyvr.go @@ -5,7 +5,6 @@ import ( "regexp" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -14,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func GroobyVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func GroobyVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "groobyvr" siteID := "GroobyVR" diff --git a/pkg/scrape/hologirlsvr.go b/pkg/scrape/hologirlsvr.go index fba258474..0ce59d9e2 100644 --- a/pkg/scrape/hologirlsvr.go +++ b/pkg/scrape/hologirlsvr.go @@ -3,7 +3,6 @@ package scrape 
import ( "regexp" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -11,7 +10,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func HoloGirlsVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func HoloGirlsVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "hologirlsvr" siteID := "HoloGirlsVR" diff --git a/pkg/scrape/lethalhardcorevr.go b/pkg/scrape/lethalhardcorevr.go index 95cdce180..cb1cc0bb1 100644 --- a/pkg/scrape/lethalhardcorevr.go +++ b/pkg/scrape/lethalhardcorevr.go @@ -1,9 +1,9 @@ package scrape import ( + "context" "regexp" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -26,7 +26,7 @@ func isGoodTag(lookup string) bool { return true } -func LethalHardcoreSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func LethalHardcoreSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -176,11 +176,11 @@ func LethalHardcoreSite(wg *sync.WaitGroup, updateSite bool, knownScenes []strin return nil } -func LethalHardcoreVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func LethalHardcoreVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, 
singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return LethalHardcoreSite(wg, updateSite, knownScenes, out, singleSceneURL, "lethalhardcorevr", "LethalHardcoreVR", "https://lethalhardcorevr.com/lethal-hardcore-vr-scenes.html?studio=95595&sort=released", singeScrapeAdditionalInfo, limitScraping) } -func WhorecraftVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func WhorecraftVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return LethalHardcoreSite(wg, updateSite, knownScenes, out, singleSceneURL, "whorecraftvr", "WhorecraftVR", "https://lethalhardcorevr.com/lethal-hardcore-vr-scenes.html?studio=95347&sort=released", singeScrapeAdditionalInfo, limitScraping) } diff --git a/pkg/scrape/littlecaprice.go b/pkg/scrape/littlecaprice.go index a0387df0e..e2cc05a1a 100644 --- a/pkg/scrape/littlecaprice.go +++ b/pkg/scrape/littlecaprice.go @@ -3,7 +3,6 @@ package scrape import ( "net/url" "strings" - "sync" "time" "github.com/gocolly/colly/v2" @@ -12,7 +11,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func LittleCaprice(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func LittleCaprice(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "littlecaprice" siteID := "Little Caprice Dreams" diff --git a/pkg/scrape/navr.go b/pkg/scrape/navr.go index 532dbb4ec..4ee8f4aba 100644 --- a/pkg/scrape/navr.go +++ b/pkg/scrape/navr.go @@ -4,7 +4,6 @@ import ( "html" "strconv" "strings" - "sync" 
"github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -14,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func NaughtyAmericaVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func NaughtyAmericaVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "naughtyamericavr" siteID := "NaughtyAmerica VR" diff --git a/pkg/scrape/povr.go b/pkg/scrape/povr.go index 4d75e247f..0368294aa 100644 --- a/pkg/scrape/povr.go +++ b/pkg/scrape/povr.go @@ -6,7 +6,6 @@ import ( "regexp" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -15,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func POVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, masterSiteId string) error { +func POVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, masterSiteId string) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -164,18 +163,18 @@ func addPOVRScraper(id string, name string, company string, avatarURL string, cu } if masterSiteId == "" { - registerScraper(id, suffixedName, avatarURL, "povr.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper(id, suffixedName, avatarURL, "povr.com", func(wg *models.ScrapeWG, updateSite bool, 
knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return POVR(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, "") }) } else { - registerAlternateScraper(id, suffixedName, avatarURL, "povr.com", masterSiteId, func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerAlternateScraper(id, suffixedName, avatarURL, "povr.com", masterSiteId, func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return POVR(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, masterSiteId) }) } } func init() { - registerScraper("povr-single_scene", "POVR - Other Studios", "", "povr.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper("povr-single_scene", "POVR - Other Studios", "", "povr.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return POVR(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo, limitScraping, "") }) var scrapers config.ScraperList diff --git a/pkg/scrape/realitylovers.go b/pkg/scrape/realitylovers.go index 23a0d5bfd..ba4029cae 100644 --- a/pkg/scrape/realitylovers.go +++ b/pkg/scrape/realitylovers.go @@ -4,7 +4,6 @@ import ( "fmt" "regexp" "strings" - "sync" "time" "github.com/go-resty/resty/v2" @@ -15,7 +14,7 @@ import ( 
"github.com/xbapps/xbvr/pkg/models" ) -func RealityLoversSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, domain string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func RealityLoversSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, domain string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -151,11 +150,11 @@ func RealityLoversSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string return nil } -func RealityLovers(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func RealityLovers(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return RealityLoversSite(wg, updateSite, knownScenes, out, singleSceneURL, "realitylovers", "RealityLovers", "realitylovers.com", singeScrapeAdditionalInfo, limitScraping) } -func TSVirtualLovers(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func TSVirtualLovers(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return RealityLoversSite(wg, updateSite, knownScenes, out, singleSceneURL, "tsvirtuallovers", "TSVirtualLovers", "tsvirtuallovers.com", singeScrapeAdditionalInfo, limitScraping) } diff --git a/pkg/scrape/realjamvr.go b/pkg/scrape/realjamvr.go index d2ea92306..1c98e67da 100644 --- a/pkg/scrape/realjamvr.go +++ 
b/pkg/scrape/realjamvr.go @@ -6,7 +6,6 @@ import ( "regexp" "strconv" "strings" - "sync" "time" "github.com/gocolly/colly/v2" @@ -17,7 +16,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func RealJamSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, domain string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func RealJamSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, domain string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -193,10 +192,10 @@ func RealJamSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out return nil } -func RealJamVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func RealJamVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return RealJamSite(wg, updateSite, knownScenes, out, singleSceneURL, "realjamvr", "RealJam VR", "realjamvr.com", singeScrapeAdditionalInfo, limitScraping) } -func PornCornVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func PornCornVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return RealJamSite(wg, updateSite, knownScenes, out, singleSceneURL, "porncornvr", "PornCorn VR", "porncornvr.com", singeScrapeAdditionalInfo, limitScraping) } diff --git a/pkg/scrape/sexbabesvr.go b/pkg/scrape/sexbabesvr.go 
index edf92dbe7..16bb81e22 100644 --- a/pkg/scrape/sexbabesvr.go +++ b/pkg/scrape/sexbabesvr.go @@ -4,7 +4,6 @@ import ( "encoding/json" "net/url" "strings" - "sync" "time" "github.com/gocolly/colly/v2" @@ -14,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func SexBabesVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func SexBabesVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "sexbabesvr" siteID := "SexBabesVR" diff --git a/pkg/scrape/sinsvr.go b/pkg/scrape/sinsvr.go index 433aada96..145bd63f6 100644 --- a/pkg/scrape/sinsvr.go +++ b/pkg/scrape/sinsvr.go @@ -6,7 +6,6 @@ import ( "regexp" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -16,7 +15,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func SinsVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func SinsVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "sinsvr" siteID := "SinsVR" diff --git a/pkg/scrape/slrstudios.go b/pkg/scrape/slrstudios.go index b21e7c252..d02eff5f2 100644 --- a/pkg/scrape/slrstudios.go +++ b/pkg/scrape/slrstudios.go @@ -7,7 +7,6 @@ import ( "regexp" "strconv" "strings" - "sync" "github.com/go-resty/resty/v2" "github.com/gocolly/colly/v2" @@ -26,7 +25,7 @@ func absolutegallery(match string) string { return submatches[1] + submatches[3] + "_o.jpg" // construct new string with desired format } -func SexLikeReal(wg *sync.WaitGroup, updateSite bool, 
knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, masterSiteId string) error { +func SexLikeReal(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, masterSiteId string) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -538,11 +537,11 @@ func addSLRScraper(id string, name string, company string, avatarURL string, cus } if masterSiteId == "" { - registerScraper(id, suffixedName, avatarURL, "sexlikereal.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper(id, suffixedName, avatarURL, "sexlikereal.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return SexLikeReal(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, "") }) } else { - registerAlternateScraper(id, suffixedName, avatarURL, "sexlikereal.com", masterSiteId, func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerAlternateScraper(id, suffixedName, avatarURL, "sexlikereal.com", masterSiteId, func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return SexLikeReal(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, 
singeScrapeAdditionalInfo, limitScraping, masterSiteId) }) } @@ -551,7 +550,7 @@ func addSLRScraper(id string, name string, company string, avatarURL string, cus func init() { var scrapers config.ScraperList // scraper for single scenes with no existing scraper for the studio - registerScraper("slr-single_scene", "SLR - Other Studios", "", "sexlikereal.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper("slr-single_scene", "SLR - Other Studios", "", "sexlikereal.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return SexLikeReal(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo, limitScraping, "") }) diff --git a/pkg/scrape/stasyqvr.go b/pkg/scrape/stasyqvr.go index e131badb1..e55708dbd 100644 --- a/pkg/scrape/stasyqvr.go +++ b/pkg/scrape/stasyqvr.go @@ -4,7 +4,6 @@ import ( "net/url" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -13,7 +12,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func StasyQVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func StasyQVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "stasyqvr" siteID := "StasyQVR" diff --git a/pkg/scrape/swallowbay.go b/pkg/scrape/swallowbay.go index 99627beb1..dfa9a5bce 100644 --- a/pkg/scrape/swallowbay.go +++ b/pkg/scrape/swallowbay.go @@ -6,7 +6,6 @@ import ( "regexp" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" 
"github.com/nleeper/goment" @@ -14,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func SwallowBay(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func SwallowBay(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "swallowbay" siteID := "SwallowBay" diff --git a/pkg/scrape/tmwvrnet.go b/pkg/scrape/tmwvrnet.go index 17f826d59..0d925c3fd 100644 --- a/pkg/scrape/tmwvrnet.go +++ b/pkg/scrape/tmwvrnet.go @@ -4,7 +4,6 @@ import ( "encoding/json" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -14,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func TmwVRnet(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func TmwVRnet(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "tmwvrnet" siteID := "TmwVRnet" diff --git a/pkg/scrape/tngf.go b/pkg/scrape/tngf.go index 47273b6f4..d7b13182e 100644 --- a/pkg/scrape/tngf.go +++ b/pkg/scrape/tngf.go @@ -4,7 +4,6 @@ import ( "encoding/json" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -13,7 +12,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func TNGFVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func TNGFVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, 
singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "tonightsgirlfriend" siteID := "Tonight's Girlfriend VR" diff --git a/pkg/scrape/transvr.go b/pkg/scrape/transvr.go index 7ba7d0faa..623dcc08a 100644 --- a/pkg/scrape/transvr.go +++ b/pkg/scrape/transvr.go @@ -5,7 +5,6 @@ import ( "regexp" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -14,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func TransVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func TransVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "transvr" siteID := "TransVR" diff --git a/pkg/scrape/virtualpee.go b/pkg/scrape/virtualpee.go index 5731a30e3..f33e504bf 100644 --- a/pkg/scrape/virtualpee.go +++ b/pkg/scrape/virtualpee.go @@ -3,7 +3,6 @@ package scrape import ( "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -12,7 +11,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VirtualPee(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualPee(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "virtualpee" siteID := "VirtualPee" diff --git a/pkg/scrape/virtualporn.go b/pkg/scrape/virtualporn.go index 7f44d766b..351db81f4 100644 --- a/pkg/scrape/virtualporn.go +++ b/pkg/scrape/virtualporn.go @@ -5,7 +5,6 @@ import ( "regexp" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" 
"github.com/mozillazg/go-slugify" @@ -14,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VirtualPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualPorn(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { // this scraper is non-standard in that it gathers info via an api rather than scraping html pages defer wg.Done() scraperID := "bvr" diff --git a/pkg/scrape/virtualrealporn.go b/pkg/scrape/virtualrealporn.go index ecab19f92..44d85e324 100644 --- a/pkg/scrape/virtualrealporn.go +++ b/pkg/scrape/virtualrealporn.go @@ -8,7 +8,6 @@ import ( "image" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -17,7 +16,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VirtualRealPornSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualRealPornSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) page := 1 @@ -280,19 +279,19 @@ func VirtualRealPornSite(wg *sync.WaitGroup, updateSite bool, knownScenes []stri return nil } -func VirtualRealPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualRealPorn(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, 
singeScrapeAdditionalInfo string, limitScraping bool) error { return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealporn", "VirtualRealPorn", "https://virtualrealporn.com/", singeScrapeAdditionalInfo, limitScraping) } -func VirtualRealTrans(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualRealTrans(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealtrans", "VirtualRealTrans", "https://virtualrealtrans.com/", singeScrapeAdditionalInfo, limitScraping) } -func VirtualRealAmateur(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualRealAmateur(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealamateur", "VirtualRealAmateurPorn", "https://virtualrealamateurporn.com/", singeScrapeAdditionalInfo, limitScraping) } -func VirtualRealGay(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualRealGay(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealgay", "VirtualRealGay", "https://virtualrealgay.com/", 
singeScrapeAdditionalInfo, limitScraping) } -func VirtualRealPassion(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualRealPassion(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VirtualRealPornSite(wg, updateSite, knownScenes, out, singleSceneURL, "virtualrealpassion", "VirtualRealPassion", "https://virtualrealpassion.com/", singeScrapeAdditionalInfo, limitScraping) } diff --git a/pkg/scrape/virtualtaboo.go b/pkg/scrape/virtualtaboo.go index c997d1918..12e3a5f98 100644 --- a/pkg/scrape/virtualtaboo.go +++ b/pkg/scrape/virtualtaboo.go @@ -5,7 +5,6 @@ import ( "regexp" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -14,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VirtualTaboo(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VirtualTaboo(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "virtualtaboo" siteID := "VirtualTaboo" diff --git a/pkg/scrape/vr3000.go b/pkg/scrape/vr3000.go index 52a32fb44..d027d5f02 100644 --- a/pkg/scrape/vr3000.go +++ b/pkg/scrape/vr3000.go @@ -5,7 +5,6 @@ import ( "regexp" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -14,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VR3000(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func 
VR3000(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "vr3000" siteID := "VR3000" diff --git a/pkg/scrape/vrallure.go b/pkg/scrape/vrallure.go index 13b3f8520..543538d50 100644 --- a/pkg/scrape/vrallure.go +++ b/pkg/scrape/vrallure.go @@ -6,7 +6,6 @@ import ( "regexp" "strconv" "strings" - "sync" "golang.org/x/net/html" @@ -18,7 +17,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRAllure(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRAllure(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "vrallure" siteID := "VRAllure" diff --git a/pkg/scrape/vrbangers.go b/pkg/scrape/vrbangers.go old mode 100755 new mode 100644 index 72d273001..71bbb96b9 --- a/pkg/scrape/vrbangers.go +++ b/pkg/scrape/vrbangers.go @@ -4,7 +4,6 @@ import ( "encoding/json" "strconv" "strings" - "sync" "github.com/go-resty/resty/v2" "github.com/gocolly/colly/v2" @@ -15,7 +14,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRBangersSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, limitScraping bool) error { +func VRBangersSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -175,19 +174,19 @@ func VRBangersSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, ou return nil } -func VRBangers(wg *sync.WaitGroup, updateSite 
bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRBangers(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrbangers", "VRBangers", "https://vrbangers.com/", limitScraping) } -func VRBTrans(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRBTrans(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrbtrans", "VRBTrans", "https://vrbtrans.com/", limitScraping) } -func VRBGay(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRBGay(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrbgay", "VRBGay", "https://vrbgay.com/", limitScraping) } -func VRConk(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRConk(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrconk", "VRCONK", 
"https://vrconk.com/", limitScraping) } -func BlowVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func BlowVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRBangersSite(wg, updateSite, knownScenes, out, singleSceneURL, "blowvr", "BlowVR", "https://blowvr.com/", limitScraping) } diff --git a/pkg/scrape/vrhush.go b/pkg/scrape/vrhush.go index 564801e82..5976b138d 100644 --- a/pkg/scrape/vrhush.go +++ b/pkg/scrape/vrhush.go @@ -7,7 +7,6 @@ import ( "path" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -16,7 +15,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRHush(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRHush(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "vrhush" siteID := "VRHush" diff --git a/pkg/scrape/vrlatina.go b/pkg/scrape/vrlatina.go index 8dcf3c112..c9e6778df 100644 --- a/pkg/scrape/vrlatina.go +++ b/pkg/scrape/vrlatina.go @@ -6,7 +6,6 @@ import ( "regexp" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -14,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRLatina(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRLatina(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, 
singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "vrlatina" siteID := "VRLatina" diff --git a/pkg/scrape/vrphub.go b/pkg/scrape/vrphub.go index 1b22b92ec..e461fbb7e 100644 --- a/pkg/scrape/vrphub.go +++ b/pkg/scrape/vrphub.go @@ -7,7 +7,6 @@ import ( "path" "regexp" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -29,7 +28,7 @@ func getVideoName(fileUrl string) (string, error) { return filename, nil } -func VRPHub(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, callback func(e *colly.HTMLElement, sc *models.ScrapedScene)) error { +func VRPHub(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, callback func(e *colly.HTMLElement, sc *models.ScrapedScene)) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -261,13 +260,13 @@ func addVRPHubScraper(id string, name string, company string, avatarURL string, avatarURL = "https://cdn.vrphub.com/wp-content/uploads/2016/08/vrphubnew.png" } - registerScraper(id, suffixedName, avatarURL, "vrphub.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper(id, suffixedName, avatarURL, "vrphub.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRPHub(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, callback) }) } func init() 
{ - registerScraper("vrphub-single_scene", "VRPHub - Other Studios", "", "vrphub.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper("vrphub-single_scene", "VRPHub - Other Studios", "", "vrphub.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRPHub(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo, limitScraping, noop) }) var scrapers config.ScraperList diff --git a/pkg/scrape/vrporn.go b/pkg/scrape/vrporn.go index 858611369..8876d87d8 100644 --- a/pkg/scrape/vrporn.go +++ b/pkg/scrape/vrporn.go @@ -5,7 +5,6 @@ import ( "regexp" "strconv" "strings" - "sync" "time" "github.com/gocolly/colly/v2" @@ -14,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRPorn(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, masterSiteId string) error { +func VRPorn(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, siteURL string, singeScrapeAdditionalInfo string, limitScraping bool, masterSiteId string) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -184,18 +183,18 @@ func addVRPornScraper(id string, name string, company string, avatarURL string, } if masterSiteId == "" { - registerScraper(id, suffixedName, avatarURL, "vrporn.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + 
registerScraper(id, suffixedName, avatarURL, "vrporn.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRPorn(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, "") }) } else { - registerAlternateScraper(id, suffixedName, avatarURL, "vrporn.com", masterSiteId, func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerAlternateScraper(id, suffixedName, avatarURL, "vrporn.com", masterSiteId, func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRPorn(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, masterSiteId) }) } } func init() { - registerScraper("vrporn-single_scene", "VRPorn - Other Studios", "", "vrporn.com", func(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { + registerScraper("vrporn-single_scene", "VRPorn - Other Studios", "", "vrporn.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return VRPorn(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo, limitScraping, "") }) diff --git a/pkg/scrape/vrsexygirlz.go b/pkg/scrape/vrsexygirlz.go index 03019986d..2b8050921 100644 --- a/pkg/scrape/vrsexygirlz.go +++ b/pkg/scrape/vrsexygirlz.go @@ -4,7 +4,6 @@ import ( "fmt" "strconv" "strings" - "sync" 
"github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -12,7 +11,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func VRSexygirlz(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func VRSexygirlz(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() scraperID := "vrsexygirlz" diff --git a/pkg/scrape/vrspy.go b/pkg/scrape/vrspy.go old mode 100755 new mode 100644 index 22ab5a183..2a816ca55 --- a/pkg/scrape/vrspy.go +++ b/pkg/scrape/vrspy.go @@ -7,7 +7,6 @@ import ( "regexp" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/nleeper/goment" @@ -23,7 +22,7 @@ const ( baseURL = "https://" + domain ) -func VRSpy(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singleScrapeAdditionalInfo string, limitScraping bool) error { +func VRSpy(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singleScrapeAdditionalInfo string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) diff --git a/pkg/scrape/wetvr.go b/pkg/scrape/wetvr.go index 6abccfa7c..8cfe1a266 100644 --- a/pkg/scrape/wetvr.go +++ b/pkg/scrape/wetvr.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "strings" - "sync" "time" "github.com/gocolly/colly/v2" @@ -14,7 +13,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func WetVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func WetVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, 
limitScraping bool) error { defer wg.Done() scraperID := "wetvr" siteID := "WetVR" diff --git a/pkg/scrape/zexywankitnow.go b/pkg/scrape/zexywankitnow.go index e442f0c95..7aa1e8790 100644 --- a/pkg/scrape/zexywankitnow.go +++ b/pkg/scrape/zexywankitnow.go @@ -4,7 +4,6 @@ import ( "regexp" "strconv" "strings" - "sync" "github.com/gocolly/colly/v2" "github.com/mozillazg/go-slugify" @@ -13,7 +12,7 @@ import ( "github.com/xbapps/xbvr/pkg/models" ) -func TwoWebMediaSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, limitScraping bool) error { +func TwoWebMediaSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, limitScraping bool) error { defer wg.Done() logScrapeStart(scraperID, siteID) @@ -189,11 +188,11 @@ func TwoWebMediaSite(wg *sync.WaitGroup, updateSite bool, knownScenes []string, return nil } -func WankitNowVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func WankitNowVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return TwoWebMediaSite(wg, updateSite, knownScenes, out, singleSceneURL, "wankitnowvr", "WankitNowVR", "https://wankitnowvr.com/videos/", limitScraping) } -func ZexyVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { +func ZexyVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error { return TwoWebMediaSite(wg, updateSite, 
knownScenes, out, singleSceneURL, "zexyvr", "ZexyVR", "https://zexyvr.com/videos/", limitScraping) } diff --git a/pkg/tasks/content.go b/pkg/tasks/content.go index 59d358d2f..d9cdf4e64 100644 --- a/pkg/tasks/content.go +++ b/pkg/tasks/content.go @@ -115,10 +115,9 @@ func runScrapers(knownScenes []string, toScrape string, updateSite bool, collect commonDb.Where(&models.Site{ID: toScrape}).Find(&sites) } - var wg sync.WaitGroup + var wg models.ScrapeWG - sitecnt := 1 - concurrent_scrapers := common.ConcurrentScrapers + concurrent_scrapers := int64(common.ConcurrentScrapers) if concurrent_scrapers == 0 { concurrent_scrapers = 99999 } @@ -138,10 +137,10 @@ func runScrapers(knownScenes []string, toScrape string, updateSite bool, collect site.Save() }(scraper) - if sitecnt%concurrent_scrapers == 0 { // processing batches of 35 sites - wg.Wait() + if wg.Count() >= concurrent_scrapers { // processing batches of 35 sites + wg.Wait(concurrent_scrapers) } - sitecnt++ + } } } @@ -158,7 +157,7 @@ func runScrapers(knownScenes []string, toScrape string, updateSite bool, collect } } - wg.Wait() + wg.Wait(0) return nil }