Skip to content

Commit

Permalink
scraper: Add RealVR Scraper (#1899)
Browse files Browse the repository at this point in the history
* Initial Commit

Appears to fully work. Somewhere in the code, part of the studio URL is being added to the name used for the actor scraper. I can't seem to find where this happens.

* Actor scraper and Site ID bug fix

* Go fmt

* Merge Typos
  • Loading branch information
pops64 authored Jan 18, 2025
1 parent f6b19f9 commit 0aae3b1
Show file tree
Hide file tree
Showing 11 changed files with 103 additions and 16 deletions.
4 changes: 4 additions & 0 deletions pkg/api/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -1020,6 +1020,7 @@ func (i ConfigResource) createCustomSite(req *restful.Request, resp *restful.Res
scrapers["stashdb"] = scraperConfig.CustomScrapers.StashDbScrapers
scrapers["vrphub"] = scraperConfig.CustomScrapers.VrphubScrapers
scrapers["vrporn"] = scraperConfig.CustomScrapers.VrpornScrapers
scrapers["realvr"] = scraperConfig.CustomScrapers.RealVRScrapers

exists := false
for key, group := range scrapers {
Expand Down Expand Up @@ -1047,13 +1048,16 @@ func (i ConfigResource) createCustomSite(req *restful.Request, resp *restful.Res
scrapers["vrphub"] = append(scrapers["vrphub"], scraper)
case "vrporn":
scrapers["vrporn"] = append(scrapers["vrporn"], scraper)
case "realvr":
scrapers["realvr"] = append(scrapers["realvr"], scraper)
}
}
scraperConfig.CustomScrapers.PovrScrapers = scrapers["povr"]
scraperConfig.CustomScrapers.SlrScrapers = scrapers["slr"]
scraperConfig.CustomScrapers.StashDbScrapers = scrapers["stashdb"]
scraperConfig.CustomScrapers.VrphubScrapers = scrapers["vrphub"]
scraperConfig.CustomScrapers.VrpornScrapers = scrapers["vrporn"]
scraperConfig.CustomScrapers.RealVRScrapers = scrapers["realvr"]
fName := filepath.Join(common.AppDir, "scrapers.json")
list, _ := json.MarshalIndent(scraperConfig, "", " ")
os.WriteFile(fName, list, 0644)
Expand Down
2 changes: 2 additions & 0 deletions pkg/api/scenes.go
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ func (i SceneResource) getFilters(req *restful.Request, resp *restful.Response)
outAttributes = append(outAttributes, "Has Image")
outAttributes = append(outAttributes, "VRPHub Scraper")
outAttributes = append(outAttributes, "VRPorn Scraper")
outAttributes = append(outAttributes, "RealVR Scraper")
outAttributes = append(outAttributes, "Stashdb Linked")
outAttributes = append(outAttributes, "Has Script Download")
outAttributes = append(outAttributes, "Has AI Generated Script")
Expand All @@ -391,6 +392,7 @@ func (i SceneResource) getFilters(req *restful.Request, resp *restful.Response)
outAttributes = append(outAttributes, "Available from Alternate Sites")
outAttributes = append(outAttributes, "Available from POVR")
outAttributes = append(outAttributes, "Available from VRPorn")
outAttributes = append(outAttributes, "Available from RealVR")
outAttributes = append(outAttributes, "Available from SLR")
outAttributes = append(outAttributes, "Multiple Scenes Available at an Alternate Site")
type Results struct {
Expand Down
13 changes: 11 additions & 2 deletions pkg/config/scraper_list.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ type XbvrScrapers struct {
PovrScrapers []ScraperConfig `json:"povr"`
SlrScrapers []ScraperConfig `json:"slr"`
StashDbScrapers []ScraperConfig `json:"stashdb"`
RealVRScrapers []ScraperConfig `json:"realvr"`
VrpornScrapers []ScraperConfig `json:"vrporn"`
VrphubScrapers []ScraperConfig `json:"vrphub"`
}
type CustomScrapers struct {
PovrScrapers []ScraperConfig `json:"povr"`
SlrScrapers []ScraperConfig `json:"slr"`
StashDbScrapers []ScraperConfig `json:"stashdb"`
RealVRScrapers []ScraperConfig `json:"realvr"`
VrpornScrapers []ScraperConfig `json:"vrporn"`
VrphubScrapers []ScraperConfig `json:"vrphub"`
}
Expand Down Expand Up @@ -76,18 +78,21 @@ func (o *ScraperList) Load() error {
SetSiteId(&o.XbvrScrapers.StashDbScrapers, "")
SetSiteId(&o.XbvrScrapers.VrphubScrapers, "")
SetSiteId(&o.XbvrScrapers.VrpornScrapers, "")
SetSiteId(&o.XbvrScrapers.RealVRScrapers, "")
SetSiteId(&o.CustomScrapers.PovrScrapers, "povr")
SetSiteId(&o.CustomScrapers.SlrScrapers, "slr")
SetSiteId(&o.CustomScrapers.StashDbScrapers, "stashdb")
SetSiteId(&o.CustomScrapers.VrphubScrapers, "vrphub")
SetSiteId(&o.CustomScrapers.VrpornScrapers, "vrporn")
SetSiteId(&o.CustomScrapers.RealVRScrapers, "realvr")

// remove custom sites that are now offical for the same aggregation site
o.CustomScrapers.PovrScrapers = RemoveCustomListNowOffical(o.CustomScrapers.PovrScrapers, o.XbvrScrapers.PovrScrapers)
o.CustomScrapers.SlrScrapers = RemoveCustomListNowOffical(o.CustomScrapers.SlrScrapers, o.XbvrScrapers.SlrScrapers)
o.CustomScrapers.StashDbScrapers = RemoveCustomListNowOffical(o.CustomScrapers.StashDbScrapers, o.XbvrScrapers.StashDbScrapers)
o.CustomScrapers.VrphubScrapers = RemoveCustomListNowOffical(o.CustomScrapers.VrphubScrapers, o.XbvrScrapers.VrphubScrapers)
o.CustomScrapers.VrpornScrapers = RemoveCustomListNowOffical(o.CustomScrapers.VrpornScrapers, o.XbvrScrapers.VrpornScrapers)
o.CustomScrapers.RealVRScrapers = RemoveCustomListNowOffical(o.CustomScrapers.RealVRScrapers, o.XbvrScrapers.RealVRScrapers)

list, err := json.MarshalIndent(o, "", " ")
if err == nil {
Expand Down Expand Up @@ -149,8 +154,12 @@ func CheckMatchingSiteID(findSite ScraperConfig, searchList []ScraperConfig) boo
func SetSiteId(configList *[]ScraperConfig, customId string) {
for idx, siteconfig := range *configList {
if siteconfig.FileID == "" || customId != "" {
id := strings.TrimRight(siteconfig.URL, "/")
siteconfig.ID = strings.ToLower(id[strings.LastIndex(id, "/")+1:])
temp := strings.TrimRight(siteconfig.URL, "/")
id := temp[strings.LastIndex(temp, "/")+1:]
if customId == "realvr" {
id = id[:strings.Index(id, "-")-1]
}
siteconfig.ID = strings.ToLower(id)
} else {
siteconfig.ID = strings.ToLower(siteconfig.FileID)
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/config/scrapers.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
"povr": [],
"slr": [],
"vrporn": [],
"vrphub": []
"vrphub": [],
"realvr": []
},
"xbvr": {
"povr": [
Expand Down
7 changes: 6 additions & 1 deletion pkg/models/model_external_reference.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,10 @@ func (o *ExternalReference) DetermineActorScraperBySiteId(siteId string) string
return "vrphub scrape"
}
if strings.HasSuffix(site.Name, "VRPorn)") {
return "slr scrape"
return "vrporn scrape"
}
if strings.HasSuffix(site.Name, "RealVR)") {
return "realvr scrape"
}
return siteId + " scrape"
}
Expand Down Expand Up @@ -717,6 +720,8 @@ func (scrapeRules ActorScraperConfig) buildGenericActorScraperRules() {
scrapeRules.GenericActorScrapingConfig["vrcosplayx scrape"] = siteDetails
siteDetails.Domain = "18vr.com"
scrapeRules.GenericActorScrapingConfig["18vr scrape"] = siteDetails
siteDetails.Domain = "realvr.com"
scrapeRules.GenericActorScrapingConfig["realvr scrape"] = siteDetails

siteDetails = GenericScraperRuleSet{}
siteDetails.Domain = "darkroomvr.com"
Expand Down
4 changes: 4 additions & 0 deletions pkg/models/model_scene.go
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,8 @@ func queryScenes(db *gorm.DB, r RequestSceneList) (*gorm.DB, *gorm.DB) {
where = `scenes.scene_id like "vrphub-%"`
case "VRPorn Scraper":
where = `scenes.scene_id like "vrporn-%"`
case "RealVR Scraper":
where = `scenes.scene_id like "realvr-%"`
case "Has Script Download":
// querying the scenes in from alternate sources (stored in external_reference) has a performance impact, so it's user choice
if config.Advanced.UseAltSrcInFileMatching {
Expand Down Expand Up @@ -924,6 +926,8 @@ func queryScenes(db *gorm.DB, r RequestSceneList) (*gorm.DB, *gorm.DB) {
where = "exists (select 1 from external_reference_links where external_source like 'alternate scene %' and external_id like 'povr-%' and internal_db_id = scenes.id)"
case "Available from VRPorn":
where = "exists (select 1 from external_reference_links where external_source like 'alternate scene %' and external_id like 'vrporn-%' and internal_db_id = scenes.id)"
case "Available from RealVR":
where = "exists (select 1 from external_reference_links where external_source like 'alternate scene %' and external_id like 'realvr-%' and internal_db_id = scenes.id)"
case "Available from SLR":
where = "exists (select 1 from external_reference_links where external_source like 'alternate scene %' and external_id like 'slr-%' and internal_db_id = scenes.id)"
case "Available from Alternate Sites":
Expand Down
29 changes: 18 additions & 11 deletions pkg/scrape/badoink.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (

"github.com/go-resty/resty/v2"
"github.com/gocolly/colly/v2"
"github.com/mozillazg/go-slugify"
"github.com/nleeper/goment"
"github.com/thoas/go-funk"
"github.com/xbapps/xbvr/pkg/config"
Expand All @@ -22,12 +21,13 @@ import (
"github.com/xbapps/xbvr/pkg/models"
)

func BadoinkSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, URL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
func BadoinkSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, scraperID string, siteID string, company string, URL string, singeScrapeAdditionalInfo string, limitScraping bool, masterSiteId string, ogSite bool) error {
defer wg.Done()
logScrapeStart(scraperID, siteID)

sceneCollector := createCollector("badoinkvr.com", "babevr.com", "vrcosplayx.com", "18vr.com")
siteCollector := createCollector("badoinkvr.com", "babevr.com", "vrcosplayx.com", "18vr.com")
sceneCollector := createCollector("badoinkvr.com", "babevr.com", "vrcosplayx.com", "18vr.com", "realvr.com")
siteCollector := createCollector("badoinkvr.com", "babevr.com", "vrcosplayx.com", "18vr.com", "realvr.com")

trailerCollector := cloneCollector(sceneCollector)

commonDb, _ := models.GetCommonDB()
Expand All @@ -38,16 +38,23 @@ func BadoinkSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out
sc := models.ScrapedScene{}
sc.ScraperID = scraperID
sc.SceneType = "VR"
sc.Studio = "Badoink"
sc.Studio = company
sc.HomepageURL = strings.Split(e.Request.URL.String(), "?")[0]
sc.MasterSiteId = masterSiteId

// Site ID
sc.Site = siteID

// Scene ID - get from URL
tmp := strings.Split(sc.HomepageURL, "-")
sc.SiteID = strings.Replace(tmp[len(tmp)-1], "/", "", -1)
sc.SceneID = slugify.Slugify(sc.Site) + "-" + sc.SiteID

idPrefix := scraperID
if !ogSite {
idPrefix = "realvr"
}

sc.SceneID = idPrefix + "-" + sc.SiteID

// Title
e.ForEach(`h1.video-title`, func(id int, e *colly.HTMLElement) {
Expand Down Expand Up @@ -102,7 +109,7 @@ func BadoinkSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out
sc.ActorDetails = make(map[string]models.ActorDetails)
e.ForEach(`a.video-actor-link`, func(id int, e *colly.HTMLElement) {
sc.Cast = append(sc.Cast, strings.TrimSpace(e.Text))
sc.ActorDetails[strings.TrimSpace(e.Text)] = models.ActorDetails{Source: sc.ScraperID + " scrape", ProfileUrl: e.Request.AbsoluteURL(e.Attr("href"))}
sc.ActorDetails[strings.TrimSpace(e.Text)] = models.ActorDetails{Source: idPrefix + " scrape", ProfileUrl: e.Request.AbsoluteURL(e.Attr("href"))}
})

// Date
Expand Down Expand Up @@ -268,19 +275,19 @@ func BadoinkSite(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out
}

func BadoinkVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "badoinkvr", "BadoinkVR", "https://badoinkvr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping)
return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "badoinkvr", "BadoinkVR", "Badoink", "https://badoinkvr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping, "", true)
}

func B18VR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "18vr", "18VR", "https://18vr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping)
return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "18vr", "18VR", "Badoink", "https://18vr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping, "", true)
}

func VRCosplayX(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrcosplayx", "VRCosplayX", "https://vrcosplayx.com/cosplaypornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping)
return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "vrcosplayx", "VRCosplayX", "Badoink", "https://vrcosplayx.com/cosplaypornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping, "", true)
}

func BabeVR(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "babevr", "BabeVR", "https://babevr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping)
return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "babevr", "BabeVR", "Badoink", "https://babevr.com/vrpornvideos?order=newest", singeScrapeAdditionalInfo, limitScraping, "", true)
}

func init() {
Expand Down
49 changes: 49 additions & 0 deletions pkg/scrape/realvr.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package scrape

import (
"strings"

"github.com/xbapps/xbvr/pkg/config"
"github.com/xbapps/xbvr/pkg/models"
)

func addRealVRScraper(id string, name string, company string, avatarURL string, custom bool, siteURL string, masterSiteId string) {
suffixedName := name
siteNameSuffix := name
if custom {
suffixedName += " (Custom RealVR)"
siteNameSuffix += " (RealVR)"
} else {
suffixedName += " (RealVR)"
}
if avatarURL == "" {
avatarURL = "https://realvr.com/icons/realvr/favicon-32x32.png"
}

siteURL = strings.TrimSuffix(siteURL, "/")
siteURL += "/videos/1?order=newest"

if masterSiteId == "" {
registerScraper(id, suffixedName, avatarURL, "realvr.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, "", false)
})
} else {
registerAlternateScraper(id, suffixedName, avatarURL, "realvr.com", masterSiteId, func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, id, siteNameSuffix, company, siteURL, singeScrapeAdditionalInfo, limitScraping, masterSiteId, false)
})
}
}

// init registers the generic single-scene RealVR scraper plus one scraper
// per RealVR studio found in the scraper list (both official and custom).
func init() {
	// Catch-all entry used when scraping a single RealVR scene URL whose
	// studio has no dedicated scraper; all identifiers are left empty.
	registerScraper("realvr-single_scene", "RealVR - Other Studios", "", "realvr.com", func(wg *models.ScrapeWG, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene, singleSceneURL string, singeScrapeAdditionalInfo string, limitScraping bool) error {
		return BadoinkSite(wg, updateSite, knownScenes, out, singleSceneURL, "", "", "", "", singeScrapeAdditionalInfo, limitScraping, "", false)
	})

	var list config.ScraperList
	list.Load()
	for _, s := range list.XbvrScrapers.RealVRScrapers {
		addRealVRScraper(s.ID, s.Name, s.Company, s.AvatarUrl, false, s.URL, s.MasterSiteId)
	}
	for _, s := range list.CustomScrapers.RealVRScrapers {
		addRealVRScraper(s.ID, s.Name, s.Company, s.AvatarUrl, true, s.URL, s.MasterSiteId)
	}
}
3 changes: 3 additions & 0 deletions ui/src/components/RescrapeButton.vue
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ export default {
if (this.item.scene_url.toLowerCase().includes("vrporn.com")) {
site = "vrporn-single_scene"
}
if (this.item.scene_url.toLowerCase().includes("realvr.com")) {
site = "realvr-single_scene"
}
if (this.item.scene_url.toLowerCase().includes("vrphub.com")) {
site = "vrphub-single_scene"
}
Expand Down
2 changes: 1 addition & 1 deletion ui/src/views/options/sections/InterfaceAdvanced.vue
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ export default {
this.scraperFieldsValid=false
if (this.scraperName != "") {
if (this.scraperUrl.startsWith("https://") || this.scraperUrl.startsWith("http://") ) {
if (this.scraperUrl.includes("povr.com") || this.scraperUrl.includes("sexlikereal.com") || this.scraperUrl.includes("vrphub.com") || this.scraperUrl.includes("vrporn.com") || this.scraperUrl.includes("stashdb.org")) {
if (this.scraperUrl.includes("povr.com") || this.scraperUrl.includes("sexlikereal.com") || this.scraperUrl.includes("vrphub.com") || this.scraperUrl.includes("vrporn.com") || this.scraperUrl.includes("stashdb.org") || this.scraperUrl.includes("realvr.com")) {
this.scraperFieldsValid=true
}
}
Expand Down
3 changes: 3 additions & 0 deletions ui/src/views/options/sections/OptionsSceneCreate.vue
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,9 @@ export default {
if (this.scrapeUrl.toLowerCase().includes("vrphub.com")) {
site = "vrphub-single_scene"
}
if (this.scrapeUrl.toLowerCase().includes("realvr.com")) {
site = "realvr-single_scene"
}
if (this.scrapeUrl.toLowerCase().includes("stashdb.org")) {
site = "single_scene-stashdb"
}
Expand Down

0 comments on commit 0aae3b1

Please sign in to comment.