From 16e3913e6bcd0d69324732ed4a29aec5a1aaeee8 Mon Sep 17 00:00:00 2001 From: jrebey <55519905+jrebey@users.noreply.github.com> Date: Sun, 3 Nov 2019 18:40:15 -0500 Subject: [PATCH] Add VRCONK scraper (#180) --- pkg/models/model_tag.go | 16 +++- pkg/scrape/vrconk.go | 129 ++++++++++++++++++++++++++++++++ ui/src/views/scenes/Details.vue | 2 +- 3 files changed, 144 insertions(+), 3 deletions(-) create mode 100644 pkg/scrape/vrconk.go diff --git a/pkg/models/model_tag.go b/pkg/models/model_tag.go index 7d229e0bc..fce4701b3 100644 --- a/pkg/models/model_tag.go +++ b/pkg/models/model_tag.go @@ -24,7 +24,11 @@ func (t *Tag) Save() error { func ConvertTag(t string) string { t = strings.ToLower(t) - if funk.Contains([]string{"180", "60fps", "60 fps", "5k", "5k+", "big dick", "big cocks", "axaxqxrrysrwqua", "girl-boy", "virtual reality", "vr porn"}, t) { + if funk.Contains([]string{"180", "60fps", "60 fps", "5k", "5k+", "big dick", "big cocks", + "axaxqxrrysrwqua", "girl-boy", "virtual reality", + "virtual reality porn", "vr porn", "180 vr porn", "xxxsex vr", + "xxx vr porn", "vrconk", "sex onbed", + }, t) { return "" } @@ -48,7 +52,7 @@ func ConvertTag(t string) string { return "athletic body" } - if funk.Contains([]string{"threesome bgg", "bgg", "girl-girl-boy"}, t) { + if funk.Contains([]string{"threesome bgg", "bgg", "girl-girl-boy", "ffm threesome"}, t) { return "threesome ffm" } @@ -268,5 +272,13 @@ func ConvertTag(t string) string { return "dp" } + if funk.Contains([]string{"pov fucking"}, t) { + return "pov" + } + + if funk.Contains([]string{"xxx parody", "xxx parody vr porn"}, t) { + return "parody" + } + return t } diff --git a/pkg/scrape/vrconk.go b/pkg/scrape/vrconk.go new file mode 100644 index 000000000..6c1b9bdbc --- /dev/null +++ b/pkg/scrape/vrconk.go @@ -0,0 +1,129 @@ +package scrape + +import ( + "strconv" + "strings" + "sync" + + "github.com/gocolly/colly" + "github.com/mozillazg/go-slugify" + "github.com/nleeper/goment" + "github.com/thoas/go-funk" + 
"github.com/xbapps/xbvr/pkg/models" +) + +func VRCONK(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene) error { + defer wg.Done() + logScrapeStart("vrconk", "VRCONK") + + siteCollector := colly.NewCollector( + colly.AllowedDomains("www.vrconk.com"), + colly.CacheDir(siteCacheDir), + colly.UserAgent(userAgent), + ) + + sceneCollector := colly.NewCollector( + colly.AllowedDomains("www.vrconk.com"), + colly.CacheDir(sceneCacheDir), + colly.UserAgent(userAgent), + ) + + siteCollector.OnRequest(func(r *colly.Request) { + log.Println("visiting", r.URL.String()) + }) + + sceneCollector.OnRequest(func(r *colly.Request) { + log.Println("visiting", r.URL.String()) + }) + + // + sceneCollector.OnHTML(`html`, func(e *colly.HTMLElement) { + sc := models.ScrapedScene{} + sc.SceneType = "VR" + sc.Studio = "VRCONK" + sc.Site = "VRCONK" + sc.HomepageURL = strings.Split(e.Request.URL.String(), "?")[0] + + // Scene ID - get from URL + tmp := strings.Split(sc.HomepageURL, "/") + s := strings.Split(tmp[len(tmp)-1], "-") + sc.SiteID = strings.TrimSuffix(s[len(s)-1], ".html") + sc.SceneID = slugify.Slugify(sc.Site) + "-" + sc.SiteID + + sc.Title = strings.TrimSpace(e.ChildAttr(`meta[property="og:title"]`, "content")) + sc.Covers = append(sc.Covers, e.ChildAttr(`meta[property="og:image"]`, "content")) + + e.ForEach(`.gallery-block img`, func(id int, e *colly.HTMLElement) { + sc.Gallery = append(sc.Gallery, e.Request.AbsoluteURL(e.Attr("src"))) + }) + + e.ForEach(`.stats-list li`, func(id int, e *colly.HTMLElement) { + //