diff --git a/pkg/models/model_tag.go b/pkg/models/model_tag.go
index 7d229e0bc..fce4701b3 100644
--- a/pkg/models/model_tag.go
+++ b/pkg/models/model_tag.go
@@ -24,7 +24,12 @@ func (t *Tag) Save() error {
 func ConvertTag(t string) string {
 	t = strings.ToLower(t)
 
-	if funk.Contains([]string{"180", "60fps", "60 fps", "5k", "5k+", "big dick", "big cocks", "axaxqxrrysrwqua", "girl-boy", "virtual reality", "vr porn"}, t) {
+	// t is lower-cased above, so every entry here must be lower-case to match.
+	if funk.Contains([]string{"180", "60fps", "60 fps", "5k", "5k+", "big dick", "big cocks",
+		"axaxqxrrysrwqua", "girl-boy", "virtual reality",
+		"virtual reality porn", "vr porn", "180 vr porn", "xxxsex vr",
+		"xxx vr porn", "vrconk", "sex onbed",
+	}, t) {
 		return ""
 	}
 
@@ -48,7 +53,7 @@ func ConvertTag(t string) string {
 		return "athletic body"
 	}
 
-	if funk.Contains([]string{"threesome bgg", "bgg", "girl-girl-boy"}, t) {
+	if funk.Contains([]string{"threesome bgg", "bgg", "girl-girl-boy", "ffm threesome"}, t) {
 		return "threesome ffm"
 	}
 
@@ -268,5 +273,13 @@ func ConvertTag(t string) string {
 		return "dp"
 	}
 
+	if funk.Contains([]string{"pov fucking"}, t) {
+		return "pov"
+	}
+
+	if funk.Contains([]string{"xxx parody", "xxx parody vr porn"}, t) {
+		return "parody"
+	}
+
 	return t
 }
diff --git a/pkg/scrape/vrconk.go b/pkg/scrape/vrconk.go
new file mode 100644
index 000000000..6c1b9bdbc
--- /dev/null
+++ b/pkg/scrape/vrconk.go
@@ -0,0 +1,137 @@
+package scrape
+
+import (
+	"strconv"
+	"strings"
+	"sync"
+
+	"github.com/gocolly/colly"
+	"github.com/mozillazg/go-slugify"
+	"github.com/nleeper/goment"
+	"github.com/thoas/go-funk"
+	"github.com/xbapps/xbvr/pkg/models"
+)
+
+// VRCONK crawls www.vrconk.com from the home page, following pagination links,
+// and sends one models.ScrapedScene to out for every scene link whose URL is
+// not in knownScenes. When updateSite is true it calls updateSiteLastUpdate
+// after the crawl. It calls wg.Done on return and always returns nil.
+func VRCONK(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene) error {
+	defer wg.Done()
+	logScrapeStart("vrconk", "VRCONK")
+
+	siteCollector := colly.NewCollector(
+		colly.AllowedDomains("www.vrconk.com"),
+		colly.CacheDir(siteCacheDir),
+		colly.UserAgent(userAgent),
+	)
+
+	sceneCollector := colly.NewCollector(
+		colly.AllowedDomains("www.vrconk.com"),
+		colly.CacheDir(sceneCacheDir),
+		colly.UserAgent(userAgent),
+	)
+
+	siteCollector.OnRequest(func(r *colly.Request) {
+		log.Println("visiting", r.URL.String())
+	})
+
+	sceneCollector.OnRequest(func(r *colly.Request) {
+		log.Println("visiting", r.URL.String())
+	})
+
+	// Scene page: collect the metadata and emit one ScrapedScene.
+	sceneCollector.OnHTML(`html`, func(e *colly.HTMLElement) {
+		sc := models.ScrapedScene{}
+		sc.SceneType = "VR"
+		sc.Studio = "VRCONK"
+		sc.Site = "VRCONK"
+		sc.HomepageURL = strings.Split(e.Request.URL.String(), "?")[0]
+
+		// Scene ID - last "-"-separated token of the final URL segment, without ".html"
+		tmp := strings.Split(sc.HomepageURL, "/")
+		s := strings.Split(tmp[len(tmp)-1], "-")
+		sc.SiteID = strings.TrimSuffix(s[len(s)-1], ".html")
+		sc.SceneID = slugify.Slugify(sc.Site) + "-" + sc.SiteID
+
+		sc.Title = strings.TrimSpace(e.ChildAttr(`meta[property="og:title"]`, "content"))
+		sc.Covers = append(sc.Covers, e.ChildAttr(`meta[property="og:image"]`, "content"))
+
+		e.ForEach(`.gallery-block img`, func(id int, img *colly.HTMLElement) {
+			sc.Gallery = append(sc.Gallery, img.Request.AbsoluteURL(img.Attr("src")))
+		})
+
+		// Duration and release date: the class of each item's span says which
+		// one it is, and the value is the .sub-label text (e.g. "40:54").
+		e.ForEach(`.stats-list li`, func(id int, li *colly.HTMLElement) {
+			c := li.ChildAttr(`span`, "class")
+			if strings.Contains(c, "i-clock") {
+				// Keep only the part before the first ":".
+				tmpDuration, err := strconv.Atoi(strings.Split(li.ChildText(`.sub-label`), ":")[0])
+				if err == nil {
+					sc.Duration = tmpDuration
+				}
+			}
+
+			if strings.Contains(c, "i-calendar") {
+				// Leave Released unset when the date does not parse.
+				if tmpDate, err := goment.New(li.ChildText(`.sub-label`)); err == nil {
+					sc.Released = tmpDate.Format("YYYY-MM-DD")
+				}
+			}
+		})
+
+		// Tags and Cast
+		unfilteredTags := []string{}
+		e.ForEach(`.tags-block`, func(id int, block *colly.HTMLElement) {
+			c := block.ChildText(`.sub-label`)
+			if strings.Contains(c, "Categories:") || strings.Contains(c, "Tags:") {
+				block.ForEach(`a`, func(id int, ce *colly.HTMLElement) {
+					unfilteredTags = append(unfilteredTags, strings.TrimSpace(ce.Text))
+				})
+			}
+
+			if strings.Contains(c, "Models:") {
+				block.ForEach(`a`, func(id int, ce *colly.HTMLElement) {
+					sc.Cast = append(sc.Cast, strings.TrimSpace(ce.Text))
+				})
+			}
+		})
+
+		// Drop tags whose text equals a cast member's name.
+		sc.Tags = funk.FilterString(unfilteredTags, func(t string) bool {
+			return !funk.ContainsString(sc.Cast, t)
+		})
+
+		out <- sc
+	})
+
+	siteCollector.OnHTML(`a[data-mb="shuffle-thumbs"]`, func(e *colly.HTMLElement) {
+		sceneURL := e.Request.AbsoluteURL(e.Attr("href"))
+
+		// If the scene is already in knownScenes, there's no need to scrape it
+		if !funk.ContainsString(knownScenes, sceneURL) && !strings.Contains(sceneURL, "/signup") {
+			sceneCollector.Visit(sceneURL)
+		}
+	})
+
+	siteCollector.OnHTML(`.pagination a`, func(e *colly.HTMLElement) {
+		pageURL := e.Request.AbsoluteURL(e.Attr("href"))
+		if !strings.Contains(pageURL, "/signup") {
+			siteCollector.Visit(pageURL)
+		}
+	})
+
+	siteCollector.Visit("https://www.vrconk.com/")
+
+	if updateSite {
+		updateSiteLastUpdate("vrconk")
+	}
+	logScrapeFinished("vrconk", "VRCONK")
+	return nil
+}
+
+// init registers VRCONK with registerScraper under the id "vrconk".
+func init() {
+	registerScraper("vrconk", "VRCONK", VRCONK)
+}
diff --git a/ui/src/views/scenes/Details.vue b/ui/src/views/scenes/Details.vue
index 3abc7a94c..5d92d3365 100644
--- a/ui/src/views/scenes/Details.vue
+++ b/ui/src/views/scenes/Details.vue
@@ -50,7 +50,7 @@
    {{c.name}} - {{tag.name}} ({{tag.count}} + {{tag.name}} ({{tag.count}})