Skip to content

Commit

Permalink
Added DDFNetworkVR scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
cld9x committed Aug 4, 2019
1 parent 0f5dc2d commit 03ede9e
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
## Features

- Automatically match title, tags, cast, cover image, and more to your videos
- Support for all the most popular VR sites: BadoinkVR, CzechVR Network, MilfVR, NaughtyAmericaVR, SexBabesVR, StasyQVR, TmwVRnet, VirtualRealPorn, VirtualTaboo, VRBangers, VRHush, and WankzVR
- Support for all the most popular VR sites: BadoinkVR, CzechVR Network, DDFNetworkVR, MilfVR, NaughtyAmericaVR, SexBabesVR, StasyQVR, TmwVRnet, VirtualRealPorn, VirtualTaboo, VRBangers, VRHush, and WankzVR
- Built-in DLNA streaming server compatible with popular VR players (Pigasus, Skybox, Mobile Station VR)
- Sleek and simple web UI
- Browse your content by cast, site, tags, and release date
Expand Down
126 changes: 126 additions & 0 deletions pkg/scrape/ddfnetworkvr.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package scrape

import (
"log"
"strconv"
"strings"

"github.com/gocolly/colly"
"github.com/mozillazg/go-slugify"
"github.com/nleeper/goment"
"github.com/thoas/go-funk"
)

// ScrapeDDFNetworkVR crawls ddfnetworkvr.com and appends one ScrapedScene to
// *out for every scene page it visits.
//
// Two collectors are used: siteCollector walks the listing and pagination
// pages, and sceneCollector fetches the individual scene pages. Scene URLs
// already present in knownScenes are not visited.
//
// The returned error is the one reported by the initial visit to the site's
// front page, if any. Failures while visiting pagination or scene pages are
// not propagated; fields that cannot be parsed are left at their zero value.
func ScrapeDDFNetworkVR(knownScenes []string, out *[]ScrapedScene) error {
	siteCollector := colly.NewCollector(
		colly.AllowedDomains("ddfnetworkvr.com"),
		colly.CacheDir(siteCacheDir),
		colly.UserAgent(userAgent),
		colly.MaxDepth(5),
	)

	sceneCollector := colly.NewCollector(
		colly.AllowedDomains("ddfnetworkvr.com"),
		colly.CacheDir(sceneCacheDir),
		colly.UserAgent(userAgent),
	)

	siteCollector.OnRequest(func(r *colly.Request) {
		log.Println("visiting", r.URL.String())
	})

	sceneCollector.OnRequest(func(r *colly.Request) {
		log.Println("visiting", r.URL.String())
	})

	sceneCollector.OnHTML(`html`, func(e *colly.HTMLElement) {
		sc := ScrapedScene{}
		sc.SceneType = "VR"
		sc.Studio = "DDFNetwork"
		sc.Site = "DDFNetworkVR"
		// Drop any query string so the stored URL is the bare page address.
		sc.HomepageURL = strings.Split(e.Request.URL.String(), "?")[0]

		// ID: the last path segment of the scene URL.
		tmp := strings.Split(sc.HomepageURL, "/")
		sc.SiteID = tmp[len(tmp)-1]
		sc.SceneID = slugify.Slugify(sc.Site) + "-" + sc.SiteID

		// Title
		e.ForEach(`div.video-title h1`, func(id int, e *colly.HTMLElement) {
			sc.Title = strings.TrimSpace(e.Text)
		})

		// Cover
		e.ForEach(`dl8-video`, func(id int, e *colly.HTMLElement) {
			// NOTE: preview image comes in two flavours - preview_vr.jpg and preview.jpg
			poster := e.Attr("poster")
			if poster == "" {
				// Skip empty entries so the fallback below can still run.
				return
			}
			sc.Covers = append(sc.Covers, strings.Replace(poster, "_vr", "", -1))
		})

		// Cover (for older videos): only the first image, and only when the
		// dl8-video element did not provide one.
		e.ForEach(`div.video-box-inner img`, func(id int, e *colly.HTMLElement) {
			if len(sc.Covers) != 0 || id != 0 {
				return
			}
			src := e.Attr("src")
			if src == "" {
				return
			}
			sc.Covers = append(sc.Covers, strings.Replace(src, "_vr", "", -1))
		})

		// Gallery
		e.ForEach(`#photoSliderGuest div.card a`, func(id int, e *colly.HTMLElement) {
			sc.Gallery = append(sc.Gallery, e.Request.AbsoluteURL(e.Attr("href")))
		})

		// Synopsis
		e.ForEach(`div.about-text p.box-container`, func(id int, e *colly.HTMLElement) {
			sc.Synopsis = strings.TrimSpace(e.Text)
		})

		// Tags
		e.ForEach(`ul.tags li`, func(id int, e *colly.HTMLElement) {
			tag := strings.TrimSpace(e.Text)
			if tag != "" {
				sc.Tags = append(sc.Tags, tag)
			}
		})

		// Cast
		e.ForEach(`div.video-title h2.actors a`, func(id int, e *colly.HTMLElement) {
			sc.Cast = append(sc.Cast, strings.TrimSpace(e.Text))
		})

		// Date
		e.ForEach(`h2.actors time`, func(id int, e *colly.HTMLElement) {
			tmpDate, err := goment.New(e.Text, "MMMM DD, YYYY")
			if err != nil || tmpDate == nil {
				// Leave Released empty rather than dereferencing a nil date.
				log.Printf("ddfnetworkvr: cannot parse release date %q: %v", e.Text, err)
				return
			}
			sc.Released = tmpDate.Format("YYYY-MM-DD")
		})

		// Duration: the second colon-separated field of the element text.
		e.ForEach(`p.duration`, func(id int, e *colly.HTMLElement) {
			parts := strings.Split(e.Text, ":")
			if len(parts) < 2 {
				// No colon in the text; indexing parts[1] would panic.
				return
			}
			tmpDuration, err := strconv.Atoi(strings.TrimSpace(parts[1]))
			if err == nil {
				sc.Duration = tmpDuration
			}
		})

		// Filenames
		// NOTE: no way to guess filename

		*out = append(*out, sc)
	})

	siteCollector.OnHTML(`ul.pagination a.page-link`, func(e *colly.HTMLElement) {
		pageURL := e.Request.AbsoluteURL(e.Attr("href"))
		// The result is deliberately ignored: pagination links repeat across
		// pages, so a rejected visit here is expected and not a failure.
		siteCollector.Visit(pageURL)
	})

	siteCollector.OnHTML(`div#scenesAjaxReplace a.play-on-hover`, func(e *colly.HTMLElement) {
		sceneURL := e.Request.AbsoluteURL(e.Attr("href"))

		// If the scene already exists in the database, there's no need to scrape it.
		if !funk.ContainsString(knownScenes, sceneURL) {
			// Best effort: one failing scene must not stop the whole crawl.
			sceneCollector.Visit(sceneURL)
		}
	})

	// Surface a failure of the entry-point request to the caller instead of
	// reporting success unconditionally.
	return siteCollector.Visit("https://ddfnetworkvr.com/")
}
3 changes: 3 additions & 0 deletions pkg/xbvr/task_content.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ func Scrape() {
tlog.Infof("Scraping TmwVRnet")
scrape.ScrapeTmwVRnet(knownScenes, &collectedScenes)

tlog.Infof("Scraping DDFNetworkVR")
scrape.ScrapeDDFNetworkVR(knownScenes, &collectedScenes)

if len(collectedScenes) > 0 {
tlog.Infof("Scraped %v new scenes", len(collectedScenes))

Expand Down

0 comments on commit 03ede9e

Please sign in to comment.