From a0a4424ffe62850adb8145827ea38ff71ad18a41 Mon Sep 17 00:00:00 2001 From: pops64 Date: Tue, 20 Aug 2024 16:56:47 -0400 Subject: [PATCH 1/3] Fix for POVR scraper They are now putting porn star suggestions on the same page as scenes. These suggestions match the same search parameters as the scenes. The easiest fix is to make sure that only links in the `/vr-porn` directory are scraped, as there really isn't any identifiable way to target only the scene links through query selectors. --- pkg/scrape/povr.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/scrape/povr.go b/pkg/scrape/povr.go index 6d7fa6c46..944f75556 100644 --- a/pkg/scrape/povr.go +++ b/pkg/scrape/povr.go @@ -124,8 +124,8 @@ func POVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- siteCollector.OnHTML(`div.thumbnail-wrap div.thumbnail a.thumbnail__link`, func(e *colly.HTMLElement) { sceneURL := e.Request.AbsoluteURL(e.Attr("href")) - // If scene exists in database, or the slternate source exists, there's no need to scrape - if !funk.ContainsString(knownScenes, sceneURL) && !strings.Contains(sceneURL, "/join") { + // If scene exists in database, or the slternate source exists, there's no need to scrape. 
Also make sure we only grab valid scene links in the vr-porn directory + if !funk.ContainsString(knownScenes, sceneURL) && strings.Contains(sceneURL, "/vr-porn") and !strings.Contains(sceneURL, "/join") { WaitBeforeVisit("povr.com", sceneCollector.Visit, sceneURL) } }) From ecd1994d852202e8b9bd33a226a81e32f9e76421 Mon Sep 17 00:00:00 2001 From: pops64 Date: Tue, 20 Aug 2024 16:57:50 -0400 Subject: [PATCH 2/3] Typo fix --- pkg/scrape/povr.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/scrape/povr.go b/pkg/scrape/povr.go index 944f75556..cbef214a1 100644 --- a/pkg/scrape/povr.go +++ b/pkg/scrape/povr.go @@ -125,7 +125,7 @@ func POVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- sceneURL := e.Request.AbsoluteURL(e.Attr("href")) // If scene exists in database, or the slternate source exists, there's no need to scrape. Also make sure we only grab valid scene links in the vr-porn directory - if !funk.ContainsString(knownScenes, sceneURL) && strings.Contains(sceneURL, "/vr-porn") and !strings.Contains(sceneURL, "/join") { + if !funk.ContainsString(knownScenes, sceneURL) && strings.Contains(sceneURL, "/vr-porn") && !strings.Contains(sceneURL, "/join") { WaitBeforeVisit("povr.com", sceneCollector.Visit, sceneURL) } }) From 7f71f07274d7de5e09dd3a0fc1adeca3d030246d Mon Sep 17 00:00:00 2001 From: pops64 Date: Tue, 20 Aug 2024 18:31:32 -0400 Subject: [PATCH 3/3] Add trailing slash Just to make sure it matches a directory --- pkg/scrape/povr.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/scrape/povr.go b/pkg/scrape/povr.go index cbef214a1..4d75e247f 100644 --- a/pkg/scrape/povr.go +++ b/pkg/scrape/povr.go @@ -125,7 +125,7 @@ func POVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- sceneURL := e.Request.AbsoluteURL(e.Attr("href")) // If scene exists in database, or the slternate source exists, there's no need to scrape. 
Also make sure we only grab valid scene links in the vr-porn directory - if !funk.ContainsString(knownScenes, sceneURL) && strings.Contains(sceneURL, "/vr-porn") && !strings.Contains(sceneURL, "/join") { + if !funk.ContainsString(knownScenes, sceneURL) && strings.Contains(sceneURL, "/vr-porn/") && !strings.Contains(sceneURL, "/join") { WaitBeforeVisit("povr.com", sceneCollector.Visit, sceneURL) } })