
Add support for scrolller.com (#624)
* Add support for scrolller.com

* Remove unused import

* Remove unnecessary logging

* Version update

---------

Co-authored-by: Jules <[email protected]>
Fovty and Jules-WinnfieldX authored Nov 27, 2023
1 parent e9d20e7 commit bae7651
Showing 3 changed files with 132 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
@@ -41,6 +41,7 @@ Read the [Wiki](https://github.com/Jules-WinnfieldX/CyberDropDownloader/wiki/)!
| Cyberfile | folders: cyberfile.su/folder/... <br> shared: cyberfile.su/shared/... <br> Direct: cyberfile.su/... |
| E-Hentai | Albums: e-hentai.org/g/... <br> Posts: e-hentai.org/s/... |
| Erome | Albums: erome.com/a/... |
| Scrolller | Subreddits: scrolller.com/r/... |
| Fapello | Models: fapello.com/... |
| Gallery.DeltaPorno.com | Albums: Gallery.DeltaPorno.com/album/... <br> Direct Images: Gallery.DeltaPorno.com/image/... <br> User Profile: Gallery.DeltaPorno.com/#USER# <br> All User Albums: Gallery.DeltaPorno.com/#USER#/albums |
| GoFile | Albums: gofile.io/d/... |
117 changes: 117 additions & 0 deletions cyberdrop_dl/crawlers/Scrolller_Spider.py
@@ -0,0 +1,117 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Dict
import json

from yarl import URL

from ..base_functions.base_functions import log, logger, create_media_item
from ..base_functions.data_classes import DomainItem
from ..base_functions.error_classes import NoExtensionFailure

if TYPE_CHECKING:
    from ..base_functions.base_functions import ErrorFileWriter
    from ..base_functions.sql_helper import SQLHelper
    from ..client.client import ScrapeSession

class ScrolllerCrawler:
    def __init__(self, separate_posts: bool, quiet: bool, SQL_Helper: SQLHelper, error_writer: ErrorFileWriter,
                 args: Dict[str, str]):
        self.separate_posts = separate_posts
        self.quiet = quiet
        self.SQL_Helper = SQL_Helper
        self.error_writer = error_writer
        self.scrolller_api = URL("https://api.scrolller.com/api/v2/graphql")
        self.headers = {"Content-Type": "application/json"}

    async def fetch(self, session: ScrapeSession, url: URL) -> DomainItem:
        subreddit = url.parts[-1]
        domain_obj = DomainItem("scrolller", {})
        try:
            log(f"Starting: {subreddit}", quiet=self.quiet, style="green")

            body = {
                "query": """
                    query SubredditQuery(
                        $url: String!
                        $filter: SubredditPostFilter
                        $iterator: String
                    ) {
                        getSubreddit(url: $url) {
                            title
                            children(
                                limit: 10000
                                iterator: $iterator
                                filter: $filter
                                disabledHosts: null
                            ) {
                                iterator
                                items {
                                    title
                                    mediaSources {
                                        url
                                    }
                                    blurredMediaSources {
                                        url
                                    }
                                }
                            }
                        }
                    }
                """,
                "variables": {
                    "url": f"/r/{subreddit}",
                    "filter": None,
                    "hostsDown": None
                },
            }

            iterator = None
            prev_iterator = None
            iterations = 0

            while True:
                # Fetch the next page of items using the current iterator
                body["variables"]["iterator"] = iterator
                response = await session.post(self.scrolller_api, data=json.dumps(body))

                if response:
                    data = response
                    items = data["data"]["getSubreddit"]["children"]["items"]

                    for item in items:
                        title = str(url.parts[-1]).split(".")[0]
                        mediaSources = item['mediaSources']
                        if mediaSources:
                            highest_res_image_url = mediaSources[-1]['url']
                            # Fetch the highest-resolution image (the last media source)
                            await self.get_image(URL(highest_res_image_url), URL(highest_res_image_url), title, domain_obj)

                    prev_iterator = iterator
                    iterator = data["data"]["getSubreddit"]["children"]["iterator"]

                    # If there are no more items or the iterator hasn't changed, stop paginating
                    if not items or iterator == prev_iterator:
                        break
                    if iterations > 0 and iterator is None:
                        break
                else:
                    break

                iterations += 1

            await self.SQL_Helper.insert_domain("scrolller", url, domain_obj)
            log(f"Finished: {subreddit}", quiet=self.quiet, style="green")
        except Exception as e:
            logger.debug("Error encountered while handling %s", subreddit, exc_info=True)
            await self.error_writer.write_errored_scrape(subreddit, e, self.quiet)

        return domain_obj

    async def get_image(self, url: URL, referer: URL, title: str, domain_obj: DomainItem):
        try:
            media_item = await create_media_item(url, referer, self.SQL_Helper, "scrolller")
        except NoExtensionFailure:
            logger.debug("Couldn't get extension for %s", url)
            return
        await domain_obj.add_media(title, media_item)
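
For reference, the pagination pattern used in fetch() above can be exercised on its own. The sketch below is a minimal standalone example and is not part of this commit: the endpoint, query shape, and iterator handling are taken from Scrolller_Spider.py, while the aiohttp client and the fetch_subreddit_media helper name are illustrative assumptions.

import asyncio
import json

import aiohttp

SCROLLLER_API = "https://api.scrolller.com/api/v2/graphql"

# Same query shape as Scrolller_Spider.py, trimmed to the fields the crawler reads.
QUERY = """
query SubredditQuery($url: String!, $filter: SubredditPostFilter, $iterator: String) {
    getSubreddit(url: $url) {
        children(limit: 10000, iterator: $iterator, filter: $filter) {
            iterator
            items { mediaSources { url } }
        }
    }
}
"""

async def fetch_subreddit_media(subreddit: str) -> list[str]:
    """Collect the highest-resolution media URL of every post in a subreddit."""
    urls: list[str] = []
    iterator = None
    async with aiohttp.ClientSession(headers={"Content-Type": "application/json"}) as session:
        while True:
            body = {"query": QUERY,
                    "variables": {"url": f"/r/{subreddit}", "filter": None, "iterator": iterator}}
            async with session.post(SCROLLLER_API, data=json.dumps(body)) as resp:
                # content_type=None avoids mimetype strictness if the API labels the body oddly
                data = await resp.json(content_type=None)
            children = data["data"]["getSubreddit"]["children"]
            items = children["items"]
            for item in items:
                if item["mediaSources"]:
                    # Media sources are assumed ordered low -> high resolution; take the last one
                    urls.append(item["mediaSources"][-1]["url"])
            prev_iterator, iterator = iterator, children["iterator"]
            if not items or iterator is None or iterator == prev_iterator:
                break
    return urls

# asyncio.run(fetch_subreddit_media("Wallpapers"))

The loop stops on the same conditions the crawler uses: an empty page, or an iterator that no longer advances.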
15 changes: 14 additions & 1 deletion cyberdrop_dl/scraper/Scraper.py
@@ -15,6 +15,7 @@
from cyberdrop_dl.crawlers.Cyberdrop_Spider import CyberdropCrawler
from cyberdrop_dl.crawlers.EHentai_Spider import EHentaiCrawler
from cyberdrop_dl.crawlers.Erome_Spider import EromeCrawler
from cyberdrop_dl.crawlers.Scrolller_Spider import ScrolllerCrawler
from cyberdrop_dl.crawlers.Fapello_Spider import FapelloCrawler
from cyberdrop_dl.crawlers.Gfycat_Spider import GfycatCrawler
from cyberdrop_dl.crawlers.GoFile_Spider import GoFileCrawler
@@ -62,6 +63,7 @@ def __init__(self, args: Dict, client: Client, SQL_Helper: SQLHelper, quiet: boo
        self.cyberfile_crawler: Optional[CyberFileCrawler] = None
        self.ehentai_crawler: Optional[EHentaiCrawler] = None
        self.erome_crawler: Optional[EromeCrawler] = None
        self.scrolller_crawler: Optional[ScrolllerCrawler] = None
        self.fapello_crawler: Optional[FapelloCrawler] = None
        self.gfycat_crawler: Optional[GfycatCrawler] = None
        self.gofile_crawler: Optional[GoFileCrawler] = None
@@ -97,7 +99,7 @@ def __init__(self, args: Dict, client: Client, SQL_Helper: SQLHelper, quiet: boo

        self.mapping = {"anonfiles": self.Anonfiles, "bayfiles": self.Anonfiles, "xbunkr": self.XBunkr,
                        "bunkr": self.Bunkr, "cyberdrop": self.Cyberdrop, "cyberfile": self.CyberFile,
-                       "erome": self.Erome, "fapello": self.Fapello, "gfycat": self.Gfycat, "gofile": self.GoFile,
+                       "erome": self.Erome, "scrolller": self.Scrolller, "fapello": self.Fapello, "gfycat": self.Gfycat, "gofile": self.GoFile,
                        "hgamecg": self.HGameCG, "imgbox": self.ImgBox, "pixeldrain": self.PixelDrain,
                        "postimg": self.PostImg, "saint": self.Saint, "img.kiwi": self.ShareX, "imgur": self.Imgur,
                        "jpg.church": self.ShareX, "jpg.fish": self.ShareX, "jpg.pet": self.ShareX,
@@ -328,6 +330,17 @@ async def XBunkr(self, url, title=None):

"""Archive Sites"""

async def Scrolller(self, url: URL, title=None):
scrolller_session = ScrapeSession(self.client)
if not self.scrolller_crawler:
self.scrolller_crawler = ScrolllerCrawler(quiet=self.quiet, SQL_Helper=self.SQL_Helper,
error_writer=self.error_writer, separate_posts=self.separate_posts,
args=self.args)
domain_obj = await self.scrolller_crawler.fetch(scrolller_session, url)
if domain_obj:
await self._handle_domain_additions("scrolller", domain_obj, title)
await scrolller_session.exit_handler()

async def Fapello(self, url, title=None):
fapello_session = ScrapeSession(self.client)
if not self.fapello_crawler:
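Beyond the new Scrolller coroutine, the only change this file needs is the "scrolller" entry in self.mapping above; that dictionary is what routes a pasted link to its handler. A rough sketch of that dispatch follows, assuming a helper that matches the URL host against the mapping keys — the Scraper's actual matching code sits outside this diff, so the route name and logic here are illustrative only.

from yarl import URL

# Illustrative only: how a "scrolller" key in the mapping routes a URL to its handler,
# e.g. https://scrolller.com/r/... lands in the Scrolller coroutine.
async def route(url: URL, mapping: dict, title=None):
    host = url.host or ""
    for key, handler in mapping.items():
        if key in host:  # e.g. "scrolller" in "scrolller.com"
            return await handler(url, title=title)
    return None  # unsupported host

# e.g. await route(URL("https://scrolller.com/r/Wallpapers"), scraper.mapping)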
