diff --git a/docs/source/api/data.rst b/docs/source/api/data.rst index 250e25a..f776790 100644 --- a/docs/source/api/data.rst +++ b/docs/source/api/data.rst @@ -25,4 +25,9 @@ by other PHUB objects. .. autoclass:: phub.utils.Quality :members: :undoc-members: - :special-members: __init__, __new__ \ No newline at end of file + :special-members: __init__, __new__ + +.. autoclass:: phub.utils.Category + :members: + :undoc-members: + :special-members: __init__ \ No newline at end of file diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index 0126909..beb2e06 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -46,13 +46,17 @@ like so: client = phub.Client() url = 'https://www.pornhub.com/view_video.php?viewkey=xxx' - video = client.get(url) + video = client.get(url) # (1)! # video will be a phub.Video object - # Note - You can also load the video - # using its `viewkey` https argument - # if you think that improves clarity. - video = client.get(key = 'xxx') +.. code-annotations:: + #. + Note that you can also load the video + using the `viewkey` parameter in the URL. + + .. code-block:: python + + video = client.get(key = 'xxx') Accessing video data -------------------- diff --git a/docs/source/searching.rst b/docs/source/searching.rst index 38d31ce..a3d1291 100644 --- a/docs/source/searching.rst +++ b/docs/source/searching.rst @@ -1,6 +1,11 @@ Searching ========= +.. warning:: + + The search feature might give inaccurate results. + The cause is not yet known. + One feature of PHUB is that it is able to send and receive queries. @@ -34,3 +39,27 @@ if needed. You can access its content like so: .. note:: Unlike :meth:`.Client.get`, by default the content of the video is not fetched yet, unless you ask for it (by calling `video.title` for example). + +Search filters +-------------- + +PHUB supports filtering queries like on the PH website. + +.. 
automethod:: phub.core.Client.search + +Category object +^^^^^^^^^^^^^^^ + +Category objects are concatenable, e.g.: + +.. code-block:: python + + from phub import Category + + # Represents both french and german categories + my_category = Category.FRENCH | Category.GERMAN + + query = client.search(exclude_category = my_category) # (1)! + +.. code-annotations:: + #. Pornhub doesn't support searching through multiple categories at once, so you can use this feature only with :attr:`exclude_category` and not :attr:`category`. diff --git a/setup.cfg b/setup.cfg index 581c153..8b92019 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = phub -version = 3.1.4 +version = 3.2 description = An API for PornHub author = Egsagon author_email = egsagon12@gmail.com diff --git a/src/phub/__init__.py b/src/phub/__init__.py index 81e675b..c1a0cd0 100644 --- a/src/phub/__init__.py +++ b/src/phub/__init__.py @@ -1,16 +1,15 @@ ''' -#### PHUB - An API wrapper. +#### PHUB - An API wrapper for Pornhub. See https://phub.rtfd.io for documentation. ''' -from phub import ( core, utils, consts, classes, parser ) +from phub import ( core, utils, consts, classes, parser, errors ) # Shortcuts from phub.core import Client -from phub.utils import Quality from phub.classes import Video, User -from phub.consts import locals +from phub.utils import Quality, Category # Debugging controls from sys import stdout diff --git a/src/phub/classes.py b/src/phub/classes.py index e7e34a7..ee9736e 100644 --- a/src/phub/classes.py +++ b/src/phub/classes.py @@ -13,22 +13,24 @@ from datetime import datetime, timedelta from functools import cached_property, cache -Soup = None -try: - from bs4 import BeautifulSoup as Soup - -except ModuleNotFoundError: - print('Warning: BS4 not installed. 
Feed features will not be available.') - from phub import utils from phub import consts from phub import parser +from phub import errors + from phub.utils import ( log, register_properties, download_presets as dlp ) +try: + from bs4 import BeautifulSoup as Soup + +except ModuleNotFoundError: + log('phub', 'Warning: BS4 not installed. Feed features will not be available.') + Soup = None + @dataclass class User: @@ -106,7 +108,7 @@ def get(cls, return cls(name = name, path = url, client = client) else: - raise consts.UserNotFoundError(f'User `{name}` not found.') + raise errors.UserNotFoundError(f'User `{name}` not found.') @cached_property def videos(self) -> Query: @@ -222,7 +224,7 @@ def _fetch(self, key: str) -> Any: return value except ValueError: - raise consts.ParsingError(f'key `{key}` does not exists in video data.') + raise errors.ParsingError(f'key `{key}` does not exists in video data.') # ========= Download ========= # @@ -470,7 +472,7 @@ def __init__(self, client: Client, url: str, corrector: Callable = None) -> None log('query', 'Initialising new Query object', level = 6) self.client = client - self.url = (url + '?&'['?' in url] + 'page=').replace(consts.ROOT, '') + self.url = url.replace(consts.ROOT, '') self._length: int = None self.index = 0 @@ -496,7 +498,7 @@ def __len__(self) -> int: counter = consts.regexes.video_search_counter(self.page) log('query', 'Collected counters:', counter, level = 4) - if len(counter) != 1: raise consts.CounterNotFound() + if len(counter) != 1: raise errors.CounterNotFound() return int(counter[0]) def __getitem__(self, index: int | slice) -> Video | Generator[Video, None, None]: @@ -517,8 +519,9 @@ def __getitem__(self, index: int | slice) -> Video | Generator[Video, None, None return self.get(index) - def wrapper(): # We need to wrap this, otherwise the whole __getitem__ will be - # Interpreted as a generator. 
+ def wrapper() -> Generator[Video, None, None]: + # We need to wrap this, otherwise the whole __getitem__ will be + # Interpreted as a generator. indices = index.indices(len(self)) @@ -572,10 +575,15 @@ def _get_page(self, index: int) -> None: log('query', 'Fetching page at index', index, level = 4) - response = self.client._call('GET', self.url + str(index + 1), throw = False) + if index == 0: url = self.url + + else: + url = self.url + '?&'['?' in url] + 'page=' + str(index + 1) + + response = self.client._call('GET', url, throw = False) if response.status_code == 404: - raise consts.Noresult('No result for the given query.') + raise errors.Noresult('No result for the given query.') raw = response.text diff --git a/src/phub/consts.py b/src/phub/consts.py index 30162f4..2de6432 100644 --- a/src/phub/consts.py +++ b/src/phub/consts.py @@ -33,6 +33,23 @@ None: None } +# Search production filters +PROFESSIONAL = 'professional' +HOMEMADE = 'homemade' + +# Search sort filters +MOST_RELEVANT = None +MOST_RECENT = 'most recent' +MOST_VIEWED = 'most viewed' +TOP_RATED = 'top rated' +LONGEST = 'longest' + +# Search time filters +DAY = 'day' +WEEK = 'week' +MONTH = 'month' +YEAR = 'year' + class regexes: ''' Compiled regexes methods used for parsing. @@ -82,85 +99,4 @@ class FeedType: COMMENTED = 'stream_grouped_comments_videos' # TODO more stream types - -class locals: - ''' - Locales constants. - ''' - - # Search production types - PROFESSIONAL = 'professional' - HOMEMADE = 'homemade' - - # Search sort filters - MOST_RELEVANT = None - MOST_RECENT = 'most recent' - MOST_VIEWED = 'most viewed' - TOP_RATED = 'top rated' - LONGEST = 'longest' - - # Search time filters - DAY = 'day' - WEEK = 'week' - MONTH = 'month' - YEAR = 'year' - -# Exceptions - -class CounterNotFound(Exception): - ''' - The video counter wasn't found in the query, - or is badly parsed. 
- ''' - - pass - -class ParsingError(Exception): - ''' - The parser failed to properly resolve the script - for this element. - ''' - - pass - -class UserNotFoundError(Exception): - ''' - Failed to find a PH user account. - ''' - - pass - -class NotLoggedIn(Exception): - ''' - The client is not logged to a PH account, - but tried to access account data. - ''' - -class AlreadyLoggedIn(Exception): - ''' - The client already established a connection with PH. - ''' - -class LogginFailed(Exception): - ''' - Login phase failed. Credentials may be wrong. - ''' - -class TooManyRequests(Exception): - ''' - The client sent too many requests. - To bypass, consider using proxies or - set a small delay to the client request: - - .. code-block:: python - - client = phub.Client(delay = True) - ''' - -class Noresult(Exception): - ''' - The search query did not found videos with - its given filters. - ''' - -# EOF +# EOF \ No newline at end of file diff --git a/src/phub/core.py b/src/phub/core.py index c628a15..07d6ef8 100644 --- a/src/phub/core.py +++ b/src/phub/core.py @@ -13,6 +13,7 @@ from phub import utils from phub import consts +from phub import errors from phub import classes from phub.utils import log, register_properties @@ -66,7 +67,7 @@ def __getattribute__(self, item: str): if item.startswith('_'): return obj if item in self.__properties__ and not self.client.logged: - raise consts.NotLoggedIn('Client is not logged in.') + raise errors.NotLoggedIn('Client is not logged in.') return obj @@ -246,7 +247,7 @@ def _call(self, url = consts.ROOT + utils.slash(func, '**') \ if simple_url else func - log('clien', f'Sending request at {utils.shortify(func, 30)}', level = 6) + log('clien', f'Sending request at {utils.shortify(func, 120)}', level = 6) response = self.session.request( method = method, @@ -258,8 +259,8 @@ def _call(self, log('clien', 'Request passed with status', response.status_code, level = 6) if throw and response.status_code == 429: - raise 
consts.TooManyRequests('Too many requests.') - print(response.status_code) + raise errors.TooManyRequests('Too many requests.') + if throw and not response.ok: raise ConnectionError(f'Request `{func}` failed.') @@ -278,7 +279,7 @@ def login(self, force: bool = False, throw: bool = True) -> bool: ''' if not force and self.logged: - raise consts.AlreadyLoggedIn('Account already connected.') + raise errors.AlreadyLoggedIn('Account already connected.') log('client', 'Attempting loggin...', level = 6) @@ -302,7 +303,7 @@ def login(self, force: bool = False, throw: bool = True) -> bool: # Throw error if throw: - raise consts.LogginFailed('Login failed. Check credentials.') + raise errors.LogginFailed('Login failed. Check credentials.') self.logged = success return success @@ -348,7 +349,8 @@ def search(self, production: Literal['professional', 'homemade'] | None = None, duration: tuple[int] | None = None, hd: bool = False, - category: int = None, + category: utils.Category = None, + exclude_category: utils.Category = None, sort: Literal['most relevant', 'most recent', 'most viewed', 'top rated', 'longest'] = None, time: Literal['day', 'week', 'month', 'year'] | None = None @@ -361,7 +363,8 @@ def search(self, production (str): The production type, professional or homemade (both by default). duration (tuple[int]): Video duration boundaries. hd (bool): Wether to get only HD quality videos. - category (int): TODO + category (utils.Category): The video category to search in. + exclude_category (utils.Category): The video category(ies) to exclude from searching. sort (str): How to sort videos (most relevant by default). time (str): Video release approximation (does not work when sorting most relevant). 
@@ -373,14 +376,15 @@ def search(self, sort = consts.SEARCH_SORT_TRANSLATION[sort] # Add filters - if hd: url += '&hd=1' - if production: url += f'&p={production}' - if duration: url += '&min_duration={}&max_duration={}'.format(*duration) # TODO - if category: url += '&filter_category=' #TODO - if sort: url += f'&o={sort}' - if time and sort: url += f'&t=' + time[0] + if hd: url += '&hd=1' + if production: url += f'&p={production}' + if duration: url += '&min_duration={}&max_duration={}'.format(*duration) # TODO + if category: url += '&filter_category=' + str(category) + if exclude_category: url += '&exclude_category=' + str(exclude_category) + if sort: url += f'&o={sort}' + if time and sort: url += f'&t=' + time[0] log('clien', 'Opening new search query:', url, level = 6) - return classes.Query(self, url) + return classes.Query(self, url, utils.remove_video_ads) # EOF diff --git a/src/phub/errors.py b/src/phub/errors.py new file mode 100644 index 0000000..3de2061 --- /dev/null +++ b/src/phub/errors.py @@ -0,0 +1,56 @@ +''' +Errors for the PHUB package. +''' + + +class CounterNotFound(Exception): + ''' + The video counter wasn't found in the query, + or is badly parsed. + ''' + +class ParsingError(Exception): + ''' + The parser failed to properly resolve the script + for this element. + ''' + +class UserNotFoundError(Exception): + ''' + Failed to find a PH user account. + ''' + +class NotLoggedIn(Exception): + ''' + The client is not logged to a PH account, + but tried to access account data. + ''' + +class AlreadyLoggedIn(Exception): + ''' + The client already established a connection with PH. + ''' + +class LogginFailed(Exception): + ''' + Login phase failed. Credentials may be wrong. + ''' + +class TooManyRequests(Exception): + ''' + The client sent too many requests. + To bypass, consider using proxies or + set a small delay to the client request: + + .. 
code-block:: python + + client = phub.Client(delay = True) + ''' + +class Noresult(Exception): + ''' + The search query did not found videos with + its given filters. + ''' + +# EOF \ No newline at end of file diff --git a/src/phub/parser.py b/src/phub/parser.py index cdbbc80..60a1575 100644 --- a/src/phub/parser.py +++ b/src/phub/parser.py @@ -6,6 +6,7 @@ import json from phub import consts +from phub import errors from phub.utils import log, least_factors, hard_strip from typing import TYPE_CHECKING @@ -74,7 +75,7 @@ def resolve(video: Video) -> dict: break else: - raise consts.ParsingError('Max renew attempts exceeded.') + raise errors.ParsingError('Max renew attempts exceeded.') script = video.page.split("flashvars_['nextVideo'];")[1].split('var nextVideoPlay')[0] log('parse', 'Formating flash:', flash, level = 5) diff --git a/src/phub/utils.py b/src/phub/utils.py index 931190a..7d68bb2 100644 --- a/src/phub/utils.py +++ b/src/phub/utils.py @@ -22,6 +22,7 @@ DEBUG_RESET: bool = False DEBUG_FILE = sys.stdout +# === Utilities function === # def slash(string: str, form: Literal['**', '*/', '/*', '//']) -> str: ''' @@ -250,6 +251,7 @@ def least_factors(n: int): return n +# === Presets === # class download_presets: ''' @@ -319,6 +321,7 @@ def wrapper(cur: int, total: int) -> None: return __wrapper__ +# === Dataclasses === # @dataclass class BaseQuality: @@ -366,10 +369,145 @@ def select(self, quals: dict) -> str: # This should not happen raise TypeError('Internal error: quality type is', type(self.value)) +class BaseCategory: + ''' + Represent one or multiple categories. + ''' + + def __init__(self, id: str | int, name: str) -> None: + ''' + Initialise a new category. + ''' + + self.name = name + self.id = str(id) + + def __repr__(self) -> str: + + return f'' + + def __str__(self) -> str: + + return self.id + + def __or__(self, __value: Self) -> Self: + ''' + Concatenate two categories. 
+ ''' + + return Category(f'{self.id}-{__value.id}', + f'{self.name} & {__value.name}') + +# === Constant classes === # class Quality(BaseQuality): + ''' + Represents a video quality. + ''' + BEST = BaseQuality('best') HALF = BaseQuality('half') WORST = BaseQuality('worst') +class Category(BaseCategory): + ''' + Represents a search category. + ''' + + # NOTE - These are all categories provided by PH when making + # search requests, but where the fuck are the missing ones + ASIAN = BaseCategory( 1 , 'Asian' ) + ORGY = BaseCategory( 2 , 'Orgy' ) + AMATEUR = BaseCategory( 3 , 'Amateur' ) + BIG_ASS = BaseCategory( 4 , 'Big Ass' ) + BABE = BaseCategory( 5 , 'Babe' ) + BBW = BaseCategory( 6 , 'BBW' ) + BIG_DICK = BaseCategory( 7 , 'Big Dick' ) + BIG_TITS = BaseCategory( 8 , 'Big Tits' ) + BLONDE = BaseCategory( 9 , 'Blonde' ) + BONDAGE = BaseCategory( 10 , 'Bondage' ) + BRUNETTE = BaseCategory( 11 , 'Brunette' ) + CELEBRITY = BaseCategory( 12 , 'Celebrity' ) + BLOWJOB = BaseCategory( 13 , 'Blowjob' ) + BUKKAKE = BaseCategory( 14 , 'Bukkake' ) + CREAMPIE = BaseCategory( 15 , 'Creampie' ) + CUMSHOT = BaseCategory( 16 , 'Cumshot' ) + EBONY = BaseCategory( 17 , 'Ebony' ) + FETISH = BaseCategory( 18 , 'Fetish' ) + FISTING = BaseCategory( 19 , 'Fisting' ) + HANDJOB = BaseCategory( 20 , 'Handjob' ) + HARDCORE = BaseCategory( 21 , 'Hardcore' ) + MASTURBATION = BaseCategory( 22 , 'Masturbation' ) + TOYS = BaseCategory( 23 , 'Toys' ) + PUBLIC = BaseCategory( 24 , 'Public' ) + INTERRACIAL = BaseCategory( 25 , 'Interracial' ) + LATINA = BaseCategory( 26 , 'Latina' ) + LESBIAN = BaseCategory( 27 , 'Lesbian' ) + MATURE = BaseCategory( 28 , 'Mature' ) + MILF = BaseCategory( 29 , 'MILF' ) + PORNSTAR = BaseCategory( 30 , 'Pornstar' ) + REALITY = BaseCategory( 31 , 'Reality' ) + ANAL = BaseCategory( 35 , 'Anal' ) + HENTAI = BaseCategory( 36 , 'Hentai' ) + TEEN = BaseCategory( 37 , 'Teen (18+)' ) + POV = BaseCategory( 41 , 'POV' ) + RED_HEAD = BaseCategory( 42 , 'Red Head' ) + VINTAGE = 
BaseCategory( 43 , 'Vintage' ) + PARTY = BaseCategory( 53 , 'Party' ) + COMPILATION = BaseCategory( 57 , 'Compilation' ) + SMALL_TITS = BaseCategory( 59 , 'Small Tits' ) + WEBCAM = BaseCategory( 61 , 'Webcam' ) + THREESOME = BaseCategory( 65 , 'Threesome' ) + ROUGH_SEX = BaseCategory( 67 , 'Rough Sex' ) + SQUIRT = BaseCategory( 69 , 'Squirt' ) + DP = BaseCategory( 72 , 'DP' ) + POPULAR_WITH_WOMEN = BaseCategory( 73 , 'Popular With Women' ) + BISEXUAL_MALE = BaseCategory( 76 , 'Bisexual Male' ) + MASSAGE = BaseCategory( 78 , 'Massage' ) + COLLEGE = BaseCategory( 79 , 'College (18+)' ) + GANGBANG = BaseCategory( 80 , 'Gangbang' ) + ROLE_PLAY = BaseCategory( 81 , 'Role Play' ) + TRANSGENDER = BaseCategory( 83 , 'Transgender' ) + CARTOON = BaseCategory( 86 , 'Cartoon' ) + SCHOOL = BaseCategory( 88 , 'School (18+)' ) + BABYSITTER = BaseCategory( 89 , 'Babysitter (18+)' ) + CASTING = BaseCategory( 90 , 'Casting' ) + SMOKING = BaseCategory( 91 , 'Smoking' ) + SOLO_MALE = BaseCategory( 92 , 'Solo Male' ) + FEET = BaseCategory( 93 , 'Feet' ) + FRENCH = BaseCategory( 94 , 'French' ) + GERMAN = BaseCategory( 95 , 'German' ) + BRITISH = BaseCategory( 96 , 'British' ) + ITALIAN = BaseCategory( 97 , 'Italian' ) + ARAB = BaseCategory( 98 , 'Arab' ) + RUSSIAN = BaseCategory( 99 , 'Russian' ) + CZECH = BaseCategory( 100, 'Czech' ) + INDIAN = BaseCategory( 101, 'Indian' ) + BRAZILIAN = BaseCategory( 102, 'Brazilian' ) + KOREAN = BaseCategory( 103, 'Korean' ) + VIRTUAL_REALITY = BaseCategory( 104, 'Virtual Reality' ) + FPS60 = BaseCategory( 105, '60FPS' ) + JAPANESE = BaseCategory( 111, 'Japanese' ) + EXCLUSIVE = BaseCategory( 115, 'Exclusive' ) + MUSIC = BaseCategory( 121, 'Music' ) + PUSSY_LICKING = BaseCategory( 131, 'Pussy Licking' ) + VERIFIED_AMATEURS = BaseCategory( 138, 'Verified Amateurs' ) + VERIFIED_MODELS = BaseCategory( 139, 'Verified Models' ) + BEHIND_THE_SCENES = BaseCategory( 141, 'Behind The Scenes' ) + OLD_YOUNG = BaseCategory( 181, 'Old/Young (18+)' ) + PARODY = 
BaseCategory( 201, 'Parody' ) + PISSING = BaseCategory( 211, 'Pissing' ) + SFW = BaseCategory( 221, 'SFW' ) + COSPLAY = BaseCategory( 241, 'Cosplay' ) + CUCKOLD = BaseCategory( 242, 'Cuckold' ) + STEP_FANTASY = BaseCategory( 444, 'Step Fantasy' ) + VERIFIED_COUPLES = BaseCategory( 482, 'Verified Couples' ) + SOLO_FEMALE = BaseCategory( 492, 'Solo Female' ) + FEMALE_ORGASM = BaseCategory( 502, 'Female Orgasm' ) + MUSCULAR_MEN = BaseCategory( 512, 'Muscular Men' ) + ROMANTIC = BaseCategory( 522, 'Romantic' ) + TATTOOED_WOMEN = BaseCategory( 562, 'Tattooed Women' ) + TRANS_WITH_GIRL = BaseCategory( 572, 'Trans With Girl' ) + TRANS_WITH_GUY = BaseCategory( 582, 'Trans With Guy' ) + # EOF \ No newline at end of file