diff --git a/README.md b/README.md index a717b7d..8191284 100644 --- a/README.md +++ b/README.md @@ -269,6 +269,7 @@ True | addons | List of Firefox addons to use. **Must be paths to extracted addons.** | ✔️ | | humanize | Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window. | ✔️ | | allow_webgl | Enabled by default. Disabling it WebGL not recommended as many WAFs now checks if WebGL is enabled. | ✔️ | +| geoip | Recommended to use with proxies. Automatically uses the IP's longitude, latitude, timezone, country, and locale, and spoofs the WebRTC IP address. It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region. | ✔️ | | disable_ads | Enabled by default, this installs `uBlock Origin` addon on the browser if enabled. | ✔️ | | network_idle | Wait for the page until there are no network connections for at least 500 ms. | ✔️ | | timeout | The timeout in milliseconds that is used in all operations and waits through the page. The default is 30000. | ✔️ | diff --git a/scrapling/engines/camo.py b/scrapling/engines/camo.py index d755275..955a700 100644 --- a/scrapling/engines/camo.py +++ b/scrapling/engines/camo.py @@ -18,6 +18,7 @@ def __init__( timeout: Optional[float] = 30000, page_action: Callable = do_nothing, wait_selector: Optional[str] = None, addons: Optional[List[str]] = None, wait_selector_state: str = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None, proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, + geoip: Optional[bool] = False, adaptor_arguments: Dict = None, ): """An engine that utilizes Camoufox library, check the `StealthyFetcher` class for more documentation. 
@@ -38,6 +39,8 @@ def __init__( :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30000 :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again. :param wait_selector: Wait for a specific css selector to be in a specific state. + :param geoip: Recommended to use with proxies. Automatically uses the IP's longitude, latitude, timezone, country, and locale, and spoofs the WebRTC IP address. + It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region. :param wait_selector_state: The state to wait for the selector given with `wait_selector`. Default state is `attached`. :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search for this website's domain name. :param extra_headers: A dictionary of extra headers to add to the request. 
_The referer set by the `google_search` argument takes priority over the referer set here if used together._ @@ -53,6 +56,7 @@ def __init__( self.google_search = bool(google_search) self.os_randomize = bool(os_randomize) self.disable_ads = bool(disable_ads) + self.geoip = bool(geoip) self.extra_headers = extra_headers or {} self.proxy = construct_proxy_dict(proxy) self.addons = addons or [] @@ -76,6 +80,7 @@ def fetch(self, url: str) -> Response: """ addons = [] if self.disable_ads else [DefaultAddons.UBO] with Camoufox( + geoip=self.geoip, proxy=self.proxy, addons=self.addons, exclude_addons=addons, diff --git a/scrapling/fetchers.py b/scrapling/fetchers.py index 94059ab..f0f3c02 100644 --- a/scrapling/fetchers.py +++ b/scrapling/fetchers.py @@ -83,7 +83,7 @@ def fetch( block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None, timeout: Optional[float] = 30000, page_action: Callable = do_nothing, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True, wait_selector_state: str = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None, proxy: Optional[Union[str, Dict[str, str]]] = None, - os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, + os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False, ) -> Response: """ Opens up a browser and do your request based on your chosen options below. @@ -100,6 +100,8 @@ def fetch( :param disable_ads: Enabled by default, this installs `uBlock Origin` addon on the browser if enabled. :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window. :param allow_webgl: Enabled by default. Disabling it WebGL not recommended as many WAFs now checks if WebGL is enabled. 
+ :param geoip: Recommended to use with proxies. Automatically uses the IP's longitude, latitude, timezone, country, and locale, and spoofs the WebRTC IP address. + It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region. :param network_idle: Wait for the page until there are no network connections for at least 500 ms. :param os_randomize: If enabled, Scrapling will randomize the OS fingerprints used. The default is Scrapling matching the fingerprints with the current OS. :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30000 @@ -113,6 +115,7 @@ def fetch( """ engine = CamoufoxEngine( proxy=proxy, + geoip=geoip, addons=addons, timeout=timeout, headless=headless, diff --git a/setup.py b/setup.py index 2076c14..a548211 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ 'httpx[brotli,zstd]', 'playwright>=1.49.1', 'rebrowser-playwright>=1.49.1', - 'camoufox>=0.4.7', + 'camoufox[geoip]>=0.4.7', 'browserforge', ], python_requires=">=3.9",