diff --git a/README.md b/README.md index 144dd80..db471e9 100644 --- a/README.md +++ b/README.md @@ -9,22 +9,23 @@ $ pip install playwright-stealth ``` ## Usage + +Default stealth ### sync ```python - from playwright.sync_api import sync_playwright from playwright_stealth import stealth_sync with sync_playwright() as p: for browser_type in [p.chromium, p.firefox, p.webkit]: - browser = browser_type.launch() + browser = browser_type.launch(headless=False) page = browser.new_page() stealth_sync(page) - page.goto('http://whatsmyuseragent.org/') - page.screenshot(path=f'example-{browser_type.name}.png') + page.goto('https://bot.sannysoft.com/') + page.screenshot(path=f'example-{browser_type.name}.png', full_page=True) browser.close() - ``` + ### async ```python # -*- coding: utf-8 -*- @@ -35,22 +36,81 @@ from playwright_stealth import stealth_async async def main(): async with async_playwright() as p: for browser_type in [p.chromium, p.firefox, p.webkit]: - browser = await browser_type.launch() + browser = await browser_type.launch(headless=False) page = await browser.new_page() await stealth_async(page) - await page.goto('http://whatsmyuseragent.org/') - await page.screenshot(path=f'example-{browser_type.name}.png') + await page.goto('https://bot.sannysoft.com/') + await page.screenshot(path=f'example-{browser_type.name}.png', full_page=True) await browser.close() -asyncio.get_event_loop().run_until_complete(main()) +asyncio.run(main()) ``` -## Test results +Desired stealth argument (as a mobile device) +### sync +```python +from playwright.sync_api import sync_playwright +from playwright_stealth import stealth_sync, StealthConfig -### playwright with stealth +with sync_playwright() as p: + for browser_type in [p.chromium, p.firefox, p.webkit]: + browser = browser_type.launch(headless=False) + context = browser.new_context(**p.devices["Pixel 7"]) + page = context.new_page() + # Setting desired values for navigator properties + stealth_config = StealthConfig( + 
languages = ['en-US', 'en'], + navigator_plugins = False, # Mimicking a real mobile device + navigator_hardware_concurrency = 8, + # nav_vendor = "", # Use only if you need an empty string value to mimic the Firefox browser + nav_platform= 'Linux armv81', + vendor = 'Google Inc. (Qualcomm)', + renderer = 'ANGLE (Qualcomm, Adreno (TM) 640, OpenGL ES 3.2)', + ) + stealth_sync(page, stealth_config) + page.goto('https://bot.sannysoft.com/') + page.screenshot(path=f'example-{browser_type.name}.png', full_page=True) + browser.close() +``` -![playwright without stealth](./images/example_with_stealth.png) +### async +```python +# -*- coding: utf-8 -*- +import asyncio +from playwright.async_api import async_playwright +from playwright_stealth import stealth_async, StealthConfig + +async def main(): + async with async_playwright() as p: + for browser_type in [p.chromium, p.firefox, p.webkit]: + browser = await browser_type.launch(headless=False) + context = await browser.new_context(**p.devices["Pixel 7"]) + page = await context.new_page() + # Setting desired values for navigator properties + stealth_config = StealthConfig( + languages = ['en-US', 'en'], + navigator_plugins = False, # Mimicking a real mobile device + navigator_hardware_concurrency = 8, + # nav_vendor = "", # Use only if you need an empty string value to mimic the Firefox browser + nav_platform= 'Linux armv81', + vendor = 'Google Inc. 
(Qualcomm)', + renderer = 'ANGLE (Qualcomm, Adreno (TM) 640, OpenGL ES 3.2)', + ) + await stealth_async(page, stealth_config) + await page.goto('https://bot.sannysoft.com/') + await page.screenshot(path=f'example-{browser_type.name}.png') + await browser.close() + +asyncio.run(main()) +``` + +## Test results +### Playwright with stealth (no arguments passed) +![playwright with stealth](./images/example_with_stealth.png) -### playwright without stealth +### Playwright without stealth +![playwright without stealth](./images/example_without_stealth.png) -![playwright with stealth](./images/example_without_stealth.png) +### Playwright with stealth (with arguments passed), emulating a mobile device +*NB: Mobile devices have no plugins, unlike desktops* +![playwright stealth with specified arguments](./images/example_with_stealth_passed_arguments.png) diff --git a/images/example_with_stealth_passed_arguments.png b/images/example_with_stealth_passed_arguments.png new file mode 100644 index 0000000..758533d Binary files /dev/null and b/images/example_with_stealth_passed_arguments.png differ diff --git a/playwright_stealth/js/chrome.csi.js b/playwright_stealth/js/chrome.csi.js index 388e39f..1391e39 100644 --- a/playwright_stealth/js/chrome.csi.js +++ b/playwright_stealth/js/chrome.csi.js @@ -14,7 +14,7 @@ if (!window.chrome) { if (!('csi' in window.chrome) && (window.performance || window.performance.timing)) { const {csi_timing} = window.performance - log.info('loading chrome.csi.js') + // log.info('loading chrome.csi.js') window.chrome.csi = function () { return { onloadT: csi_timing.domContentLoadedEventEnd, @@ -24,4 +24,4 @@ if (!('csi' in window.chrome) && (window.performance || window.performance.timin } } utils.patchToString(window.chrome.csi) -} \ No newline at end of file +} diff --git a/playwright_stealth/js/navigator.platform.js b/playwright_stealth/js/navigator.platform.js index f61e32f..6edbca3 100644 --- a/playwright_stealth/js/navigator.platform.js +++ 
b/playwright_stealth/js/navigator.platform.js @@ -1,5 +1,5 @@ if (opts.navigator_platform) { Object.defineProperty(Object.getPrototypeOf(navigator), 'platform', { - get: () => opts.navigator_plaftorm, + get: () => opts.navigator_platform, }) -} \ No newline at end of file +} diff --git a/playwright_stealth/js/navigator.userAgent.js b/playwright_stealth/js/navigator.userAgent.js index f35f07a..e870def 100644 --- a/playwright_stealth/js/navigator.userAgent.js +++ b/playwright_stealth/js/navigator.userAgent.js @@ -1,5 +1,5 @@ // replace Headless references in default useragent const current_ua = navigator.userAgent Object.defineProperty(Object.getPrototypeOf(navigator), 'userAgent', { - get: () => opts.navigator_user_agent || current_ua.replace('HeadlessChrome/', 'Chrome/') + get: () => opts.navigator_user_agent || current_ua.replace(/Headless/g, '') }) diff --git a/playwright_stealth/js/navigator.userAgentData.js b/playwright_stealth/js/navigator.userAgentData.js new file mode 100644 index 0000000..e8a7bf6 --- /dev/null +++ b/playwright_stealth/js/navigator.userAgentData.js @@ -0,0 +1,53 @@ +const ua = navigator.userAgent + +// const uaVersion = ua.includes('Chrome/') +// ? 
ua.match(/Chrome\/([\d|.]+)/)[1] +// : (await page.browser().version()).match(/\/([\d|.]+)/)[1] +const uaVersion = ua.match(/Chrome\/([\d|.]+)/)[1] + +// Source in C++: https://source.chromium.org/chromium/chromium/src/+/master:components/embedder_support/user_agent_utils.cc;l=55-100 +const _getBrands = () => { + const seed = uaVersion.split('.')[0] // the major version number of Chrome + + const order = [ + [0, 1, 2], + [0, 2, 1], + [1, 0, 2], + [1, 2, 0], + [2, 0, 1], + [2, 1, 0] + ][seed % 6] + const escapedChars = [' ', ' ', ';'] + + const greaseyBrand = `${escapedChars[order[0]]}Not${ + escapedChars[order[1]] + }A${escapedChars[order[2]]}Brand` + + const greasedBrandVersionList = [] + greasedBrandVersionList[order[0]] = { + brand: greaseyBrand, + version: '99' + } + greasedBrandVersionList[order[1]] = { + brand: 'Chromium', + version: seed + } + greasedBrandVersionList[order[2]] = { + brand: 'Google Chrome', + version: seed + } + + return greasedBrandVersionList +} + +const metadata = { + platform: "macOSs", + mobile: false, + brands: _getBrands() +} + +console.log("hi mike") + +Object.defineProperty(Object.getPrototypeOf(navigator), 'userAgentData', { + get: () => metadata +}) diff --git a/playwright_stealth/stealth.py b/playwright_stealth/stealth.py index 721e6f4..adca271 100644 --- a/playwright_stealth/stealth.py +++ b/playwright_stealth/stealth.py @@ -1,38 +1,41 @@ # -*- coding: utf-8 -*- import json +import os from dataclasses import dataclass from typing import Tuple, Optional, Dict -import pkg_resources from playwright.async_api import Page as AsyncPage from playwright.sync_api import Page as SyncPage -def from_file(name): +def from_file(name) -> str: """Read script from ./js directory""" - return pkg_resources.resource_string('playwright_stealth', f'js/{name}').decode() + filename = os.path.join(os.path.dirname(__file__), "js", name) + with open(filename, encoding="utf-8") as f: + return f.read() SCRIPTS: Dict[str, str] = { - 'chrome_csi': 
from_file('chrome.csi.js'), - 'chrome_app': from_file('chrome.app.js'), - 'chrome_runtime': from_file('chrome.runtime.js'), - 'chrome_load_times': from_file('chrome.load.times.js'), - 'chrome_hairline': from_file('chrome.hairline.js'), - 'generate_magic_arrays': from_file('generate.magic.arrays.js'), - 'iframe_content_window': from_file('iframe.contentWindow.js'), - 'media_codecs': from_file('media.codecs.js'), - 'navigator_vendor': from_file('navigator.vendor.js'), - 'navigator_plugins': from_file('navigator.plugins.js'), - 'navigator_permissions': from_file('navigator.permissions.js'), - 'navigator_languages': from_file('navigator.languages.js'), - 'navigator_platform': from_file('navigator.platform.js'), - 'navigator_user_agent': from_file('navigator.userAgent.js'), - 'navigator_hardware_concurrency': from_file('navigator.hardwareConcurrency.js'), - 'outerdimensions': from_file('window.outerdimensions.js'), - 'utils': from_file('utils.js'), - 'webdriver': from_file('navigator.webdriver.js'), - 'webgl_vendor': from_file('webgl.vendor.js'), + "chrome_csi": from_file("chrome.csi.js"), + "chrome_app": from_file("chrome.app.js"), + "chrome_runtime": from_file("chrome.runtime.js"), + "chrome_load_times": from_file("chrome.load.times.js"), + "chrome_hairline": from_file("chrome.hairline.js"), + "generate_magic_arrays": from_file("generate.magic.arrays.js"), + "iframe_content_window": from_file("iframe.contentWindow.js"), + "media_codecs": from_file("media.codecs.js"), + "navigator_vendor": from_file("navigator.vendor.js"), + "navigator_plugins": from_file("navigator.plugins.js"), + "navigator_permissions": from_file("navigator.permissions.js"), + "navigator_languages": from_file("navigator.languages.js"), + "navigator_platform": from_file("navigator.platform.js"), + "navigator_user_agent": from_file("navigator.userAgent.js"), + "navigator_user_agent_data": from_file("navigator.userAgentData.js"), + "navigator_hardware_concurrency": 
from_file("navigator.hardwareConcurrency.js"), + "outerdimensions": from_file("window.outerdimensions.js"), + "utils": from_file("utils.js"), + "webdriver": from_file("navigator.webdriver.js"), + "webgl_vendor": from_file("webgl.vendor.js"), } @@ -54,6 +57,7 @@ def enabled_scripts(): yield 'console.log("last script")' ``` """ + # load script options webdriver: bool = True webgl_vendor: bool = True @@ -69,68 +73,63 @@ def enabled_scripts(): navigator_platform: bool = True navigator_plugins: bool = True navigator_user_agent: bool = True + navigator_user_agent_data: bool = True navigator_vendor: bool = True outerdimensions: bool = True hairline: bool = True # options - vendor: str = 'Intel Inc.' - renderer: str = 'Intel Iris OpenGL Engine' - nav_vendor: str = 'Google Inc.' + vendor: str = "Intel Inc." + renderer: str = "Intel Iris OpenGL Engine" + nav_vendor: str = "Google Inc." nav_user_agent: str = None nav_platform: str = None - languages: Tuple[str] = ('en-US', 'en') + languages: Tuple[str] = ("en-US", "en") runOnInsecureOrigins: Optional[bool] = None @property def enabled_scripts(self): - opts = json.dumps({ - 'webgl_vendor': self.vendor, - 'webgl_renderer': self.renderer, - 'navigator_vendor': self.nav_vendor, - 'navigator_platform': self.nav_platform, - 'navigator_user_agent': self.nav_user_agent, - 'languages': list(self.languages), - 'runOnInsecureOrigins': self.runOnInsecureOrigins, - }) + opts = json.dumps( + { + "webgl_vendor": self.vendor, + "webgl_renderer": self.renderer, + "navigator_vendor": self.nav_vendor, + "navigator_platform": self.nav_platform, + "navigator_user_agent": self.nav_user_agent, + "navigator_user_agent_data": self.navigator_user_agent_data, + "languages": list(self.languages), + "runOnInsecureOrigins": self.runOnInsecureOrigins, + } + ) + # defined options constant - yield f'const opts = {opts}' + yield f"const opts = {opts}" # init utils and generate_magic_arrays helper - yield SCRIPTS['utils'] - yield 
SCRIPTS['generate_magic_arrays'] - - if self.chrome_app: - yield SCRIPTS['chrome_app'] - if self.chrome_csi: - yield SCRIPTS['chrome_csi'] - if self.hairline: - yield SCRIPTS['chrome_hairline'] - if self.chrome_load_times: - yield SCRIPTS['chrome_load_times'] - if self.chrome_runtime: - yield SCRIPTS['chrome_runtime'] - if self.iframe_content_window: - yield SCRIPTS['iframe_content_window'] - if self.media_codecs: - yield SCRIPTS['media_codecs'] - if self.navigator_languages: - yield SCRIPTS['navigator_languages'] - if self.navigator_permissions: - yield SCRIPTS['navigator_permissions'] - if self.navigator_platform: - yield SCRIPTS['navigator_platform'] - if self.navigator_plugins: - yield SCRIPTS['navigator_plugins'] - if self.navigator_user_agent: - yield SCRIPTS['navigator_user_agent'] - if self.navigator_vendor: - yield SCRIPTS['navigator_vendor'] - if self.webdriver: - yield SCRIPTS['webdriver'] - if self.outerdimensions: - yield SCRIPTS['outerdimensions'] - if self.webgl_vendor: - yield SCRIPTS['webgl_vendor'] + yield SCRIPTS["utils"] + yield SCRIPTS["generate_magic_arrays"] + + script_keys = [ + "chrome_app", + "chrome_csi", + "chrome_hairline", + "chrome_load_times", + "chrome_runtime", + "iframe_content_window", + "media_codecs", + "navigator_languages", + "navigator_permissions", + "navigator_platform", + "navigator_plugins", + "navigator_user_agent", + "navigator_user_agent_data", + "navigator_vendor", + "webdriver", + "outerdimensions", + "webgl_vendor", + ] + + for key in script_keys: + yield SCRIPTS[key] def stealth_sync(page: SyncPage, config: StealthConfig = None): diff --git a/setup.py b/setup.py index d8de041..f7c1f5d 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="playwright-stealth", - version="1.0.6", + version="1.0.7", author="AtuboDad", author_email="lcjasas@sina.com", description="playwright stealth", @@ -19,9 +19,13 @@ "Operating System :: OS Independent", ], package_data={"playwright_stealth": ["js/*.js"]}, - 
python_requires='>=3, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*', + python_requires=">=3, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*", install_requires=[ - 'playwright', + "playwright", ], - extras_require={"test": ["pytest", ]}, + extras_require={ + "test": [ + "pytest", + ] + }, )