Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

override user agent metadata and other fixes #29

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
88 changes: 74 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,23 @@ $ pip install playwright-stealth
```

## Usage

Default stealth
### sync
```python

from playwright.sync_api import sync_playwright
from playwright_stealth import stealth_sync

with sync_playwright() as p:
for browser_type in [p.chromium, p.firefox, p.webkit]:
browser = browser_type.launch()
browser = browser_type.launch(headless=False)
page = browser.new_page()
stealth_sync(page)
page.goto('http://whatsmyuseragent.org/')
page.screenshot(path=f'example-{browser_type.name}.png')
page.goto('https://bot.sannysoft.com/')
page.screenshot(path=f'example-{browser_type.name}.png', full_page=True)
browser.close()

```

### async
```python
# -*- coding: utf-8 -*-
Expand All @@ -35,22 +36,81 @@ from playwright_stealth import stealth_async
async def main():
async with async_playwright() as p:
for browser_type in [p.chromium, p.firefox, p.webkit]:
browser = await browser_type.launch()
browser = await browser_type.launch(headless=False)
page = await browser.new_page()
await stealth_async(page)
await page.goto('http://whatsmyuseragent.org/')
await page.screenshot(path=f'example-{browser_type.name}.png')
await page.goto('https://bot.sannysoft.com/')
await page.screenshot(path=f'example-{browser_type.name}.png', full_page=True)
await browser.close()

asyncio.get_event_loop().run_until_complete(main())
asyncio.run(main())
```

## Test results
Custom stealth arguments (emulating a mobile device)
### sync
```python
from playwright.sync_api import sync_playwright
from playwright_stealth import stealth_sync, StealthConfig

### playwright with stealth
with sync_playwright() as p:
for browser_type in [p.chromium, p.firefox, p.webkit]:
browser = browser_type.launch(headless=False)
context = browser.new_context(**p.devices["Pixel 7"])
page = context.new_page()
# Setting desired values for navigator properties
stealth_config = StealthConfig(
languages = ['en-US', 'en'],
navigator_plugins = False, # Mimicking real mobile device
navigator_hardware_concurrency = 8,
# nav_vendor = "", # Set to an empty string only when mimicking a Firefox browser
nav_platform= 'Linux armv8l',
vendor = 'Google Inc. (Qualcomm)',
renderer = 'ANGLE (Qualcomm, Adreno (TM) 640, OpenGL ES 3.2)',
)
stealth_sync(page, stealth_config)
page.goto('https://bot.sannysoft.com/')
page.screenshot(path=f'example-{browser_type.name}.png', full_page=True)
browser.close()
```

![playwright without stealth](./images/example_with_stealth.png)
### async
```python
# -*- coding: utf-8 -*-
import asyncio
from playwright.async_api import async_playwright
from playwright_stealth import stealth_async, StealthConfig

async def main():
async with async_playwright() as p:
for browser_type in [p.chromium, p.firefox, p.webkit]:
browser = await browser_type.launch(headless=False)
context = await browser.new_context(**p.devices["Pixel 7"])
page = await context.new_page()
# Setting desired values for navigator properties
stealth_config = StealthConfig(
languages = ['en-US', 'en'],
navigator_plugins = False, # Mimicking real mobile device
navigator_hardware_concurrency = 8,
# nav_vendor = "", # Set to an empty string only when mimicking a Firefox browser
nav_platform= 'Linux armv8l',
vendor = 'Google Inc. (Qualcomm)',
renderer = 'ANGLE (Qualcomm, Adreno (TM) 640, OpenGL ES 3.2)',
)
await stealth_async(page, stealth_config)
await page.goto('https://bot.sannysoft.com/')
await page.screenshot(path=f'example-{browser_type.name}.png')
await browser.close()

asyncio.run(main())
```

## Test results
### Playwright with stealth (no arguments passed)
![playwright with stealth](./images/example_with_stealth.png)

### playwright without stealth
### Playwright without stealth
![playwright without stealth](./images/example_without_stealth.png)

![playwright with stealth](./images/example_without_stealth.png)
### Playwright with stealth (with arguments passed), emulating a mobile device
*NB: Unlike desktop browsers, mobile devices expose no plugins.*
![playwright stealth with specified arguments](./images/example_with_stealth_passed_arguments.png)
Binary file added images/example_with_stealth_passed_arguments.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions playwright_stealth/js/chrome.csi.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ if (!window.chrome) {
if (!('csi' in window.chrome) && (window.performance || window.performance.timing)) {
const {csi_timing} = window.performance

log.info('loading chrome.csi.js')
// log.info('loading chrome.csi.js')
window.chrome.csi = function () {
return {
onloadT: csi_timing.domContentLoadedEventEnd,
Expand All @@ -24,4 +24,4 @@ if (!('csi' in window.chrome) && (window.performance || window.performance.timin
}
}
utils.patchToString(window.chrome.csi)
}
}
4 changes: 2 additions & 2 deletions playwright_stealth/js/navigator.platform.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
if (opts.navigator_platform) {
Object.defineProperty(Object.getPrototypeOf(navigator), 'platform', {
get: () => opts.navigator_plaftorm,
get: () => opts.navigator_platform,
})
}
}
2 changes: 1 addition & 1 deletion playwright_stealth/js/navigator.userAgent.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// replace Headless references in default useragent
const current_ua = navigator.userAgent
Object.defineProperty(Object.getPrototypeOf(navigator), 'userAgent', {
get: () => opts.navigator_user_agent || current_ua.replace('HeadlessChrome/', 'Chrome/')
get: () => opts.navigator_user_agent || current_ua.replace(/Headless/g, '')
})
53 changes: 53 additions & 0 deletions playwright_stealth/js/navigator.userAgentData.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Spoofs navigator.userAgentData (User-Agent Client Hints) so that its brand
// list agrees with the Chrome version advertised in navigator.userAgent.
const ua = navigator.userAgent

// Firefox and WebKit user agents carry no "Chrome/<version>" token, so match()
// returns null for them; keep uaVersion null instead of throwing a TypeError.
const uaVersionMatch = ua.match(/Chrome\/([\d.]+)/)
const uaVersion = uaVersionMatch ? uaVersionMatch[1] : null

// Builds the "greased" brand list for the detected Chrome major version.
// Must only be called when uaVersion is not null.
// Source in C++: https://source.chromium.org/chromium/chromium/src/+/master:components/embedder_support/user_agent_utils.cc;l=55-100
const _getBrands = () => {
  const seed = uaVersion.split('.')[0] // the major version number of Chrome

  // The major version selects one of the six permutations of the three
  // entries, so the ordering is stable for a given Chrome version.
  const order = [
    [0, 1, 2],
    [0, 2, 1],
    [1, 0, 2],
    [1, 2, 0],
    [2, 0, 1],
    [2, 1, 0]
  ][Number(seed) % 6]
  const escapedChars = [' ', ' ', ';']

  const greaseyBrand = `${escapedChars[order[0]]}Not${
    escapedChars[order[1]]
  }A${escapedChars[order[2]]}Brand`

  // Brand versions stay strings, so `seed` is used as-is.
  const greasedBrandVersionList = []
  greasedBrandVersionList[order[0]] = {
    brand: greaseyBrand,
    version: '99'
  }
  greasedBrandVersionList[order[1]] = {
    brand: 'Chromium',
    version: seed
  }
  greasedBrandVersionList[order[2]] = {
    brand: 'Google Chrome',
    version: seed
  }

  return greasedBrandVersionList
}

// null when the user agent is not Chrome-like: nothing is overridden then.
const metadata =
  uaVersion === null
    ? null
    : {
        platform: 'macOS',
        mobile: false,
        brands: _getBrands()
      }

if (metadata !== null) {
  Object.defineProperty(Object.getPrototypeOf(navigator), 'userAgentData', {
    get: () => metadata
  })
}
141 changes: 70 additions & 71 deletions playwright_stealth/stealth.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,41 @@
# -*- coding: utf-8 -*-
import json
import os
from dataclasses import dataclass
from typing import Tuple, Optional, Dict

import pkg_resources
from playwright.async_api import Page as AsyncPage
from playwright.sync_api import Page as SyncPage


def from_file(name: str) -> str:
    """Return the text of the script called *name* from the ``js`` directory.

    The file is looked up in the ``js`` directory that sits next to this
    module and is decoded as UTF-8. Plain ``os.path`` is used rather than
    ``pkg_resources``, so no setuptools runtime dependency is needed.

    Raises:
        OSError: if the script cannot be opened (e.g. it does not exist).
    """
    filename = os.path.join(os.path.dirname(__file__), "js", name)
    with open(filename, encoding="utf-8") as f:
        return f.read()


# Script key -> JavaScript source text, loaded once at import time.
# Each key is listed exactly once so every file is read a single time.
SCRIPTS: Dict[str, str] = {
    "chrome_csi": from_file("chrome.csi.js"),
    "chrome_app": from_file("chrome.app.js"),
    "chrome_runtime": from_file("chrome.runtime.js"),
    "chrome_load_times": from_file("chrome.load.times.js"),
    "chrome_hairline": from_file("chrome.hairline.js"),
    "generate_magic_arrays": from_file("generate.magic.arrays.js"),
    "iframe_content_window": from_file("iframe.contentWindow.js"),
    "media_codecs": from_file("media.codecs.js"),
    "navigator_vendor": from_file("navigator.vendor.js"),
    "navigator_plugins": from_file("navigator.plugins.js"),
    "navigator_permissions": from_file("navigator.permissions.js"),
    "navigator_languages": from_file("navigator.languages.js"),
    "navigator_platform": from_file("navigator.platform.js"),
    "navigator_user_agent": from_file("navigator.userAgent.js"),
    "navigator_user_agent_data": from_file("navigator.userAgentData.js"),
    "navigator_hardware_concurrency": from_file("navigator.hardwareConcurrency.js"),
    "outerdimensions": from_file("window.outerdimensions.js"),
    "utils": from_file("utils.js"),
    "webdriver": from_file("navigator.webdriver.js"),
    "webgl_vendor": from_file("webgl.vendor.js"),
}


Expand All @@ -54,6 +57,7 @@ def enabled_scripts():
yield 'console.log("last script")'
```
"""

# load script options
webdriver: bool = True
webgl_vendor: bool = True
Expand All @@ -69,68 +73,63 @@ def enabled_scripts():
navigator_platform: bool = True
navigator_plugins: bool = True
navigator_user_agent: bool = True
navigator_user_agent_data: bool = True
navigator_vendor: bool = True
outerdimensions: bool = True
hairline: bool = True

# options
vendor: str = 'Intel Inc.'
renderer: str = 'Intel Iris OpenGL Engine'
nav_vendor: str = 'Google Inc.'
vendor: str = "Intel Inc."
renderer: str = "Intel Iris OpenGL Engine"
nav_vendor: str = "Google Inc."
nav_user_agent: str = None
nav_platform: str = None
languages: Tuple[str] = ('en-US', 'en')
languages: Tuple[str] = ("en-US", "en")
runOnInsecureOrigins: Optional[bool] = None

@property
def enabled_scripts(self):
    """Yield, in injection order, the JavaScript snippets this config enables.

    The first snippet declares the ``opts`` constant (the option values
    serialized as JSON) that later scripts read. ``utils`` and
    ``generate_magic_arrays`` are always emitted next. Every remaining
    script is emitted only when its boolean flag on this config is truthy.
    """
    opts = json.dumps(
        {
            "webgl_vendor": self.vendor,
            "webgl_renderer": self.renderer,
            "navigator_vendor": self.nav_vendor,
            "navigator_platform": self.nav_platform,
            "navigator_user_agent": self.nav_user_agent,
            "navigator_user_agent_data": self.navigator_user_agent_data,
            "languages": list(self.languages),
            "runOnInsecureOrigins": self.runOnInsecureOrigins,
        }
    )

    # defined options constant
    yield f"const opts = {opts}"
    # init utils and generate_magic_arrays helper
    yield SCRIPTS["utils"]
    yield SCRIPTS["generate_magic_arrays"]

    # (flag attribute on this config, key in SCRIPTS), in injection order.
    # The two names differ only for the hairline script.
    flagged_scripts = (
        ("chrome_app", "chrome_app"),
        ("chrome_csi", "chrome_csi"),
        ("hairline", "chrome_hairline"),
        ("chrome_load_times", "chrome_load_times"),
        ("chrome_runtime", "chrome_runtime"),
        ("iframe_content_window", "iframe_content_window"),
        ("media_codecs", "media_codecs"),
        ("navigator_languages", "navigator_languages"),
        ("navigator_permissions", "navigator_permissions"),
        ("navigator_platform", "navigator_platform"),
        ("navigator_plugins", "navigator_plugins"),
        ("navigator_user_agent", "navigator_user_agent"),
        ("navigator_user_agent_data", "navigator_user_agent_data"),
        ("navigator_vendor", "navigator_vendor"),
        ("webdriver", "webdriver"),
        ("outerdimensions", "outerdimensions"),
        ("webgl_vendor", "webgl_vendor"),
    )

    for flag, key in flagged_scripts:
        # Honour the per-script switch; a disabled script is never injected.
        if getattr(self, flag):
            yield SCRIPTS[key]


def stealth_sync(page: SyncPage, config: StealthConfig = None):
Expand Down
Loading