diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 5f18697..2f97301 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -19,57 +19,27 @@ permissions: contents: read jobs: - linux_x86_64: + linux: runs-on: ${{ matrix.platform.runner }} strategy: matrix: platform: - runner: ubuntu-latest target: x86_64 - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Build wheels - uses: PyO3/maturin-action@v1 - with: - target: ${{ matrix.platform.target }} - args: --release --out dist --zig -i python${{ matrix.python-version }} - sccache: 'true' - manylinux: auto - - name: Upload wheels - uses: actions/upload-artifact@v4 - with: - name: wheels-linux-${{ matrix.platform.target }}-python${{ matrix.python-version }} - path: dist - - name: pytest - shell: bash - run: | - set -e - pip install pyreqwest_impersonate --find-links dist --no-index --force-reinstall - pip install pytest - pytest - - linux_aarch64: - runs-on: ${{ matrix.platform.runner }} - strategy: - matrix: - platform: - runner: ubuntu-latest target: aarch64 - python-version: ['3.10'] + - runner: ubuntu-latest + target: armv7 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: ${{ matrix.python-version }} + python-version: '3.10' - name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} - args: --release --out dist --zig --find-interpreter + args: --release --out dist --zig sccache: 'true' manylinux: auto env: @@ -77,16 +47,31 @@ jobs: - name: Upload wheels uses: actions/upload-artifact@v4 with: - name: wheels-linux-${{ matrix.platform.target }}-python${{ matrix.python-version }} + name: wheels-linux-${{ matrix.platform.target }} path: dist - name: pytest - if: ${{ !startsWith(matrix.platform.target, 'aarch64') }} + if: ${{ startsWith(matrix.platform.target, 'x86_64') }} shell: bash run: | set -e - pip install pyreqwest_impersonate --find-links dist --no-index --force-reinstall + pip install pyreqwest_impersonate --find-links dist --force-reinstall pip install pytest pytest + - name: pytest + if: ${{ !startsWith(matrix.platform.target, 'x86') && matrix.platform.target != 'ppc64' }} + uses: uraimo/run-on-arch-action@v2.5.0 + with: + arch: ${{ matrix.platform.target }} + distro: ubuntu22.04 + githubToken: ${{ github.token }} + install: | + apt-get update + apt-get install -y --no-install-recommends python3 python3-pip + pip3 install -U pip pytest + run: | + set -e + pip3 install pyreqwest_impersonate --find-links dist --force-reinstall + pytest windows: runs-on: ${{ matrix.platform.runner }} @@ -107,19 +92,19 @@ jobs: uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} - args: --release --out dist --find-interpreter + args: --release --out dist sccache: 'true' - name: Upload wheels uses: actions/upload-artifact@v4 with: - name: wheels-windows-${{ matrix.platform.target }}-python${{ matrix.python-version }} + name: wheels-windows-${{ matrix.platform.target }} path: dist - name: pytest if: ${{ !startsWith(matrix.platform.target, 'aarch64') }} shell: bash run: | set -e - pip install pyreqwest_impersonate --find-links dist --no-index --force-reinstall + pip install pyreqwest_impersonate --find-links dist --force-reinstall pip install pytest pytest @@ -141,19 +126,19 @@ jobs: uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} - args: --release --out dist --find-interpreter + args: --release --out dist sccache: 'true' - name: Upload wheels uses: actions/upload-artifact@v4 with: - name: wheels-macos-${{ matrix.platform.target }}-python${{ matrix.python-version }} + name: wheels-macos-${{ matrix.platform.target }} path: dist - name: pytest if: ${{ !startsWith(matrix.platform.target, 'aarch64') }} shell: bash run: | set -e - pip install pyreqwest_impersonate --find-links dist --no-index --force-reinstall + pip install pyreqwest_impersonate --find-links dist --force-reinstall pip install pytest pytest @@ -176,7 +161,7 @@ jobs: name: Release runs-on: ubuntu-latest if: "startsWith(github.ref, 'refs/tags/')" - needs: [linux_x86_64, linux_aarch64, windows, macos, sdist] + needs: [linux, windows, macos, sdist] steps: - uses: actions/download-artifact@v4 - name: Publish to PyPI diff --git a/Cargo.lock b/Cargo.lock index 163907c..8c04578 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -61,17 +61,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "async-trait" -version = "0.1.80" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "autocfg" version = "1.2.0" @@ -304,12 +293,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "data-encoding" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5" - [[package]] name = "deranged" version = "0.3.11" @@ -334,18 +317,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "enum-as-inner" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "equivalent" version = "1.0.1" @@ -452,17 +423,6 @@ dependencies = [ "slab", ] -[[package]] -name = "getrandom" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - [[package]] name = "gimli" version = "0.28.1" @@ -512,17 +472,6 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" -[[package]] -name = "hostname" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867" -dependencies = [ - "libc", - "match_cfg", - "winapi", -] - [[package]] name = "http" version = "0.2.12" @@ -608,16 +557,6 @@ dependencies = [ "unicode-normalization", ] -[[package]] -name = "idna" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - [[package]] name = "idna" version = "0.5.0" @@ -644,18 +583,6 @@ version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" -[[package]] -name = "ipconfig" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f" -dependencies = [ - "socket2", - "widestring", - "windows-sys 0.48.0", - "winreg", -] - [[package]] name = "ipnet" version = "2.9.0" @@ -745,21 +672,6 @@ version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" -[[package]] -name = "lru-cache" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c" -dependencies = [ - "linked-hash-map", -] - -[[package]] -name = "match_cfg" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" - [[package]] name = "memchr" version = "2.7.2" @@ -911,12 +823,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - [[package]] name = "proc-macro2" version = "1.0.79" @@ -1007,7 +913,7 @@ dependencies = [ [[package]] name = "pyreqwest_impersonate" -version = "0.2.3" +version = "0.3.0" dependencies = [ "encoding_rs", "pyo3", @@ -1026,12 +932,6 @@ dependencies = [ "serde", ] -[[package]] -name = "quick-error" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" - [[package]] name = "quote" version = "1.0.36" @@ -1041,36 +941,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - [[package]] name = "redox_syscall" version = "0.4.1" @@ -1148,7 +1018,6 @@ dependencies = [ "tokio-socks", "tokio-util", "tower-service", - "trust-dns-resolver", "url", "wasm-bindgen", "wasm-bindgen-futures", @@ -1156,16 +1025,6 @@ dependencies = [ "winreg", ] -[[package]] -name = "resolv-conf" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52e44394d2086d010551b14b53b1f24e31647570cd1deb0379e2c21b329aba00" -dependencies = [ - "hostname", - "quick-error", -] - [[package]] name = "rustc-demangle" version = "0.1.23" @@ -1440,21 +1299,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ "pin-project-lite", - "tracing-attributes", "tracing-core", ] -[[package]] -name = "tracing-attributes" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "tracing-core" version = "0.1.32" @@ -1464,52 +1311,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "trust-dns-proto" -version = "0.23.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3119112651c157f4488931a01e586aa459736e9d6046d3bd9105ffb69352d374" -dependencies = [ - "async-trait", - "cfg-if", - "data-encoding", - "enum-as-inner", - "futures-channel", - "futures-io", - "futures-util", - "idna 0.4.0", - "ipnet", - "once_cell", - "rand", - "smallvec", - "thiserror", - "tinyvec", - "tokio", - "tracing", - "url", -] - -[[package]] -name = "trust-dns-resolver" -version = "0.23.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a3e6c3aff1718b3c73e395d1f35202ba2ffa847c6a62eea0db8fb4cfe30be6" -dependencies = [ - "cfg-if", - "futures-util", - "ipconfig", - "lru-cache", - "once_cell", - "parking_lot", - "rand", - "resolv-conf", - "smallvec", - "thiserror", - "tokio", - "tracing", - "trust-dns-proto", -] - [[package]] name = "try-lock" version = "0.2.5" @@ -1660,34 +1461,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "widestring" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7219d36b6eac893fa81e84ebe06485e7dcbb616177469b142df14f1f4deb1311" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-sys" version = "0.48.0" diff --git a/Cargo.toml b/Cargo.toml index 296c027..44bf1d0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pyreqwest_impersonate" -version = "0.2.3" +version = "0.3.0" edition = "2021" description = "HTTP client that can impersonate web browsers, mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints" authors = ["deedy5"] @@ -12,14 +12,13 @@ name = "pyreqwest_impersonate" crate-type = ["cdylib"] [dependencies] -pyo3 = { version = "0.21", features = ["extension-module", "auto-initialize"] } +pyo3 = { version = "0.21", features = ["extension-module", "abi3-py38"] } reqwest-impersonate = { version = "0.11", default-features = false, features = [ "cookies", "blocking", "boring-tls", "impersonate", "json", - "trust-dns", # async resolver instead of a default threadpool using `getaddrinfo` "multipart", # to send a multipart/form-data body "socks", "gzip", diff --git a/README.md b/README.md index f4368e6..1e66115 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Binding to the Rust `reqwest_impersonate` library.
Provides precompiled wheels: -- [x] Linux: `amd64`, `aarch64`. +- [x] Linux: `amd64`, `aarch64`, `armv7`. - [x] Windows: `amd64`. - [x] MacOS: `amd64`, `aarch64`. @@ -27,7 +27,8 @@ pip install -U pyreqwest_impersonate ``` ## Key Features - +- `Impersonate`: The `Client` offers an `impersonate` option, enabling it to mimic web browsers by replicating their headers and `TLS/JA3/JA4/HTTP2` fingerprints. This feature is crucial for avoiding detection as a bot and potential blocking by websites. +- `Thread-safe`: The `Client` is designed to be thread-safe, allowing it to be safely used in multithreaded environments. - `High Performance`: The attributes of the `Response` object are executed in Rust, which is known for its high performance. This ensures that operations like accessing headers, decoding text, or parsing JSON are very fast. - `Lazy Execution`: All attributes of the `Response` object are executed lazily. This means that the actual computation or data retrieval happens only when you access the attribute, not when the `Response` object is created. - `Automatic Character Encoding Detection`: The `Response` object intelligently detects the character encoding of the response body from the "Content-Type" header. If the encoding is not specified, it defaults to "UTF-8". @@ -35,10 +36,10 @@ pip install -U pyreqwest_impersonate ## Usage ### I. Client -A blocking HTTP client that can impersonate web browsers. Not thread-safe! -```python3 +HTTP client that can impersonate web browsers. +```python class Client: - """Initializes a blocking HTTP client that can impersonate web browsers. + """Initializes an HTTP client that can impersonate web browsers. Args: auth (tuple, optional): A tuple containing the username and password for basic authentication. Default is None. @@ -62,19 +63,16 @@ class Client: verify (bool, optional): Verify SSL certificates. Default is True. http1 (bool, optional): Use only HTTP/1.1. Default is None. http2 (bool, optional): Use only HTTP/2. Default is None. - - Note: - The Client instance is not thread-safe, meaning it should be initialized once and reused across a multi-threaded environment. - + """ ``` #### Client Methods The `Client` class provides a set of methods for making HTTP requests: `get`, `head`, `options`, `delete`, `post`, `put`, `patch`, each of which internally utilizes the `request()` method for execution. The parameters for these methods closely resemble those in `httpx`. -```python3 -def get(url, *, params=None, headers=None, auth=None, auth_bearer=None, timeout=None) - """Performs a GET request to the specified URL.""" +```python +def get(url, *, params=None, headers=None, auth=None, auth_bearer=None, timeout=None): + """Performs a GET request to the specified URL. Args: url (str): The URL to which the request will be made. @@ -84,10 +82,12 @@ def get(url, *, params=None, headers=None, auth=None, auth_bearer=None, timeout= for basic authentication. Default is None. auth_bearer (Optional[str]): A string representing the bearer token for bearer token authentication. Default is None. timeout (Optional[float]): The timeout for the request in seconds. Default is 30. + + """ ``` ```python -def post(url, *, params=None, headers=None, content=None, data=None, files=None, auth=None, auth_bearer=None, timeout=None) - """Performs a POST request to the specified URL.""" +def post(url, *, params=None, headers=None, content=None, data=None, files=None, auth=None, auth_bearer=None, timeout=None): + """Performs a POST request to the specified URL. Args: url (str): The URL to which the request will be made. @@ -100,6 +100,8 @@ def post(url, *, params=None, headers=None, content=None, data=None, files=None, for basic authentication. Default is None. auth_bearer (Optional[str]): A string representing the bearer token for bearer token authentication. Default is None. timeout (Optional[float]): The timeout for the request in seconds. Default is 30. + + """ ``` #### Example @@ -107,8 +109,7 @@ def post(url, *, params=None, headers=None, content=None, data=None, files=None, ```python from pyreqwest_impersonate import Client -# Not thread-safe! Initialize the Client instance once and reuse it across threads -client = Client(impersonate="chrome_123") +client = Client(impersonate="chrome_123") # get request resp = client.get("https://tls.peet.ws/api/all") diff --git a/benchmark/1_threads.csv b/benchmark/1_threads.csv index e7b15d8..093b592 100644 --- a/benchmark/1_threads.csv +++ b/benchmark/1_threads.csv @@ -1,6 +1,6 @@ name,threads,cpu_time 50k,cpu_time 5k,time 50k,time 5k -curl_cffi 0.6.2,1,5.011,1.42,6.987,2.52 -httpx 0.27.0,1,2.34,1.964,3.669,3.362 -pyreqwest_impersonate 0.2.2,1,1.394,0.223,2.897,0.759 -requests 2.31.0,1,4.244,3.231,6.144,4.974 -tls_client 1.0.1,1,4.802,1.501,5.471,2.527 +curl_cffi 0.6.3,1,1.728,0.95,3.177,2.493 +httpx 0.27.0,1,2.938,1.797,4.447,2.956 +pyreqwest_impersonate 0.3.0,1,1.27,0.45,2.77,1.559 +requests 2.31.0,1,5.105,3.087,7.127,4.599 +tls_client 1.0.1,1,5.392,1.471,6.031,2.247 diff --git a/benchmark/4_threads.csv b/benchmark/4_threads.csv index fa8bae2..2806549 100644 --- a/benchmark/4_threads.csv +++ b/benchmark/4_threads.csv @@ -1,6 +1,6 @@ name,threads,cpu_time 50k,cpu_time 5k,time 50k,time 5k -curl_cffi 0.6.2,4,3.966,1.056,1.465,0.672 -httpx 0.27.0,4,1.91,1.303,1.306,1.092 -pyreqwest_impersonate 0.2.2,4,1.271,0.327,1.095,0.679 -requests 2.31.0,4,4.329,2.912,3.46,2.709 -tls_client 1.0.1,4,3.454,1.094,1.214,0.714 +curl_cffi 0.6.3,4,1.311,0.757,0.83,0.75 +httpx 0.27.0,4,1.952,1.295,1.393,1.116 +pyreqwest_impersonate 0.3.0,4,1.066,0.302,0.986,0.679 +requests 2.31.0,4,4.022,2.733,3.256,2.561 +tls_client 1.0.1,4,3.626,1.099,1.419,0.739 diff --git a/src/lib.rs b/src/lib.rs index 925e36b..aa60753 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,7 +16,7 @@ mod response; use response::Response; #[pyclass] -/// A blocking HTTP client that can impersonate web browsers. +/// HTTP client that can impersonate web browsers. struct Client { client: reqwest_impersonate::blocking::Client, auth: Option<(String, Option)>, @@ -27,9 +27,9 @@ struct Client { #[pymethods] impl Client { #[new] - /// Initializes a blocking HTTP client that can impersonate web browsers. Not thread-safe! + /// Initializes an HTTP client that can impersonate web browsers. /// - /// This function creates a new instance of a blocking HTTP client that can impersonate various web browsers. + /// This function creates a new HTTP client instance that can impersonate various web browsers. /// It allows for customization of headers, proxy settings, timeout, impersonation type, SSL certificate verification, /// and HTTP version preferences. /// @@ -51,9 +51,6 @@ impl Client { /// * `http1` - An optional boolean indicating whether to use only HTTP/1.1. Default is `false`. /// * `http2` - An optional boolean indicating whether to use only HTTP/2. Default is `false`. /// - /// # Note - /// The Client instance is not thread-safe, meaning it should be initialized once and reused across a multi-threaded environment. - /// /// # Example /// /// ``` @@ -100,7 +97,6 @@ impl Client { let mut client_builder = reqwest_impersonate::blocking::Client::builder() .enable_ech_grease(true) .permute_extensions(true) - .trust_dns(true) .timeout(timeout.map(Duration::from_secs_f64)); // Headers