From 252dbd13f7eea10b00f2a7dc67c7632aca42eb11 Mon Sep 17 00:00:00 2001 From: Yifei Kong Date: Thu, 27 Jun 2024 15:54:37 +0800 Subject: [PATCH] Add customized ja3 and akamai fingerprints support (#331) --- Makefile | 2 +- curl_cffi/__init__.py | 3 +- curl_cffi/const.py | 16 ++ curl_cffi/requests/__init__.py | 14 +- curl_cffi/requests/impersonate.py | 280 +++++++++++++++++++++++++++++ curl_cffi/requests/session.py | 200 ++++++++++++++++----- examples/impersonate.py | 48 +++++ pyproject.toml | 2 +- scripts/build.py | 2 +- scripts/generate_consts.py | 59 ++++-- tests/unittest/test_impersonate.py | 180 ++++++++++++++++++- 11 files changed, 729 insertions(+), 77 deletions(-) create mode 100644 curl_cffi/requests/impersonate.py create mode 100644 examples/impersonate.py diff --git a/Makefile b/Makefile index f31fd41b..da7928f3 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ SHELL := bash # this is the upstream libcurl-impersonate version -VERSION := 0.7.0b6 +VERSION := 0.7.0 CURL_VERSION := curl-8_7_1 $(CURL_VERSION): diff --git a/curl_cffi/__init__.py b/curl_cffi/__init__.py index 99a1a9a1..5ed6efc1 100644 --- a/curl_cffi/__init__.py +++ b/curl_cffi/__init__.py @@ -8,6 +8,7 @@ "CurlMOpt", "CurlECode", "CurlHttpVersion", + "CurlSslVersion", "CurlWsFlag", "ffi", "lib", @@ -20,5 +21,5 @@ # This line includes _wrapper.so into the wheel from ._wrapper import ffi, lib from .aio import AsyncCurl -from .const import CurlECode, CurlHttpVersion, CurlInfo, CurlMOpt, CurlOpt, CurlWsFlag +from .const import CurlECode, CurlHttpVersion, CurlInfo, CurlMOpt, CurlOpt, CurlWsFlag, CurlSslVersion from .curl import Curl, CurlError, CurlMime diff --git a/curl_cffi/const.py b/curl_cffi/const.py index b7ca1891..e535ff39 100644 --- a/curl_cffi/const.py +++ b/curl_cffi/const.py @@ -328,6 +328,8 @@ class CurlOpt(IntEnum): TLS_EXTENSION_ORDER = 10000 + 1012 STREAM_EXCLUSIVE = 0 + 1013 TLS_KEY_USAGE_NO_CHECK = 0 + 1014 + TLS_SIGNED_CERT_TIMESTAMPS = 0 + 1015 + TLS_STATUS_REQUEST = 0 + 1016 if
locals().get("WRITEDATA"): FILE = locals().get("WRITEDATA") @@ -563,3 +565,17 @@ class CurlWsFlag(IntEnum): CLOSE = 1 << 3 PING = 1 << 4 OFFSET = 1 << 5 + + +class CurlSslVersion(IntEnum): + """``CURL_SSLVERSION`` constants extracted from libcurl, see comments for details.""" + + DEFAULT = 0 + TLSv1 = 1 + SSLv2 = 2 + SSLv3 = 3 + TLSv1_0 = 4 + TLSv1_1 = 5 + TLSv1_2 = 6 + TLSv1_3 = 7 + MAX_DEFAULT = 1 << 16 diff --git a/curl_cffi/requests/__init__.py b/curl_cffi/requests/__init__.py index 4b6fec62..13fb1f78 100644 --- a/curl_cffi/requests/__init__.py +++ b/curl_cffi/requests/__init__.py @@ -19,17 +19,20 @@ "WebSocket", "WebSocketError", "WsCloseCode", + "ExtraFingerprints", ] from functools import partial from io import BytesIO from typing import Callable, Dict, List, Optional, Tuple, Union + from ..const import CurlHttpVersion, CurlWsFlag from ..curl import CurlMime from .cookies import Cookies, CookieTypes from .errors import RequestsError from .headers import Headers, HeaderTypes +from .impersonate import ExtraFingerprints, ExtraFpDict from .models import Request, Response from .session import AsyncSession, BrowserType, ProxySpec, Session, ThreadType from .websockets import WebSocket, WebSocketError, WsCloseCode @@ -56,6 +59,9 @@ def request( accept_encoding: Optional[str] = "gzip, deflate, br", content_callback: Optional[Callable] = None, impersonate: Optional[Union[str, BrowserType]] = None, + ja3: Optional[str] = None, + akamai: Optional[str] = None, + extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]] = None, thread: Optional[ThreadType] = None, default_headers: Optional[bool] = None, default_encoding: Union[str, Callable[[bytes], str]] = "utf-8", @@ -65,7 +71,7 @@ def request( interface: Optional[str] = None, cert: Optional[Union[str, Tuple[str, str]]] = None, stream: bool = False, - max_recv_speed: int = 0, + max_recv_speed: int = 0, multipart: Optional[CurlMime] = None, ) -> Response: """Send an http request. 
@@ -95,6 +101,9 @@ def request( content_callback: a callback function to receive response body. ``def callback(chunk: bytes) -> None:`` impersonate: which browser version to impersonate. + ja3: ja3 string to impersonate. + akamai: akamai string to impersonate. + extra_fp: extra fingerprints options, in complement to ja3 and akamai strings. thread: work with other thread implementations. choices: eventlet, gevent. default_headers: whether to set default browser headers. default_encoding: encoding for decoding response content if charset is not found in headers. @@ -130,6 +139,9 @@ def request( accept_encoding=accept_encoding, content_callback=content_callback, impersonate=impersonate, + ja3=ja3, + akamai=akamai, + extra_fp=extra_fp, default_headers=default_headers, default_encoding=default_encoding, http_version=http_version, diff --git a/curl_cffi/requests/impersonate.py b/curl_cffi/requests/impersonate.py new file mode 100644 index 00000000..7fbca3c2 --- /dev/null +++ b/curl_cffi/requests/impersonate.py @@ -0,0 +1,280 @@ +from dataclasses import dataclass +from typing import List, Literal, Optional, TypedDict +import warnings +from enum import Enum + +from ..const import CurlSslVersion, CurlOpt + + +class BrowserType(str, Enum): + edge99 = "edge99" + edge101 = "edge101" + chrome99 = "chrome99" + chrome100 = "chrome100" + chrome101 = "chrome101" + chrome104 = "chrome104" + chrome107 = "chrome107" + chrome110 = "chrome110" + chrome116 = "chrome116" + chrome119 = "chrome119" + chrome120 = "chrome120" + chrome123 = "chrome123" + chrome124 = "chrome124" + chrome99_android = "chrome99_android" + safari15_3 = "safari15_3" + safari15_5 = "safari15_5" + safari17_0 = "safari17_0" + safari17_2_ios = "safari17_2_ios" + + chrome = "chrome124" + safari = "safari17_0" + safari_ios = "safari17_2_ios" + + @classmethod + def has(cls, item): + return item in cls.__members__ + + @classmethod + def normalize(cls, item): + if item == "chrome": # noqa: SIM116 + return cls.chrome + elif 
item == "safari": + return cls.safari + elif item == "safari_ios": + return cls.safari_ios + else: + return item + + +@dataclass +class ExtraFingerprints: + tls_min_version: int = CurlSslVersion.TLSv1_2 + tls_grease: bool = False + tls_permute_extensions: bool = False + tls_cert_compression: Literal["zlib", "brotli"] = "brotli" + tls_signature_algorithms: Optional[List[str]] = None + http2_stream_weight: int = 256 + http2_stream_exclusive: int = 1 + + +class ExtraFpDict(TypedDict, total=False): + tls_min_version: int + tls_grease: bool + tls_permute_extensions: bool + tls_cert_compression: Literal["zlib", "brotli"] + tls_signature_algorithms: Optional[List[str]] + http2_stream_weight: int + http2_stream_exclusive: int + + +# TLS versions are in the format of 0xAABB, where AA is major version and BB is minor +# version. As of today, the major version is always 03. +TLS_VERSION_MAP = { + 0x0301: CurlSslVersion.TLSv1_0, # 769 + 0x0302: CurlSslVersion.TLSv1_1, # 770 + 0x0303: CurlSslVersion.TLSv1_2, # 771 + 0x0304: CurlSslVersion.TLSv1_3, # 772 +} + +# A list of the possible cipher suite ids.
Taken from +# http://www.iana.org/assignments/tls-parameters/tls-parameters.xml +# via BoringSSL +TLS_CIPHER_NAME_MAP = { + 0x000A: "TLS_RSA_WITH_3DES_EDE_CBC_SHA", + 0x002F: "TLS_RSA_WITH_AES_128_CBC_SHA", + 0x0035: "TLS_RSA_WITH_AES_256_CBC_SHA", + 0x003C: "TLS_RSA_WITH_AES_128_CBC_SHA256", + 0x003D: "TLS_RSA_WITH_AES_256_CBC_SHA256", + 0x008C: "TLS_PSK_WITH_AES_128_CBC_SHA", + 0x008D: "TLS_PSK_WITH_AES_256_CBC_SHA", + 0x009C: "TLS_RSA_WITH_AES_128_GCM_SHA256", + 0x009D: "TLS_RSA_WITH_AES_256_GCM_SHA384", + 0xC009: "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA", + 0xC00A: "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA", + 0xC012: "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA", + 0xC013: "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA", + 0xC014: "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA", + 0xC023: "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256", + 0xC024: "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384", + 0xC027: "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256", + 0xC028: "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384", + 0xC02B: "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256", + 0xC02C: "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", + 0xC02F: "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", + 0xC030: "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", + 0xC035: "TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA", + 0xC036: "TLS_ECDHE_PSK_WITH_AES_256_CBC_SHA", + 0xCCA8: "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256", + 0xCCA9: "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256", + 0xCCAC: "TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256", + 0x1301: "TLS_AES_128_GCM_SHA256", + 0x1302: "TLS_AES_256_GCM_SHA384", + 0x1303: "TLS_CHACHA20_POLY1305_SHA256", +} + + +# RFC tls extensions: https://datatracker.ietf.org/doc/html/rfc6066 +# IANA list: https://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xhtml +TLS_EXTENSION_NAME_MAP = { + 0: "server_name", + 1: "max_fragment_length", + 2: "client_certificate_url", + 3: "trusted_ca_keys", + 4: "truncated_hmac", + 5: "status_request", + 6: "user_mapping", + 7: "client_authz", + 8: "server_authz", + 9: "cert_type", + 10: 
"supported_groups", # (renamed from "elliptic_curves") + 11: "ec_point_formats", + 12: "srp", + 13: "signature_algorithms", + 14: "use_srtp", + 15: "heartbeat", + 16: "application_layer_protocol_negotiation", + 17: "status_request_v2", + 18: "signed_certificate_timestamp", + 19: "client_certificate_type", + 20: "server_certificate_type", + 21: "padding", + 22: "encrypt_then_mac", + 23: "extended_master_secret", + 24: "token_binding", + 25: "cached_info", + 26: "tls_lts", + 27: "compress_certificate", + 28: "record_size_limit", + 29: "pwd_protect", + 30: "pwd_clear", + 31: "password_salt", + 32: "ticket_pinning", + 33: "tls_cert_with_extern_psk", + 34: "delegated_credential", + 35: "session_ticket", # (renamed from "SessionTicket TLS") + 36: "TLMSP", + 37: "TLMSP_proxying", + 38: "TLMSP_delegate", + 39: "supported_ekt_ciphers", + # 40:"Reserved", + 41: "pre_shared_key", + 42: "early_data", + 43: "supported_versions", + 44: "cookie", + 45: "psk_key_exchange_modes", + # 46:"Reserved", + 47: "certificate_authorities", + 48: "oid_filters", + 49: "post_handshake_auth", + 50: "signature_algorithms_cert", + 51: "key_share", + 52: "transparency_info", + # 53:"connection_id", # (deprecated) + 54: "connection_id", + 55: "external_id_hash", + 56: "external_session_id", + 57: "quic_transport_parameters", + 58: "ticket_request", + 59: "dnssec_chain", + 60: "sequence_number_encryption_algorithms", + 61: "rrc", + 17513: "application_settings", # BoringSSL private usage + # 62-2569:"Unassigned + # 2570:"Reserved + # 2571-6681:"Unassigned + # 6682:"Reserved + # 6683-10793:"Unassigned + # 10794:"Reserved + # 10795-14905:"Unassigned + # 14906:"Reserved + # 14907-19017:"Unassigned + # 19018:"Reserved + # 19019-23129:"Unassigned + # 23130:"Reserved + # 23131-27241:"Unassigned + # 27242:"Reserved + # 27243-31353:"Unassigned + # 31354:"Reserved + # 31355-35465:"Unassigned + # 35466:"Reserved + # 35467-39577:"Unassigned + # 39578:"Reserved + # 39579-43689:"Unassigned + # 43690:"Reserved + 
# 43691-47801:"Unassigned + # 47802:"Reserved + # 47803-51913:"Unassigned + # 51914:"Reserved + # 51915-56025:"Unassigned + # 56026:"Reserved + # 56027-60137:"Unassigned + # 60138:"Reserved + # 60139-64249:"Unassigned + # 64250:"Reserved + # 64251-64767:"Unassigned + 64768: "ech_outer_extensions", + # 64769-65036:"Unassigned + 65037:"encrypted_client_hello", + # 65038-65279:"Unassigned + # 65280:"Reserved for Private Use + 65281:"renegotiation_info", + # 65282-65535:"Reserved for Private Use +} + + +TLS_EC_CURVES_MAP = { + 19: "P-192", + 21: "P-224", + 23: "P-256", + 24: "P-384", + 25: "P-521", + 29: "X25519", + 25497: "X25519Kyber768Draft00", +} + + +def toggle_extension(curl, extension_id: int, enable: bool): + # ECH + if extension_id == 65037: + if enable: + curl.setopt(CurlOpt.ECH, "GREASE") + else: + curl.setopt(CurlOpt.ECH, "") + # compress certificate + elif extension_id == 27: + if enable: + warnings.warn("Cert compression setting to brotli, you had better specify which to use: zlib/brotli") + curl.setopt(CurlOpt.SSL_CERT_COMPRESSION, "brotli") + else: + curl.setopt(CurlOpt.SSL_CERT_COMPRESSION, "") + # ALPS: application settings + elif extension_id == 17513: + if enable: + curl.setopt(CurlOpt.SSL_ENABLE_ALPS, 1) + else: + curl.setopt(CurlOpt.SSL_ENABLE_ALPS, 0) + # server_name + elif extension_id == 0: + raise NotImplementedError("It's unlikely that the server_name(0) extension being changed.") + # ALPN + elif extension_id == 16: + raise NotImplementedError("It's unlikely that the ALPN(16) extension being changed.") + # status_request + elif extension_id == 5: + if enable: + curl.setopt(CurlOpt.TLS_STATUS_REQUEST, 1) + # signed_certificate_timestamps + elif extension_id == 18: + if enable: + curl.setopt(CurlOpt.TLS_SIGNED_CERT_TIMESTAMPS, 1) + # session_ticket + elif extension_id == 35: + if enable: + curl.setopt(CurlOpt.SSL_ENABLE_TICKET, 1) + else: + curl.setopt(CurlOpt.SSL_ENABLE_TICKET, 0) + # padding + elif extension_id == 21: + pass + else: + raise 
NotImplementedError(f"This extension({extension_id}) can not be toggled for now, it may be updated later.") diff --git a/curl_cffi/requests/session.py b/curl_cffi/requests/session.py index b029b980..75e2d84a 100644 --- a/curl_cffi/requests/session.py +++ b/curl_cffi/requests/session.py @@ -6,7 +6,6 @@ from collections import Counter from concurrent.futures import ThreadPoolExecutor from contextlib import asynccontextmanager, contextmanager, suppress -from enum import Enum from functools import partialmethod from io import BytesIO from json import dumps @@ -25,11 +24,21 @@ ) from urllib.parse import ParseResult, parse_qsl, unquote, urlencode, urljoin, urlparse -from .. import AsyncCurl, Curl, CurlError, CurlHttpVersion, CurlInfo, CurlOpt + +from .. import AsyncCurl, Curl, CurlError, CurlHttpVersion, CurlInfo, CurlOpt, CurlSslVersion from ..curl import CURL_WRITEFUNC_ERROR, CurlMime from .cookies import Cookies, CookieTypes, CurlMorsel from .errors import RequestsError, SessionClosed from .headers import Headers, HeaderTypes +from .impersonate import ( + ExtraFingerprints, + ExtraFpDict, + toggle_extension, + BrowserType, + TLS_VERSION_MAP, + TLS_CIPHER_NAME_MAP, + TLS_EC_CURVES_MAP +) from .models import Request, Response from .websockets import WebSocket @@ -54,52 +63,6 @@ class ProxySpec(TypedDict, total=False): ThreadType = Literal["eventlet", "gevent"] -class BrowserType(str, Enum): - edge99 = "edge99" - edge101 = "edge101" - chrome99 = "chrome99" - chrome100 = "chrome100" - chrome101 = "chrome101" - chrome104 = "chrome104" - chrome107 = "chrome107" - chrome110 = "chrome110" - chrome116 = "chrome116" - chrome119 = "chrome119" - chrome120 = "chrome120" - chrome123 = "chrome123" - chrome124 = "chrome124" - chrome99_android = "chrome99_android" - safari15_3 = "safari15_3" - safari15_5 = "safari15_5" - safari17_0 = "safari17_0" - safari17_2_ios = "safari17_2_ios" - - chrome = "chrome123" - safari = "safari17_0" - safari_ios = "safari17_2_ios" - - @classmethod - def 
has(cls, item): - return item in cls.__members__ - - @classmethod - def normalize(cls, item): - if item == "chrome": # noqa: SIM116 - return cls.chrome - elif item == "safari": - return cls.safari - elif item == "safari_ios": - return cls.safari_ios - else: - return item - - -class BrowserSpec: - """A more structured way of selecting browsers""" - - # TODO - - def _is_absolute_url(url: str) -> bool: """Check if the provided url is an absolute url""" parsed_url = urlparse(url) @@ -215,6 +178,9 @@ def __init__( allow_redirects: bool = True, max_redirects: int = 30, impersonate: Optional[Union[str, BrowserType]] = None, + ja3: Optional[str] = None, + akamai: Optional[str] = None, + extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]] = None, default_headers: bool = True, default_encoding: Union[str, Callable[[bytes], str]] = "utf-8", curl_options: Optional[dict] = None, @@ -235,6 +201,9 @@ def __init__( self.allow_redirects = allow_redirects self.max_redirects = max_redirects self.impersonate = impersonate + self.ja3 = ja3 + self.akamai = akamai + self.extra_fp = extra_fp self.default_headers = default_headers self.default_encoding = default_encoding self.curl_options = curl_options or {} @@ -256,6 +225,92 @@ def __init__( self._closed = False + def _toggle_extensions_by_ids(self, curl, extension_ids): + default_enabled = {0, 51, 13, 43, 65281, 23, 10, 45, 35, 11, 16} + + to_enable_ids = extension_ids - default_enabled + for ext_id in to_enable_ids: + toggle_extension(curl, ext_id, enable=True) + + # print("to_enable: ", to_enable_ids) + + to_disable_ids = default_enabled - extension_ids + for ext_id in to_disable_ids: + toggle_extension(curl, ext_id, enable=False) + + # print("to_disable: ", to_disable_ids) + + def _set_ja3_options(self, curl, ja3: str, permute: bool = False): + """ + Detailed explanation: https://engineering.salesforce.com/tls-fingerprinting-with-ja3-and-ja3s-247362855967/ + """ + tls_version, ciphers, extensions, curves, curve_formats = 
ja3.split(",") + + curl_tls_version = TLS_VERSION_MAP[int(tls_version)] + curl.setopt(CurlOpt.SSLVERSION, curl_tls_version | CurlSslVersion.MAX_DEFAULT) + assert curl_tls_version == CurlSslVersion.TLSv1_2, "Only TLS v1.2 works for now." + + cipher_names = [] + for cipher in ciphers.split("-"): + cipher_id = int(cipher) + cipher_name = TLS_CIPHER_NAME_MAP[cipher_id] + cipher_names.append(cipher_name) + + curl.setopt(CurlOpt.SSL_CIPHER_LIST, ":".join(cipher_names)) + + if extensions.endswith("-21"): + extensions = extensions[:-3] + warnings.warn( + "Padding(21) extension found in ja3 string, whether to add it should " + "be managed by the SSL engine. The TLS client hello packet may contain " + "or not contain this extension, any of which should be correct." + ) + extension_ids = set(int(e) for e in extensions.split("-")) + self._toggle_extensions_by_ids(curl, extension_ids) + + if not permute: + curl.setopt(CurlOpt.TLS_EXTENSION_ORDER, extensions) + + curve_names = [] + for curve in curves.split("-"): + curve_id = int(curve) + curve_name = TLS_EC_CURVES_MAP[curve_id] + curve_names.append(curve_name) + + curl.setopt(CurlOpt.SSL_EC_CURVES, ":".join(curve_names)) + + assert int(curve_formats) == 0, "Only curve_formats == 0 is supported." + + def _set_akamai_options(self, curl, akamai: str): + """ + Detailed explanation: https://www.blackhat.com/docs/eu-17/materials/eu-17-Shuster-Passive-Fingerprinting-Of-HTTP2-Clients-wp.pdf + """ + settings, window_update, streams, header_order = akamai.split("|") + + curl.setopt(CurlOpt.HTTP_VERSION, CurlHttpVersion.V2_0) + + curl.setopt(CurlOpt.HTTP2_SETTINGS, settings) + curl.setopt(CurlOpt.HTTP2_WINDOW_UPDATE, int(window_update)) + + if streams != "0": + curl.setopt(CurlOpt.HTTP2_STREAMS, streams) + + # m,a,s,p -> masp + # curl-impersonate only accepts masp format, without commas. 
+ curl.setopt(CurlOpt.HTTP2_PSEUDO_HEADERS_ORDER, header_order.replace(",", "")) + + def _set_extra_fp(self, curl, fp: ExtraFingerprints): + + if fp.tls_signature_algorithms: + curl.setopt(CurlOpt.SSL_SIG_HASH_ALGS, ",".join(fp.tls_signature_algorithms)) + + curl.setopt(CurlOpt.SSLVERSION, fp.tls_min_version | CurlSslVersion.MAX_DEFAULT) + curl.setopt(CurlOpt.TLS_GREASE, int(fp.tls_grease)) + curl.setopt(CurlOpt.SSL_PERMUTE_EXTENSIONS, int(fp.tls_permute_extensions)) + curl.setopt(CurlOpt.SSL_CERT_COMPRESSION, fp.tls_cert_compression) + curl.setopt(CurlOpt.STREAM_WEIGHT, fp.http2_stream_weight) + curl.setopt(CurlOpt.STREAM_EXCLUSIVE, fp.http2_stream_exclusive) + def _set_curl_options( self, curl, @@ -279,6 +334,9 @@ def _set_curl_options( accept_encoding: Optional[str] = "gzip, deflate, br", content_callback: Optional[Callable] = None, impersonate: Optional[Union[str, BrowserType]] = None, + ja3: Optional[str] = None, + akamai: Optional[str] = None, + extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]] = None, default_headers: Optional[bool] = None, http_version: Optional[CurlHttpVersion] = None, interface: Optional[str] = None, @@ -521,6 +579,34 @@ def _set_curl_options( if ret != 0: raise RequestsError(f"Impersonating {impersonate} is not supported") + # ja3 string + ja3 = ja3 or self.ja3 + if ja3: + if impersonate: + warnings.warn("JA3 was altered after browser version was set.") + permute = False + if isinstance(extra_fp, ExtraFingerprints) and extra_fp.tls_permute_extensions: + permute = True + if isinstance(extra_fp, dict) and extra_fp.get("tls_permute_extensions"): + permute = True + self._set_ja3_options(c, ja3, permute=permute) + + # akamai string + akamai = akamai or self.akamai + if akamai: + if impersonate: + warnings.warn("Akamai was altered after browser version was set.") + self._set_akamai_options(c, akamai) + + # extra_fp options + extra_fp = extra_fp or self.extra_fp + if extra_fp: + if isinstance(extra_fp, dict): + extra_fp = 
ExtraFingerprints(**extra_fp) + if impersonate: + warnings.warn("Extra fingerprints was altered after browser version was set.") + self._set_extra_fp(c, extra_fp) + # http_version, after impersonate, which will change this to http2 http_version = http_version or self.http_version if http_version: @@ -653,6 +739,9 @@ def __init__( allow_redirects: whether to allow redirection. max_redirects: max redirect counts, default 30, use -1 for unlimited. impersonate: which browser version to impersonate in the session. + ja3: ja3 string to impersonate in the session. + akamai: akamai string to impersonate in the session. + extra_fp: extra fingerprints options, in complement to ja3 and akamai strings. interface: which interface use in request to server. default_encoding: encoding for decoding response content if charset is not found in headers. Defaults to "utf-8". Can be set to a callable for automatic detection. @@ -783,6 +872,9 @@ def request( accept_encoding: Optional[str] = "gzip, deflate, br", content_callback: Optional[Callable] = None, impersonate: Optional[Union[str, BrowserType]] = None, + ja3: Optional[str] = None, + akamai: Optional[str] = None, + extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]] = None, default_headers: Optional[bool] = None, default_encoding: Union[str, Callable[[bytes], str]] = "utf-8", http_version: Optional[CurlHttpVersion] = None, @@ -825,6 +917,9 @@ def request( accept_encoding=accept_encoding, content_callback=content_callback, impersonate=impersonate, + ja3=ja3, + akamai=akamai, + extra_fp=extra_fp, default_headers=default_headers, http_version=http_version, interface=interface, @@ -939,6 +1034,9 @@ def __init__( allow_redirects: whether to allow redirection. max_redirects: max redirect counts, default 30, use -1 for unlimited. impersonate: which browser version to impersonate in the session. + ja3: ja3 string to impersonate in the session. + akamai: akamai string to impersonate in the session. 
+ extra_fp: extra fingerprints options, in complement to ja3 and akamai strings. default_encoding: encoding for decoding response content if charset is not found in headers. Defaults to "utf-8". Can be set to a callable for automatic detection. @@ -1065,6 +1163,9 @@ async def request( accept_encoding: Optional[str] = "gzip, deflate, br", content_callback: Optional[Callable] = None, impersonate: Optional[Union[str, BrowserType]] = None, + ja3: Optional[str] = None, + akamai: Optional[str] = None, + extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]] = None, default_headers: Optional[bool] = None, default_encoding: Union[str, Callable[[bytes], str]] = "utf-8", http_version: Optional[CurlHttpVersion] = None, @@ -1100,6 +1201,9 @@ async def request( accept_encoding=accept_encoding, content_callback=content_callback, impersonate=impersonate, + ja3=ja3, + akamai=akamai, + extra_fp=extra_fp, default_headers=default_headers, http_version=http_version, interface=interface, diff --git a/examples/impersonate.py b/examples/impersonate.py new file mode 100644 index 00000000..f150366e --- /dev/null +++ b/examples/impersonate.py @@ -0,0 +1,48 @@ +from curl_cffi import requests + +# OKHTTP impersonation examples +# credits: https://github.com/bogdanfinn/tls-client/blob/master/profiles/contributed_custom_profiles.go + +url = "https://tls.browserleaks.com/json" + +okhttp4_android10_ja3 = ",".join( + [ + "771", + "4865-4866-4867-49195-49196-52393-49199-49200-52392-49171-49172-156-157-47-53", + "0-23-65281-10-11-35-16-5-13-51-45-43-21", + "29-23-24", + "0", + ] +) + +okhttp4_android10_akamai = "4:16777216|16711681|0|m,p,a,s" + +extra_fp = { + "tls_signature_algorithms": [ + "ecdsa_secp256r1_sha256", + "rsa_pss_rsae_sha256", + "rsa_pkcs1_sha256", + "ecdsa_secp384r1_sha384", + "rsa_pss_rsae_sha384", + "rsa_pkcs1_sha384", + "rsa_pss_rsae_sha512", + "rsa_pkcs1_sha512", + "rsa_pkcs1_sha1", + ] + # other options: + # tls_min_version: int = CurlSslVersion.TLSv1_2 + # tls_grease: bool
= False + # tls_permute_extensions: bool = False + # tls_cert_compression: Literal["zlib", "brotli"] = "brotli" + # tls_signature_algorithms: Optional[List[str]] = None + # http2_stream_weight: int = 256 + # http2_stream_exclusive: int = 1 + + # See requests/impersonate.py and tests/unittest/test_impersonate.py for more examples +} + + +r = requests.get( + url, ja3=okhttp4_android10_ja3, akamai=okhttp4_android10_akamai, extra_fp=extra_fp +) +print(r.json()) diff --git a/pyproject.toml b/pyproject.toml index 54396b86..f539150d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "curl_cffi" -version = "0.7.0b6" +version = "0.7.0b7" authors = [{ name = "Yifei Kong", email = "kong@yifei.me" }] description = "libcurl ffi bindings for Python, with impersonation support." license = { file = "LICENSE" } diff --git a/scripts/build.py b/scripts/build.py index f4566313..48918677 100644 --- a/scripts/build.py +++ b/scripts/build.py @@ -10,7 +10,7 @@ from cffi import FFI # this is the upstream libcurl-impersonate version -__version__ = "0.7.0b6" +__version__ = "0.7.0" tmpdir = None diff --git a/scripts/generate_consts.py b/scripts/generate_consts.py index 85c3d173..74ec6f02 100644 --- a/scripts/generate_consts.py +++ b/scripts/generate_consts.py @@ -64,24 +64,45 @@ """ # noqa E501 output = subprocess.check_output(cmd, shell=True) f.write(output.decode()) - f.write("\n\n") + f.write("\n") + + # These lines are not easy to be extracted automatically + f.write( + ''' +class CurlHttpVersion(IntEnum): + """``CURL_HTTP_VERSION`` constants extracted from libcurl, see comments for details.""" + + NONE = 0 + V1_0 = 1 # please use HTTP 1.0 in the request */ + V1_1 = 2 # please use HTTP 1.1 in the request */ + V2_0 = 3 # please use HTTP 2 in the request */ + V2TLS = 4 # use version 2 for HTTPS, version 1.1 for HTTP */ + V2_PRIOR_KNOWLEDGE = 5 # please use HTTP 2 without HTTP/1.1 Upgrade */ + V3 = 30 # Makes use of explicit HTTP/3 without fallback. 
- f.write("class CurlHttpVersion(IntEnum):\n") - f.write(' """``CURL_HTTP_VERSION`` constants extracted from libcurl, see comments for details."""\n\n') - f.write(" NONE = 0\n") - f.write(" V1_0 = 1 # please use HTTP 1.0 in the request */\n") - f.write(" V1_1 = 2 # please use HTTP 1.1 in the request */\n") - f.write(" V2_0 = 3 # please use HTTP 2 in the request */\n") - f.write(" V2TLS = 4 # use version 2 for HTTPS, version 1.1 for HTTP */\n") - f.write(" V2_PRIOR_KNOWLEDGE = 5 # please use HTTP 2 without HTTP/1.1 Upgrade */\n") - f.write(" V3 = 30 # Makes use of explicit HTTP/3 without fallback.\n") - f.write("\n\n") - f.write("class CurlWsFlag(IntEnum):\n") - f.write(' """``CURL_WS_FLAG`` constants extracted from libcurl, see comments for details."""\n\n') - f.write(" TEXT = 1 << 0\n") - f.write(" BINARY = 1 << 1\n") - f.write(" CONT = 1 << 2\n") - f.write(" CLOSE = 1 << 3\n") - f.write(" PING = 1 << 4\n") - f.write(" OFFSET = 1 << 5\n") +class CurlWsFlag(IntEnum): + """``CURL_WS_FLAG`` constants extracted from libcurl, see comments for details.""" + + TEXT = 1 << 0 + BINARY = 1 << 1 + CONT = 1 << 2 + CLOSE = 1 << 3 + PING = 1 << 4 + OFFSET = 1 << 5 + + +class CurlSslVersion(IntEnum): + """``CURL_SSLVERSION`` constants extracted from libcurl, see comments for details.""" + + DEFAULT = 0 + TLSv1 = 1 + SSLv2 = 2 + SSLv3 = 3 + TLSv1_0 = 4 + TLSv1_1 = 5 + TLSv1_2 = 6 + TLSv1_3 = 7 + MAX_DEFAULT = 1 << 16 +''' + ) diff --git a/tests/unittest/test_impersonate.py b/tests/unittest/test_impersonate.py index a0c72615..1859a54f 100644 --- a/tests/unittest/test_impersonate.py +++ b/tests/unittest/test_impersonate.py @@ -1,21 +1,29 @@ import pytest from curl_cffi import requests -from curl_cffi.const import CurlHttpVersion +from curl_cffi.const import CurlHttpVersion, CurlSslVersion def test_impersonate_with_version(server): # the test server does not understand http/2 - r = requests.get(str(server.url), impersonate="chrome120", http_version=CurlHttpVersion.V1_1) + r = 
requests.get( + str(server.url), impersonate="chrome120", http_version=CurlHttpVersion.V1_1 + ) assert r.status_code == 200 - r = requests.get(str(server.url), impersonate="safari17_0", http_version=CurlHttpVersion.V1_1) + r = requests.get( + str(server.url), impersonate="safari17_0", http_version=CurlHttpVersion.V1_1 + ) assert r.status_code == 200 def test_impersonate_without_version(server): - r = requests.get(str(server.url), impersonate="chrome", http_version=CurlHttpVersion.V1_1) + r = requests.get( + str(server.url), impersonate="chrome", http_version=CurlHttpVersion.V1_1 + ) assert r.status_code == 200 - r = requests.get(str(server.url), impersonate="safari_ios", http_version=CurlHttpVersion.V1_1) + r = requests.get( + str(server.url), impersonate="safari_ios", http_version=CurlHttpVersion.V1_1 + ) assert r.status_code == 200 @@ -24,3 +32,165 @@ def test_impersonate_non_exist(server): requests.get(str(server.url), impersonate="edge") with pytest.raises(requests.RequestsError, match="Impersonating"): requests.get(str(server.url), impersonate="chrome2952") + + +# TODO implement local ja3/akamai verification server with th1. 
@pytest.mark.skip(reason="warning is used")
def test_costomized_no_impersonate_coexist(server):
    """Combining ``impersonate`` with ``ja3``/``akamai`` should raise.

    Currently skipped: per the skip reason, a warning is used instead of an
    error.
    """
    with pytest.raises(requests.RequestsError):
        requests.get(str(server.url), impersonate="chrome", ja3=",,,,")
    with pytest.raises(requests.RequestsError):
        requests.get(str(server.url), impersonate="chrome", akamai="|||")


def test_customized_ja3_chrome126():
    """The remote service must echo back exactly the JA3 string we sent."""
    url = "https://tls.browserleaks.com/json"
    ja3 = "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,0-65037-27-51-13-43-5-18-17513-65281-23-10-45-35-11-16,25497-29-23-24,0"
    r = requests.get(url, ja3=ja3).json()
    assert r["ja3_text"] == ja3


@pytest.mark.skip(reason="not working")
def test_customized_ja3_tls_version():
    """The first JA3 field (TLS version) should be reported as sent."""
    url = "https://tls.browserleaks.com/json"
    ja3 = "770,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,0-65037-27-51-13-43-5-18-17513-65281-23-10-45-35-11-16,25497-29-23-24,0"
    r = requests.get(url, ja3=ja3).json()
    tls_version, _, _, _, _ = r["ja3_text"].split(",")
    assert tls_version == "770"


def test_customized_ja3_ciphers():
    """The second JA3 field (cipher list) should be reported as sent."""
    url = "https://tls.browserleaks.com/json"
    ja3 = "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171,0-65037-27-51-13-43-5-18-17513-65281-23-10-45-35-11-16,25497-29-23-24,0"
    r = requests.get(url, ja3=ja3).json()
    _, ciphers, _, _, _ = r["ja3_text"].split(",")
    assert ciphers == "4865-4866-4867-49195-49199-49196-49200-52393-52392-49171"


# TODO: convert to a pytest parametrized test
def test_customized_ja3_extensions():
    """The third JA3 field (extension list) should be reported as sent.

    Several extension lists are tried in sequence; every request shares the
    same TLS version, cipher list, curves and point formats, so only the
    extension field varies between cases.
    """
    url = "https://tls.browserleaks.com/json"
    prefix = "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53"
    suffix = "25497-29-23-24,0"
    extension_cases = (
        # full list, starting with 65037
        "65037-65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51",
        # same list without 65037
        "65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51",
        # additionally without 5 and 18
        "65281-0-11-23-27-16-17513-10-35-43-45-13-51",
        # without 35 (session_ticket), but with 5 and 18 present again
        "65281-0-11-23-5-18-27-16-17513-10-43-45-13-51",
    )
    for expected in extension_cases:
        ja3 = f"{prefix},{expected},{suffix}"
        r = requests.get(url, ja3=ja3).json()
        _, _, extensions, _, _ = r["ja3_text"].split(",")
        assert extensions == expected


def test_customized_ja3_curves():
    """The fourth JA3 field (curves) should be reported in the order sent."""
    url = "https://tls.browserleaks.com/json"
    ja3 = "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,0-65037-27-51-13-43-5-18-17513-65281-23-10-45-35-11-16,25497-24-23-29,0"
    r = requests.get(url, ja3=ja3).json()
    _, _, _, curves, _ = r["ja3_text"].split(",")
    assert curves == "25497-24-23-29"


def test_customized_akamai_chrome126():
    """The remote service must echo back exactly the akamai string we sent."""
    url = "https://tls.browserleaks.com/json"
    akamai = "1:65536;2:0;4:6291456;6:262144|15663105|0|m,a,s,p"
    r = requests.get(url, akamai=akamai).json()
    assert r["akamai_text"] == akamai


def test_customized_akamai_safari():
    """Same as the chrome126 akamai test, with a different settings string."""
    url = "https://tls.browserleaks.com/json"
    akamai = "2:0;4:4194304;3:100|10485760|0|m,s,p,a"
    r = requests.get(url, akamai=akamai).json()
    assert r["akamai_text"] == akamai


def _find_tls_extension(report, name):
    """Return the first entry of ``report["tls"]["extensions"]`` named *name*.

    Returns ``None`` when no entry matches, so callers can fail with an
    explicit assertion instead of silently skipping their checks.
    """
    for ex in report["tls"]["extensions"]:
        if ex["name"] == name:
            return ex
    return None


def test_customized_extra_fp_sig_hash_algs():
    """``tls_signature_algorithms`` must show up verbatim in the handshake."""
    url = "https://tls.peet.ws/api/all"
    # NOTE(review): "rsa_pss_rsae_sha384" is listed twice; presumably this
    # mirrors the list Safari really sends -- confirm before deduplicating.
    safari_algs = [
        "ecdsa_secp256r1_sha256",
        "rsa_pss_rsae_sha256",
        "rsa_pkcs1_sha256",
        "ecdsa_secp384r1_sha384",
        "ecdsa_sha1",
        "rsa_pss_rsae_sha384",
        "rsa_pss_rsae_sha384",
        "rsa_pkcs1_sha384",
        "rsa_pss_rsae_sha512",
        "rsa_pkcs1_sha512",
        "rsa_pkcs1_sha1",
    ]
    fp = requests.ExtraFingerprints(tls_signature_algorithms=safari_algs)
    r = requests.get(url, extra_fp=fp).json()
    ext = _find_tls_extension(r, "signature_algorithms (13)")
    assert ext is not None, "signature_algorithms extension not reported"
    assert safari_algs == ext["signature_algorithms"]


def test_customized_extra_fp_tls_min_version():
    """Lowering ``tls_min_version`` should advertise at least four versions."""
    url = "https://tls.peet.ws/api/all"
    safari_min_version = CurlSslVersion.TLSv1_0
    fp = requests.ExtraFingerprints(tls_min_version=safari_min_version)
    r = requests.get(url, extra_fp=fp).json()
    ext = _find_tls_extension(r, "supported_versions (43)")
    # Fail loudly if the extension is absent; otherwise the length check
    # below would never run and the test would pass without checking anything.
    assert ext is not None, "supported_versions extension not reported"
    # TLS 1.0, 1.1, 1.2, 1.3
    assert len(ext["versions"]) >= 4


def test_customized_extra_fp_grease():
    """With ``tls_grease`` enabled, the first reported cipher is a GREASE one."""
    url = "https://tls.peet.ws/api/all"
    fp = requests.ExtraFingerprints(tls_grease=True)
    r = requests.get(url, extra_fp=fp).json()
    assert "TLS_GREASE" in r["tls"]["ciphers"][0]


def test_customized_extra_fp_permute():
    """``tls_permute_extensions`` must change the reported extension order."""
    url = "https://tls.browserleaks.com/json"
    ja3 = "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,65037-65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51,25497-29-23-24,0"

    # Baseline: without permutation the order is exactly the one requested.
    r = requests.get(url, ja3=ja3).json()
    _, _, extensions, _, _ = r["ja3_text"].split(",")
    assert extensions == "65037-65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51"

    # With permutation enabled the reported order must differ from the request.
    r = requests.get(url, ja3=ja3, extra_fp=requests.ExtraFingerprints(tls_permute_extensions=True)).json()
    _, _, extensions, _, _ = r["ja3_text"].split(",")
    assert extensions != "65037-65281-0-11-23-5-18-27-16-17513-10-35-43-45-13-51"


def test_customized_extra_fp_cert_compression():
    """``tls_cert_compression="zlib"`` must be the first advertised algorithm."""
    url = "https://tls.peet.ws/api/all"
    fp = requests.ExtraFingerprints(tls_cert_compression="zlib")
    r = requests.get(url, extra_fp=fp).json()
    ext = _find_tls_extension(r, "compress_certificate (27)")
    # Explicit assertions instead of an IndexError on an empty list.
    assert ext is not None, "compress_certificate extension not reported"
    result_algs = ext["algorithms"]
    assert result_algs, "compress_certificate reported no algorithms"
    assert result_algs[0] == "zlib (1)"


def test_customized_extra_fp_stream_weight():
    """``http2_stream_weight`` must appear as the priority weight of frame 2."""
    url = "https://tls.peet.ws/api/all"
    fp = requests.ExtraFingerprints(http2_stream_weight=64)
    r = requests.get(url, extra_fp=fp).json()
    assert r["http2"]["sent_frames"][2]["priority"]["weight"] == 64


def test_customized_extra_fp_stream_exclusive():
    """``http2_stream_exclusive`` must appear as the exclusive flag of frame 2."""
    url = "https://tls.peet.ws/api/all"
    fp = requests.ExtraFingerprints(http2_stream_exclusive=0)
    r = requests.get(url, extra_fp=fp).json()
    assert r["http2"]["sent_frames"][2]["priority"]["exclusive"] == 0