diff --git a/benchmark/README.md b/benchmark/README.md index a6f6e6e..163ec85 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -4,6 +4,7 @@ Benchmark between `pyreqwests_impersonate` and other python http clients: - curl_cffi - httpx +- pycurl - pyreqwests_impersonate - python-tls-client - requests diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 13ebf42..3e30bcd 100644 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -1,19 +1,44 @@ +import gzip import time -from concurrent.futures import ThreadPoolExecutor, as_completed +from io import BytesIO + from importlib.metadata import version import pandas as pd import requests import httpx import tls_client +import pycurl import pyreqwest_impersonate import curl_cffi.requests +class PycurlSession: + def __init__(self): + self.c = pycurl.Curl() + self.content = None + + def get(self, url): + buffer = BytesIO() + self.c.setopt(pycurl.URL, url) + self.c.setopt(pycurl.WRITEDATA, buffer) + self.c.setopt(pycurl.ENCODING, 'gzip') # Automatically handle gzip encoding + self.c.perform() + self.content = buffer.getvalue() + return self + + @property + def text(self): + return self.content.decode('utf-8') + + def __del__(self): + self.c.close() + results = [] PACKAGES = [ ("requests", requests.Session), ("httpx", httpx.Client), ("tls_client", tls_client.Session), ("curl_cffi", curl_cffi.requests.Session), + ("pycurl", PycurlSession), ("pyreqwest_impersonate", pyreqwest_impersonate.Client), ] @@ -22,6 +47,12 @@ def add_package_version(packages): return [(f"{name} {version(name)}", classname) for name, classname in packages] +def get_test(session_class, requests_number): + for _ in range(requests_number): + s = session_class() + s.get(url).text + + def session_get_test(session_class, requests_number): s = session_class() for _ in range(requests_number): @@ -30,52 +61,24 @@ def session_get_test(session_class, requests_number): PACKAGES = add_package_version(PACKAGES) -# one thread 
requests_number = 2000 -for response_size in ["5k", "50k", "200k"]: - url = f"http://127.0.0.1:8000/{response_size}" - print(f"\nOne worker, {response_size=}, {requests_number=}") - for name, session_class in PACKAGES: - start = time.perf_counter() - cpu_start = time.process_time() - session_get_test(session_class, requests_number) - dur = round(time.perf_counter() - start, 2) - cpu_dur = round(time.process_time() - cpu_start, 2) - results.append( - { - "name": name, - "threads": 1, - "size": response_size, - "time": dur, - "cpu_time": cpu_dur, - } - ) - print(f" name: {name:<30} time: {dur} cpu_time: {cpu_dur}") - - -# multiple threads -requests_number = 2000 -threads_numbers = [5, 32] -for threads_number in threads_numbers: +for session in [False, True]: for response_size in ["5k", "50k", "200k"]: url = f"http://127.0.0.1:8000/{response_size}" - print(f"\n{threads_number} workers, {response_size=}, {requests_number=}") + print(f"\n{session=}, {response_size=}, {requests_number=}") for name, session_class in PACKAGES: start = time.perf_counter() cpu_start = time.process_time() - with ThreadPoolExecutor(threads_number) as executor: - futures = [ - executor.submit(session_get_test, session_class, int(requests_number / threads_number)) - for _ in range(threads_number) - ] - for f in as_completed(futures): - f.result() + if session: + session_get_test(session_class, requests_number) + else: + get_test(session_class, requests_number) dur = round(time.perf_counter() - start, 2) cpu_dur = round(time.process_time() - cpu_start, 2) results.append( { "name": name, - "threads": threads_number, + "session": session, "size": response_size, "time": dur, "cpu_time": cpu_dur, @@ -83,10 +86,9 @@ def session_get_test(session_class, requests_number): ) print(f" name: {name:<30} time: {dur} cpu_time: {cpu_dur}") - df = pd.DataFrame(results) pivot_df = df.pivot_table( - index=["name", "threads"], + index=["name", "session"], columns="size", values=["time", "cpu_time"], aggfunc="mean", 
@@ -94,12 +96,13 @@ def session_get_test(session_class, requests_number): pivot_df.reset_index(inplace=True) pivot_df.columns = [" ".join(col).strip() for col in pivot_df.columns.values] pivot_df = pivot_df[ - ["name", "threads"] - + [col for col in pivot_df.columns if col not in ["name", "threads"]] + ["name", "session"] + + [col for col in pivot_df.columns if col not in ["name", "session"]] ] -unique_threads = pivot_df["threads"].unique() -for thread in unique_threads: - thread_df = pivot_df[pivot_df["threads"] == thread] - print(f"\nTable for {thread} threads:") - print(thread_df.to_string(index=False)) - thread_df.to_csv(f"{thread}_threads.csv", index=False) +print(pivot_df) + +for session in [False, True]: + session_df = pivot_df[pivot_df["session"] == session] + print(f"\nTable for {session=}:") + print(session_df.to_string(index=False)) + session_df.to_csv(f"{session=}.csv", index=False) diff --git a/benchmark/generate_image.py b/benchmark/generate_image.py index 55325c3..083f25c 100644 --- a/benchmark/generate_image.py +++ b/benchmark/generate_image.py @@ -49,17 +49,13 @@ def plot_data(file_name, ax, offset): return x, width, names # Create a figure with two subplots -fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 6), layout='constrained') +fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 5), layout='constrained') # Plot data for 1 thread in the first subplot -x1, width, names = plot_data('1_threads.csv', ax1, 0) - -# Plot data for 5 threads in the second subplot -x2, _, _ = plot_data('5_threads.csv', ax2, 0) - -# Plot data for 32 threads in the second subplot -x3, _, _ = plot_data('32_threads.csv', ax3, 0) +x1, width, names = plot_data('session=False.csv', ax1, 0) +# Plot data for session=True in the second subplot +x2, _, _ = plot_data('session=True.csv', ax2, 0) # Adjust the y-axis limits for the first subplot y_min, y_max = ax1.get_ylim() @@ -71,29 +67,20 @@ def plot_data(file_name, ax, offset): new_y_max = y_max + 1 ax2.set_ylim(y_min, new_y_max) 
-# Adjust the y-axis limits for the third subplot -y_min, y_max = ax3.get_ylim() -new_y_max = y_max + 1 -ax3.set_ylim(y_min, new_y_max) - # Add some text for labels, title and custom x-axis tick labels, etc. ax1.set_ylabel('Time (s)') -ax1.set_title('Performance comparison. Threads: 1. Requests: 2000. Response: gzip, utf-8, size 5Kb|50Kb|200Kb') +ax1.set_title('Benchmark get(url).text | Session=False | Requests: 2000 | Response: gzip, utf-8, size 5Kb,50Kb,200Kb') ax1.set_xticks(x1 + 3*width - width/2) # Adjust the x-ticks to be after the 3rd bar, moved 0.5 bar width to the left ax1.set_xticklabels(names) ax1.legend(loc='upper left', ncols=6, prop={'size': 8}) +ax1.tick_params(axis='x', labelsize=8) ax2.set_ylabel('Time (s)') -ax2.set_title('Performance comparison. Threads: 5. Requests: 2000. Response: gzip, utf-8, size 5Kb|50Kb|200Kb') +ax2.set_title('Benchmark get(url).text | Session=True | Requests: 2000 | Response: gzip, utf-8, size 5Kb,50Kb,200Kb') ax2.set_xticks(x2 + 3*width - width/2) # Adjust the x-ticks to be after the 3rd bar, moved 0.5 bar width to the left ax2.set_xticklabels(names) ax2.legend(loc='upper left', ncols=6, prop={'size': 8}) - -ax3.set_ylabel('Time (s)') -ax3.set_title('Performance comparison. Threads: 32. Requests: 2000. Response: gzip, utf-8, size 5Kb|50Kb|200Kb') -ax3.set_xticks(x3 + 3*width - width/2) # Adjust the x-ticks to be after the 3rd bar, moved 0.5 bar width to the left -ax3.set_xticklabels(names) -ax3.legend(loc='upper left', ncols=6, prop={'size': 8}) +ax2.tick_params(axis='x', labelsize=8) # Save the plot to a file plt.savefig('benchmark.jpg', format='jpg', dpi=80, bbox_inches='tight') diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt index 3737db2..99111f3 100644 --- a/benchmark/requirements.txt +++ b/benchmark/requirements.txt @@ -7,4 +7,5 @@ httpx tls-client pyreqwest_impersonate curl_cffi +pycurl typing_extensions # tls-client py3.12 dependence