Skip to content

Commit

Permalink
Benchmark: 1)add pycurl, 2)test session=False and session=True
Browse files Browse the repository at this point in the history
…cases
  • Loading branch information
deedy5 committed Jul 22, 2024
1 parent c476f64 commit 9df8e08
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 67 deletions.
1 change: 1 addition & 0 deletions benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Benchmark between `pyreqwest_impersonate` and other python http clients:

- curl_cffi
- httpx
- pycurl
- pyreqwest_impersonate
- python-tls-client
- requests
Expand Down
95 changes: 49 additions & 46 deletions benchmark/benchmark.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,44 @@
import gzip
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from io import BytesIO

from importlib.metadata import version
import pandas as pd
import requests
import httpx
import tls_client
import pycurl
import pyreqwest_impersonate
import curl_cffi.requests

class PycurlSession:
    """Minimal session-like wrapper around a single ``pycurl.Curl`` handle.

    It exposes ``get(url)`` and a ``text`` property so the benchmark can
    drive it through the same ``session.get(url).text`` call chain it uses
    for the other HTTP clients. One ``Curl`` handle is created per instance
    and reused for every ``get`` call on that instance.
    """

    def __init__(self):
        self.c = pycurl.Curl()
        # Raw body bytes of the most recent response; None until get() runs.
        self.content = None

    def get(self, url):
        """Fetch *url* and store the response body in ``self.content``.

        Returns ``self`` so that ``.text`` can be chained directly onto the
        call.
        """
        buffer = BytesIO()
        self.c.setopt(pycurl.URL, url)
        self.c.setopt(pycurl.WRITEDATA, buffer)
        self.c.setopt(pycurl.ENCODING, 'gzip')  # Automatically handle gzip encoding
        self.c.perform()
        self.content = buffer.getvalue()
        return self

    @property
    def text(self):
        """Return the last response body decoded as UTF-8.

        Raises ``AttributeError`` if accessed before any ``get`` call,
        because ``content`` is still ``None`` at that point.
        """
        return self.content.decode('utf-8')

    def __del__(self):
        # __del__ also runs when __init__ raised before ``self.c`` was
        # assigned; guard the lookup so the finalizer never raises itself.
        handle = getattr(self, "c", None)
        if handle is not None:
            handle.close()

results = []
PACKAGES = [
("requests", requests.Session),
("httpx", httpx.Client),
("tls_client", tls_client.Session),
("curl_cffi", curl_cffi.requests.Session),
("pycurl", PycurlSession),
("pyreqwest_impersonate", pyreqwest_impersonate.Client),
]

Expand All @@ -22,6 +47,12 @@ def add_package_version(packages):
return [(f"{name} {version(name)}", classname) for name, classname in packages]


def get_test(session_class, requests_number):
    """Issue ``requests_number`` GET requests, each on a brand-new client.

    A fresh ``session_class()`` instance is built for every request, so
    nothing is reused between requests at this level. The target address
    is read from the module-level ``url`` variable.
    """
    for _request_index in range(requests_number):
        client = session_class()
        # The ``.text`` value is discarded; the attribute is accessed only
        # so that reading the response body is part of the measured work.
        client.get(url).text


def session_get_test(session_class, requests_number):
s = session_class()
for _ in range(requests_number):
Expand All @@ -30,76 +61,48 @@ def session_get_test(session_class, requests_number):

PACKAGES = add_package_version(PACKAGES)

# one thread
requests_number = 2000
for response_size in ["5k", "50k", "200k"]:
url = f"http://127.0.0.1:8000/{response_size}"
print(f"\nOne worker, {response_size=}, {requests_number=}")
for name, session_class in PACKAGES:
start = time.perf_counter()
cpu_start = time.process_time()
session_get_test(session_class, requests_number)
dur = round(time.perf_counter() - start, 2)
cpu_dur = round(time.process_time() - cpu_start, 2)
results.append(
{
"name": name,
"threads": 1,
"size": response_size,
"time": dur,
"cpu_time": cpu_dur,
}
)
print(f" name: {name:<30} time: {dur} cpu_time: {cpu_dur}")


# multiple threads
requests_number = 2000
threads_numbers = [5, 32]
for threads_number in threads_numbers:
for session in [False, True]:
for response_size in ["5k", "50k", "200k"]:
url = f"http://127.0.0.1:8000/{response_size}"
print(f"\n{threads_number} workers, {response_size=}, {requests_number=}")
print(f"\n{session=}, {response_size=}, {requests_number=}")
for name, session_class in PACKAGES:
start = time.perf_counter()
cpu_start = time.process_time()
with ThreadPoolExecutor(threads_number) as executor:
futures = [
executor.submit(session_get_test, session_class, int(requests_number / threads_number))
for _ in range(threads_number)
]
for f in as_completed(futures):
f.result()
if session:
session_get_test(session_class, requests_number)
else:
get_test(session_class, requests_number)
dur = round(time.perf_counter() - start, 2)
cpu_dur = round(time.process_time() - cpu_start, 2)
results.append(
{
"name": name,
"threads": threads_number,
"session": session,
"size": response_size,
"time": dur,
"cpu_time": cpu_dur,
}
)
print(f" name: {name:<30} time: {dur} cpu_time: {cpu_dur}")


df = pd.DataFrame(results)
pivot_df = df.pivot_table(
index=["name", "threads"],
index=["name", "session"],
columns="size",
values=["time", "cpu_time"],
aggfunc="mean",
)
pivot_df.reset_index(inplace=True)
pivot_df.columns = [" ".join(col).strip() for col in pivot_df.columns.values]
pivot_df = pivot_df[
["name", "threads"]
+ [col for col in pivot_df.columns if col not in ["name", "threads"]]
["name", "session"]
+ [col for col in pivot_df.columns if col not in ["name", "session"]]
]
unique_threads = pivot_df["threads"].unique()
for thread in unique_threads:
thread_df = pivot_df[pivot_df["threads"] == thread]
print(f"\nTable for {thread} threads:")
print(thread_df.to_string(index=False))
thread_df.to_csv(f"{thread}_threads.csv", index=False)
print(pivot_df)

for session in [False, True]:
session_df = pivot_df[pivot_df["session"] == session]
print(f"\nTable for {session=}:")
print(session_df.to_string(index=False))
session_df.to_csv(f"{session=}.csv", index=False)
29 changes: 8 additions & 21 deletions benchmark/generate_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,13 @@ def plot_data(file_name, ax, offset):
return x, width, names

# Create a figure with two subplots
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 6), layout='constrained')
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 5), layout='constrained')

# Plot data for session=False in the first subplot
x1, width, names = plot_data('1_threads.csv', ax1, 0)

# Plot data for 5 threads in the second subplot
x2, _, _ = plot_data('5_threads.csv', ax2, 0)

# Plot data for 32 threads in the second subplot
x3, _, _ = plot_data('32_threads.csv', ax3, 0)
x1, width, names = plot_data('session=False.csv', ax1, 0)

# Plot data for session=True in the second subplot
x2, _, _ = plot_data('session=True.csv', ax2, 0)

# Adjust the y-axis limits for the first subplot
y_min, y_max = ax1.get_ylim()
Expand All @@ -71,29 +67,20 @@ def plot_data(file_name, ax, offset):
new_y_max = y_max + 1
ax2.set_ylim(y_min, new_y_max)

# Adjust the y-axis limits for the third subplot
y_min, y_max = ax3.get_ylim()
new_y_max = y_max + 1
ax3.set_ylim(y_min, new_y_max)

# Add some text for labels, title and custom x-axis tick labels, etc.
ax1.set_ylabel('Time (s)')
ax1.set_title('Performance comparison. Threads: 1. Requests: 2000. Response: gzip, utf-8, size 5Kb|50Kb|200Kb')
ax1.set_title('Benchmark get(url).text | Session=False | Requests: 2000 | Response: gzip, utf-8, size 5Kb,50Kb,200Kb')
ax1.set_xticks(x1 + 3*width - width/2) # Adjust the x-ticks to be after the 3rd bar, moved 0.5 bar width to the left
ax1.set_xticklabels(names)
ax1.legend(loc='upper left', ncols=6, prop={'size': 8})
ax1.tick_params(axis='x', labelsize=8)

ax2.set_ylabel('Time (s)')
ax2.set_title('Performance comparison. Threads: 5. Requests: 2000. Response: gzip, utf-8, size 5Kb|50Kb|200Kb')
ax2.set_title('Benchmark get(url).text | Session=True | Requests: 2000 | Response: gzip, utf-8, size 5Kb,50Kb,200Kb')
ax2.set_xticks(x2 + 3*width - width/2) # Adjust the x-ticks to be after the 3rd bar, moved 0.5 bar width to the left
ax2.set_xticklabels(names)
ax2.legend(loc='upper left', ncols=6, prop={'size': 8})

ax3.set_ylabel('Time (s)')
ax3.set_title('Performance comparison. Threads: 32. Requests: 2000. Response: gzip, utf-8, size 5Kb|50Kb|200Kb')
ax3.set_xticks(x3 + 3*width - width/2) # Adjust the x-ticks to be after the 3rd bar, moved 0.5 bar width to the left
ax3.set_xticklabels(names)
ax3.legend(loc='upper left', ncols=6, prop={'size': 8})
ax2.tick_params(axis='x', labelsize=8)

# Save the plot to a file
plt.savefig('benchmark.jpg', format='jpg', dpi=80, bbox_inches='tight')
1 change: 1 addition & 0 deletions benchmark/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ httpx
tls-client
pyreqwest_impersonate
curl_cffi
pycurl
typing_extensions # tls-client py3.12 dependence

0 comments on commit 9df8e08

Please sign in to comment.