Benchmark: 1)add pycurl, 2)test session=False and session=True …

…cases
deedy5 · Jul 22, 2024 · 9df8e08 · 9df8e08
1 parent c476f64
commit 9df8e08
Show file tree

Hide file tree

Showing 4 changed files with 59 additions and 67 deletions.
diff --git a/benchmark/README.md b/benchmark/README.md
@@ -4,6 +4,7 @@ Benchmark between `pyreqwests_impersonate` and other python http clients:
 
 - curl_cffi
 - httpx
+- pycurl
 - pyreqwests_impersonate
 - python-tls-client
 - requests

diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
@@ -1,19 +1,44 @@
+import gzip
 import time
-from concurrent.futures import ThreadPoolExecutor, as_completed
+from io import BytesIO
+
 from importlib.metadata import version
 import pandas as pd
 import requests
 import httpx
 import tls_client
+import pycurl
 import pyreqwest_impersonate
 import curl_cffi.requests
 
+class PycurlSession:
+    def __init__(self):
+        self.c = pycurl.Curl()
+        self.content = None
+
+    def get(self, url):
+        buffer = BytesIO()
+        self.c.setopt(pycurl.URL, url)
+        self.c.setopt(pycurl.WRITEDATA, buffer)
+        self.c.setopt(pycurl.ENCODING, 'gzip')  # Automatically handle gzip encoding
+        self.c.perform()
+        self.content = buffer.getvalue()
+        return self
+
+    @property
+    def text(self):
+        return self.content.decode('utf-8')
+
+    def __del__(self):
+        self.c.close()
+
 results = []
 PACKAGES = [
     ("requests", requests.Session),
     ("httpx", httpx.Client),
     ("tls_client", tls_client.Session),
     ("curl_cffi", curl_cffi.requests.Session),
+    ("pycurl", PycurlSession),
     ("pyreqwest_impersonate", pyreqwest_impersonate.Client),
 ]
 
@@ -22,6 +47,12 @@ def add_package_version(packages):
     return [(f"{name} {version(name)}", classname) for name, classname in packages]
 
 
+def get_test(session_class, requests_number):
+    for _ in range(requests_number):
+        s = session_class()
+        s.get(url).text
+
+
 def session_get_test(session_class, requests_number):
     s = session_class()
     for _ in range(requests_number):
@@ -30,76 +61,48 @@ def session_get_test(session_class, requests_number):
 
 PACKAGES = add_package_version(PACKAGES)
 
-# one thread
 requests_number = 2000
-for response_size in ["5k", "50k", "200k"]:
-    url = f"http://127.0.0.1:8000/{response_size}"
-    print(f"\nOne worker, {response_size=}, {requests_number=}")
-    for name, session_class in PACKAGES:
-        start = time.perf_counter()
-        cpu_start = time.process_time()
-        session_get_test(session_class, requests_number)
-        dur = round(time.perf_counter() - start, 2)
-        cpu_dur = round(time.process_time() - cpu_start, 2)
-        results.append(
-            {
-                "name": name,
-                "threads": 1,
-                "size": response_size,
-                "time": dur,
-                "cpu_time": cpu_dur,
-            }
-        )
-        print(f"    name: {name:<30} time: {dur} cpu_time: {cpu_dur}")
-
-
-# multiple threads
-requests_number = 2000
-threads_numbers = [5, 32]
-for threads_number in threads_numbers:
+for session in [False, True]:
     for response_size in ["5k", "50k", "200k"]:
         url = f"http://127.0.0.1:8000/{response_size}"
-        print(f"\n{threads_number} workers, {response_size=}, {requests_number=}")
+        print(f"\n{session=}, {response_size=}, {requests_number=}")
         for name, session_class in PACKAGES:
             start = time.perf_counter()
             cpu_start = time.process_time()
-            with ThreadPoolExecutor(threads_number) as executor:
-                futures = [
-                    executor.submit(session_get_test, session_class, int(requests_number / threads_number))
-                    for _ in range(threads_number)
-                ]
-                for f in as_completed(futures):
-                    f.result()
+            if session:
+                session_get_test(session_class, requests_number)
+            else:
+                get_test(session_class, requests_number)
             dur = round(time.perf_counter() - start, 2)
             cpu_dur = round(time.process_time() - cpu_start, 2)
             results.append(
                 {
                     "name": name,
-                    "threads": threads_number,
+                    "session": session,
                     "size": response_size,
                     "time": dur,
                     "cpu_time": cpu_dur,
                 }
             )
             print(f"    name: {name:<30} time: {dur} cpu_time: {cpu_dur}")
 
-
 df = pd.DataFrame(results)
 pivot_df = df.pivot_table(
-    index=["name", "threads"],
+    index=["name", "session"],
     columns="size",
     values=["time", "cpu_time"],
     aggfunc="mean",
 )
 pivot_df.reset_index(inplace=True)
 pivot_df.columns = [" ".join(col).strip() for col in pivot_df.columns.values]
 pivot_df = pivot_df[
-    ["name", "threads"]
-    + [col for col in pivot_df.columns if col not in ["name", "threads"]]
+    ["name", "session"]
+    + [col for col in pivot_df.columns if col not in ["name", "session"]]
 ]
-unique_threads = pivot_df["threads"].unique()
-for thread in unique_threads:
-    thread_df = pivot_df[pivot_df["threads"] == thread]
-    print(f"\nTable for {thread} threads:")
-    print(thread_df.to_string(index=False))
-    thread_df.to_csv(f"{thread}_threads.csv", index=False)
+print(pivot_df)
+
+for session in [False, True]:
+    session_df = pivot_df[pivot_df["session"] == session]
+    print(f"\nTable for {session=}:")
+    print(session_df.to_string(index=False))
+    session_df.to_csv(f"{session=}.csv", index=False)
diff --git a/benchmark/generate_image.py b/benchmark/generate_image.py
@@ -49,17 +49,13 @@ def plot_data(file_name, ax, offset):
     return x, width, names
 
 # Create a figure with two subplots
-fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 6), layout='constrained')
+fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 5), layout='constrained')
 
 # Plot data for 1 thread in the first subplot
-x1, width, names = plot_data('1_threads.csv', ax1, 0)
-
-# Plot data for 5 threads in the second subplot
-x2, _, _ = plot_data('5_threads.csv', ax2, 0)
-
-# Plot data for 32 threads in the second subplot
-x3, _, _ = plot_data('32_threads.csv', ax3, 0)
+x1, width, names = plot_data('session=False.csv', ax1, 0)
 
+# Plot data for session=True in the second subplot
+x2, _, _ = plot_data('session=True.csv', ax2, 0)
 
 # Adjust the y-axis limits for the first subplot
 y_min, y_max = ax1.get_ylim()
@@ -71,29 +67,20 @@ def plot_data(file_name, ax, offset):
 new_y_max = y_max + 1
 ax2.set_ylim(y_min, new_y_max)
 
-# Adjust the y-axis limits for the third subplot
-y_min, y_max = ax3.get_ylim()
-new_y_max = y_max + 1
-ax3.set_ylim(y_min, new_y_max)
-
 # Add some text for labels, title and custom x-axis tick labels, etc.
 ax1.set_ylabel('Time (s)')
-ax1.set_title('Performance comparison. Threads: 1. Requests: 2000. Response: gzip, utf-8, size 5Kb|50Kb|200Kb')
+ax1.set_title('Benchmark get(url).text | Session=False | Requests: 2000 | Response: gzip, utf-8, size 5Kb,50Kb,200Kb')
 ax1.set_xticks(x1 + 3*width - width/2) # Adjust the x-ticks to be after the 3rd bar, moved 0.5 bar width to the left
 ax1.set_xticklabels(names)
 ax1.legend(loc='upper left', ncols=6, prop={'size': 8})
+ax1.tick_params(axis='x', labelsize=8)
 
 ax2.set_ylabel('Time (s)')
-ax2.set_title('Performance comparison. Threads: 5. Requests: 2000. Response: gzip, utf-8, size 5Kb|50Kb|200Kb')
+ax2.set_title('Benchmark get(url).text | Session=True | Requests: 2000 | Response: gzip, utf-8, size 5Kb,50Kb,200Kb')
 ax2.set_xticks(x2 + 3*width - width/2) # Adjust the x-ticks to be after the 3rd bar, moved 0.5 bar width to the left
 ax2.set_xticklabels(names)
 ax2.legend(loc='upper left', ncols=6, prop={'size': 8})
-
-ax3.set_ylabel('Time (s)')
-ax3.set_title('Performance comparison. Threads: 32. Requests: 2000. Response: gzip, utf-8, size 5Kb|50Kb|200Kb')
-ax3.set_xticks(x3 + 3*width - width/2) # Adjust the x-ticks to be after the 3rd bar, moved 0.5 bar width to the left
-ax3.set_xticklabels(names)
-ax3.legend(loc='upper left', ncols=6, prop={'size': 8})
+ax2.tick_params(axis='x', labelsize=8)
 
 # Save the plot to a file
 plt.savefig('benchmark.jpg', format='jpg', dpi=80, bbox_inches='tight')
diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt
@@ -7,4 +7,5 @@ httpx
 tls-client
 pyreqwest_impersonate
 curl_cffi
+pycurl
 typing_extensions  # tls-client py3.12 dependence