diff --git a/benchmarks/performance_test.py b/benchmarks/performance_test.py
new file mode 100644
index 0000000000..31a027505b
--- /dev/null
+++ b/benchmarks/performance_test.py
@@ -0,0 +1,505 @@
+# Copyright (c) 2015-2024 Satpy developers
+#
+# This file is part of satpy.
+#
+# satpy is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# satpy. If not, see <http://www.gnu.org/licenses/>.
+"""Base class for performance test."""
+import csv
+import glob
+import os
+import platform
+import time
+from datetime import datetime, timezone
+from io import BytesIO
+from itertools import zip_longest
+from threading import Thread
+
+import cpuinfo
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import psutil
+
+OS_TYPE = platform.system()
+OS_MEMORY_KEY_MAP = {
+ "Windows": "Memory Usage (Physical + PageFile)",
+ "Linux": "Memory Usage (Physical + Swap)"
+}
+
+MEMORY_KEY = OS_MEMORY_KEY_MAP.get(OS_TYPE, "Memory Usage")
+FIGURES = {"Process Time (single scene average)": {"key_y": "Time (s)", "colors": ["#4E79A7"]},
+ "Average CPU Usage": {"key_y": "Avg CPU (%)", "colors": ["#F28E2B"]},
+ MEMORY_KEY: {"key_y": ["Avg Memory (GB)", "Max Memory (GB)"], "colors": ["#59A14F", "#EDC948"]}}
+
+
+class SatpyPerformanceTest:
+ """Test satpy performance by looping through conditions involving ``dask_array_chunk_size``and ``dask_num_workers``.
+
+ There are two tests: ``simple_test`` and ``resampler_test``. See below for details.
+
+ """
+ def __init__(self, work_dir, folder_pattern, reader_name, composite, chunk_size_opts, worker_opts,
+ reader_kwargs=None):
+ """Initialize SatpyPerformanceTest with some basic arguments.
+
+ Args:
+ work_dir (str): Absolute path to the base directory that contains all your dataset folders.
+ folder_pattern (str): Naming scheme of the dataset folders, e.g. `G16_s*_e*_FLDK`.
+ This will be used for ``glob.glob`` to search the datasets.
+ reader_name (str): Reader you want to test.
+ composite (str): Composite for test.
+ chunk_size_opts (list): All the ``dask_array_chunk_size`` values you wish for, in `MiB`.
+ worker_opts (list): All the ``dask_num_workers`` values you wish for.
+ reader_kwargs (dict): Additional reader arguments for ``Scene``,
+ like ``{'mask_saturated': False}`` in modis_l1b.
+
+ """
+ super().__init__()
+ self.work_dir = work_dir
+ self.folder_pattern = folder_pattern
+ self.reader_name = reader_name
+ self.reader_kwargs = reader_kwargs
+ self.composite = composite
+
+ self.folders = glob.glob(f"{self.work_dir}/{self.folder_pattern}")
+
+ self.chunk_size_opts = chunk_size_opts
+ self.worker_opts = worker_opts
+
+ self.result = {}
+ self.running = True
+
+ def monitor_system_usage(self, interval=0.5):
+ """Use psutil to record CPU and memory usage. Default sample rate is 0.5s."""
+ process = psutil.Process()
+ cpu_usage = []
+ memory_usage = []
+ timestamps = []
+
+ start_time = time.time()
+ while self.running:
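+            # cpu_percent() reports usage since the previous call, so the very first sample may read 0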
+ cpu_usage.append(process.cpu_percent())
+            mem_info = process.memory_full_info()
+            if OS_TYPE == "Windows":
+                # On Windows, "vms" corresponds to the pagefile-backed commit size
+                memory_usage.append(mem_info.rss + mem_info.vms)
+            elif OS_TYPE == "Linux":
+                memory_usage.append(mem_info.rss + mem_info.swap)
+            else:
+                memory_usage.append(mem_info.rss)
+ timestamps.append(time.time() - start_time)
+ time.sleep(interval)
+
+ self.result["cpu_usage"] = cpu_usage
+ self.result["memory_usage"] = memory_usage
+ self.result["timestamps"] = timestamps
+
+ def write_to_csv(self, file_name):
+ """Write the result of each round to a csv file."""
+ with open(file_name, "w", newline="", encoding="utf-8") as csvfile:
+ csvwriter = csv.writer(csvfile)
+ csvwriter.writerow(["Timestamp (s)", "CPU Usage (%)", "Memory Usage (Byte)", "Process Time", "Scenes",
+ "Errors"])
+ for ts, cpu, mem, pt, scn, er in zip_longest(self.result["timestamps"], self.result["cpu_usage"],
+ self.result["memory_usage"], self.result["process_time"],
+ self.result["scenes"], self.result["errors"], fillvalue="N/A"):
+ csvwriter.writerow([ts, cpu, mem, pt, scn, er])
+
+ def satpy_test(self, resampler, generate=False, area_def=None, resampler_kwargs=None):
+ """Call satpy to do the test."""
+ from satpy import Scene, find_files_and_readers
+
+ reader_kwargs = {} if self.reader_kwargs is None else self.reader_kwargs
+ resampler_kwargs = {} if resampler_kwargs is None else resampler_kwargs
+ for folder in self.folders:
+ files = find_files_and_readers(base_dir=folder, reader=self.reader_name)
+ scn = Scene(filenames=files, reader_kwargs=reader_kwargs)
+ scn.load([self.composite], generate=generate)
+
+ if resampler == "none":
+ scn2 = scn
+ else:
+ scn2 = scn.resample(area_def, resampler=resampler, **resampler_kwargs)
+
+ scn2.save_dataset(self.composite, writer="geotiff", filename="test.tif", base_dir=self.work_dir,
+ fill_value=0, compress=None)
+
+ def single_loop(self, conditions, generate=False, area_def=None, resampler_kwargs=None):
+ """Single round of the test."""
+ import dask.config
+ self.running = True
+
+ chunk_size, num_worker, resampler = conditions
+
+ dask.config.set({"array.chunk-size": f"{chunk_size}MiB"})
+ dask.config.set(num_workers=num_worker)
+
+ try:
+ num_thread = os.environ["OMP_NUM_THREADS"]
+ except KeyError:
+ num_thread = psutil.cpu_count(logical=True)
+
+ print(f"Start testing CHUNK_SIZE={chunk_size}MiB, NUM_WORKER={num_worker}, NUM_THREADS={num_thread}, " # noqa
+ f"""resampler is "{resampler}".""")
+
+ # Start recording cpu/mem usage
+ monitor_thread = Thread(target=self.monitor_system_usage, args=(0.5,))
+ monitor_thread.start()
+
+ errors = []
+ start = time.perf_counter()
+ try:
+ self.satpy_test(resampler, generate, area_def, resampler_kwargs)
+ except Exception as e:
+ errors.append(e)
+
+ end = time.perf_counter()
+        # All of these must be list objects
+ self.result["process_time"] = [end - start]
+ self.result["scenes"] = [len(self.folders)]
+ self.result["errors"] = errors
+
+ # Stop recording
+ self.running = False
+ monitor_thread.join()
+
+ if area_def is None:
+ area = "original"
+ else:
+ area = "local" if len(area_def.area_id) == 0 else area_def.area_id.replace("_", "")
+
+ csv_file = (f"{self.work_dir}/{self.reader_name.replace('_', '')}_"
+ f"chunk{chunk_size}_worker{num_worker}_thread{num_thread}_{area}_{resampler}.csv")
+ self.write_to_csv(csv_file)
+
+ def simple_test(self, diff_res=False):
+ """Test the reader in dataset's original projection, with no resampling or the simplest ``native`` resampling.
+
+ Args:
+            diff_res (bool): If the composite requires bands in different resolutions, this should be set to `True`
+                so the native resampler will match them to ``scn.finest_area()``.
+                For example, ``true_color`` of ABI needs the 1000 m C01 and 500 m C02 bands, so it's `True`.
+                **This is not a test option and must be set correctly for the composite,**
+                otherwise the test will end in errors.
+
+ """
+ resampler = "native" if diff_res else "none"
+ generate = not diff_res
+ total_rounds = len(self.chunk_size_opts) * len(self.worker_opts)
+
+ print(f"{self.reader_name} test started. Composite is \"{self.composite}\".\n") # noqa
+ i = 0
+ for chunk_size in self.chunk_size_opts:
+ for num_worker in self.worker_opts:
+ self.single_loop((chunk_size, num_worker, resampler), generate=generate)
+ i = i + 1
+
+ if i == total_rounds:
+ print(f"ROUND {i} / {total_rounds} Completed. Generating HTML report.") # noqa
+ html_report(self.work_dir, self.reader_name)
+ else:
+ print(f"ROUND {i} / {total_rounds} Completed. Now take a 1-min rest.\n") # noqa
+ time.sleep(60)
+
+ def resampler_test(self, resamplers, area_def, resampler_kwargs=None):
+ """Test the reader with resampling.
+
+ Args:
+ resamplers (list or str): A single resampling algorithm or a list of resampling algorithms you want to test.
+ area_def (AreaDefinition or DynamicAreaDefinition or str): Area definition or the name of an area stored
+ in YAML.
+ resampler_kwargs (dict): Additional arguments passed to the resampler. You can specify the separate
+ kwargs for each resampler, e.g.
+ ``{'bilinear': {'cache_dir': '/path/to/my/cache'},
+ 'ewa': {'weight_delta_max': 40, 'weight_distance_max': 2}}``.
+ Or you can just give general kwargs like ``{'cache_dir': '/path/to/my/cache'}`` for
+ both ``nearest`` and ``bilinear``.
+
+ """
+ resamplers = [resamplers] if not isinstance(resamplers, list) else resamplers
+ resampler_kwargs = {} if resampler_kwargs is None else resampler_kwargs
+ total_rounds = len(self.chunk_size_opts) * len(self.worker_opts) * len(resamplers)
+
+ print(f"{self.reader_name} test started. Composite is \"{self.composite}\".\n") # noqa
+ i = 0
+ for chunk_size in self.chunk_size_opts:
+ for num_worker in self.worker_opts:
+ for resampler in resamplers:
+ try:
+ single_resampler_kwargs = resampler_kwargs[resampler]
+ except KeyError:
+ single_resampler_kwargs = resampler_kwargs
+
+ self.single_loop((chunk_size, num_worker, resampler),
+ area_def=area_def, resampler_kwargs=single_resampler_kwargs)
+ i = i + 1
+
+ if i == total_rounds:
+ print(f"ROUND {i} / {total_rounds} Completed. Generating HTML report.") # noqa
+ html_report(self.work_dir, self.reader_name)
+ else:
+ print(f"ROUND {i} / {total_rounds} Completed. Now take a 1-min rest.\n") # noqa
+ time.sleep(60)
+
+
+def process_csv(csv_file):
+ """Process result csv and return a dataframe."""
+ # Extract information from the filename
+    filename = os.path.basename(csv_file)
+ filename = filename.split(".")[0]
+ filename_parts = filename.split("_")
+ dask_array_chunk_size = int(filename_parts[1].replace("chunk", ""))
+ dask_num_workers = int(filename_parts[2].replace("worker", ""))
+ omp_num_threads = int(filename_parts[3].replace("thread", ""))
+ area = filename_parts[4]
+ resampling_alg = filename_parts[5]
+
+    data = pd.read_csv(csv_file, keep_default_na=False)
+ scenes = int(data.loc[0, "Scenes"])
+ cpu_thread = psutil.cpu_count(logical=True)
+
+ # Prepare the row dictionary for the new CSV based on filename
+ new_row = {
+ "Dask Array Chunk Size (MB)": dask_array_chunk_size,
+ "Dask Num Workers": dask_num_workers,
+ "Omp Num Threads": omp_num_threads,
+ "Area": area,
+ "Resampling Algorithm": resampling_alg,
+ "Time (s)": round(float(data.loc[0, "Process Time"]) / scenes, 2),
+ "Avg Memory (GB)": round(data["Memory Usage (Byte)"].mean() / (1024 ** 3), 2),
+ "Max Memory (GB)": round(data["Memory Usage (Byte)"].max() / (1024 ** 3), 2),
+ "Avg CPU (%)": round(data["CPU Usage (%)"].mean() / cpu_thread, 2),
+ "Scenes": scenes,
+ "Errors": data.loc[0, "Errors"],
+ }
+
+ df = pd.DataFrame([new_row])
+
+ return df
+
+
+def combined_csv(work_dir, reader_name):
+ """Collect all the csv files under ``work_dir`` and merge them in to one dataframe."""
+ all_dataframes = []
+ csvs = glob.glob(f"{work_dir}/{reader_name.replace('_', '')}_chunk*_worker*_thread*_*_*.csv")
+ for file in csvs:
+ df = process_csv(file)
+ all_dataframes.append(df)
+
+ if not all_dataframes:
+ return
+
+ combined_df = pd.concat(all_dataframes, ignore_index=True)
+
+ # Sort the DataFrame
+ # Make sure "original" area always comes first
+ combined_df["sort_priority"] = np.where(combined_df["Area"].str.contains("original"), 0, 1)
+ sorted_df = combined_df.sort_values(by=["sort_priority", "Area", "Resampling Algorithm",
+ "Dask Array Chunk Size (MB)", "Dask Num Workers", "Omp Num Threads"])
+
+ sorted_df.reset_index(drop=True, inplace=True)
+
+ return sorted_df
+
+
+def draw_hbar(dataframe, title):
+ """Plot the bar chart by matplotlib."""
+ colors = FIGURES[title]["colors"]
+ key_x = "Chunk size - Num workers - Num Threads"
+ key_y = FIGURES[title]["key_y"]
+
+ dpi = 100
+ fig_width = 1080 / dpi
+ # Dynamic height according to the dataframe
+ num_bars = len(dataframe)
+ fig_height = max(600, 100 + 50 * num_bars) / dpi
+ fig, ax = plt.subplots(figsize=(fig_width, fig_height), dpi=dpi)
+ plt.subplots_adjust(left=0.15, right=0.85, top=0.9, bottom=0.1)
+
+    dataframe.plot.barh(x=key_x, y=key_y, legend="Memory" in title,
+                        ax=ax, width=0.5, color=colors, stacked="Memory" in title)
+ plt.title(title, fontsize=16)
+ plt.ylabel(key_x, fontsize=14)
+ plt.xlabel("Memory (GB)" if "Memory" in title else key_y, fontsize=14)
+ ax.tick_params(axis="both", labelsize=12)
+ if "Memory" in title:
+ ax.legend(loc="upper right")
+ # Mark the position of physical memory limit
+ physical_memory = psutil.virtual_memory().total // (1024 ** 3)
+ ax.axvline(x=physical_memory, color="#808080", linestyle="--")
+ ax.text(physical_memory + 0.5, 1, "Physical\nMemory\nLimit", color="#808080")
+ if "CPU" in title:
+ ax.set_xlim([0, 100])
+
+    # Data labels to the right of the bars
+ cumulative_widths = [0] * len(dataframe)
+ for i, container in enumerate(ax.containers):
+ for j, bar in enumerate(container):
+ width = bar.get_width()
+ cumulative_widths[j] = cumulative_widths[j] + width if "Memory" in title else width
+ label_x_pos = cumulative_widths[j] + 0.3
+
+ if i == 0:
+ # For "Time", "CPU" and "Avg Memory"
+ label_text = str(round(width, 2))
+ else:
+ # For "Max Memory"
+ # Because in the dataframe for graph it's actually the difference between Max and Avg
+ # so that we can draw the "stacked" bars correctly.
+ # Now we have to restore the value to the real Max when writing the label.
+ label_text = str(round(cumulative_widths[j], 2))
+
+ ax.text(label_x_pos, bar.get_y() + bar.get_height() / 2, label_text, va="center")
+
+ svg = BytesIO()
+ plt.savefig(svg, format="svg")
+ svg = svg.getvalue().decode("utf-8")
+ plt.close()
+
+ return svg
+
+
+def html_head(reader_name):
+ """Generate the html head of the report."""
+ import dask
+ import pyresample
+ import pyspectral
+ import xarray as xr
+
+ import satpy
+
+ # Get system info
+ cpu_core = psutil.cpu_count(logical=False)
+ cpu_thread = psutil.cpu_count(logical=True)
+ cpu_info = cpuinfo.get_cpu_info()
+ cpu_model = cpu_info["brand_raw"]
+ memory_info = psutil.virtual_memory().total // (1024 ** 3)
+ os_info = platform.platform()
+
+ # Get Python env
+ python_version = platform.python_version()
+ numpy_version = np.__version__
+ dask_version = dask.__version__
+ xarray_version = xr.__version__
+ satpy_version = satpy.__version__
+ pyresample_version = pyresample.__version__
+ pyspectral_version = pyspectral.__version__
+ psutil_version = psutil.__version__
+
+ html_content = f"""
+
+
+
+
+ Satpy Performance Test Report for {reader_name}
+
+
+
+
+ Satpy Performance Test Report for {reader_name}
+ Generation Date: UTC {datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")}
+ 1. System Info
+ 1.1 Platform
+ CPU: {cpu_model}, {cpu_core} cores / {cpu_thread} threads in total
+ Physical Memory: {memory_info} GB
+ OS: {os_info}
+ 1.2 Python Environment
+ Python: {python_version}
+ Numpy: {numpy_version}
+ Dask: {dask_version}
+ Xarray: {xarray_version}
+ Satpy: {satpy_version}
+ Pyresample: {pyresample_version}
+ Pyspectral: {pyspectral_version}
+ Psutil: {psutil_version}
+ 2. Test Results
+ """
+
+ return html_content
+
+
+def html_report(work_dir, reader_name):
+ """Analyze the summary dataframe and produce an HTML report."""
+ df = combined_csv(work_dir, reader_name)
+ if df is None:
+ print("Test CSV result not found! Or its filename doesn't fit [*_chunk*_worker*_thread*_*_*.csv]") # noqa
+ return
+ # Group the dataframe for report
+ df["Group"] = "Area: " + df["Area"] + " - " + "Resampler: " + df["Resampling Algorithm"]
+ groups = df["Group"].unique()
+
+ html_content = html_head(reader_name)
+
+ for group in groups:
+ group_df = df[df["Group"] == group]
+ # Drop unnecessary column
+ group_df_table = group_df.drop(["Group", "Area", "Resampling Algorithm", "sort_priority"],
+ axis=1, inplace=False)
+
+ group_df_graph = group_df.copy()
+        # Build a new column combining the condition values to make plotting the chart easier
+ group_df_graph["Chunk size - Num workers - Num Threads"] = (
+ group_df_graph["Dask Array Chunk Size (MB)"].astype(str) + " - " +
+ group_df_graph["Dask Num Workers"].astype(str) + " - " +
+ group_df_graph["Omp Num Threads"].astype(str))
+ group_df_graph = group_df_graph.sort_values(by=["Dask Array Chunk Size (MB)", "Dask Num Workers",
+ "Omp Num Threads"], ascending=False)
+ # For stacked bar
+ group_df_graph["Max Memory (GB)"] = group_df_graph["Max Memory (GB)"] - group_df_graph["Avg Memory (GB)"]
+        # Zero out the rows that had errors so they stand out clearly in the chart
+ group_df_graph.loc[group_df_graph["Errors"] != "N/A", ["Time (s)", "Avg CPU (%)",
+ "Avg Memory (GB)", "Max Memory (GB)"]] = 0
+
+ group_html = group_df_table.to_html(index=False)
+ html_content += f"""
+ 2.{groups.tolist().index(group) + 1} {group}
+ 2.{groups.tolist().index(group) + 1}.1 Table
+ {group_html}
+ 2.{groups.tolist().index(group) + 1}.2 Charts
+ """
+
+ # Plot three charts: time, cpu and mem
+ for title in FIGURES.keys():
+ svg_bar = draw_hbar(group_df_graph, title)
+ html_content += f"""
+
+ {svg_bar}
+
+ """
+
+ # Finish HTML report
+ html_content += """
+
+
+ """
+
+ # Save it to disk
+ with open(f"{work_dir}/{reader_name}_test_report.html", "w", encoding="utf-8") as f:
+ f.write(html_content)
diff --git a/doc/rtd_environment.yml b/doc/rtd_environment.yml
index abd8add616..94c5567ae3 100644
--- a/doc/rtd_environment.yml
+++ b/doc/rtd_environment.yml
@@ -12,14 +12,18 @@ dependencies:
# 2.19.1 seems to cause library linking issues
- eccodes>=2.20
- graphviz
+ - matplotlib-base
- numba
- numpy
+ - pandas
- pillow
- pooch
+ - psutil
- pyresample
- pytest
- python-eccodes
- python-geotiepoints
+ - py-cpuinfo
- rasterio
- rioxarray
- setuptools
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 62156e9760..e808c8bcfa 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -129,6 +129,9 @@ def __getattr__(cls, name):
"--private",
]
+# Additional API for 'performance_test'
+sys.path.insert(0, os.path.abspath("../../benchmarks/"))
+
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 66a069fcda..296b931d9c 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -72,11 +72,13 @@ Documentation
multiscene
dev_guide/index
+
.. toctree::
:maxdepth: 1
Satpy API
faq
+ performance_test
Release Notes
Security Policy
diff --git a/doc/source/performance_test.rst b/doc/source/performance_test.rst
new file mode 100644
index 0000000000..9104620de2
--- /dev/null
+++ b/doc/source/performance_test.rst
@@ -0,0 +1,149 @@
+================
+Performance Test
+================
+
+For better performance tuning of specific readers, a tool ``performance_test`` under ``benchmarks`` is introduced
+here. It involves ``DASK_ARRAY__CHUNK_SIZE``, ``DASK_NUM_WORKERS``, ``OMP_NUM_THREADS`` and other options mentioned
+in :doc:`FAQ <../faq>`. This tool can loop through the conditions defined by these values and finally give a report
+in HTML. The report contains tables and charts for easier interpretation. It offers two types of tests:
+``simple_test`` and ``resampler_test``.
+
+
+How does it work?
+=================
+- The core is just a regular satpy script -- find the datasets, load the composite, resample it if needed and
+  save it as geotiff.
+- A monitor thread using ``psutil`` records the CPU and memory usage alongside the test, sampling roughly every
+  0.5 seconds. Any errors during the test are also recorded.
+- Each round tests one single condition and stores the result in a csv file. After that, the machine takes
+  a 1-min rest to let the CPU cool down.
+- After all the tests have finished, it collects all the result csv files, summarizing and visualizing them into
+  the HTML report.
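+
+Condensed from ``satpy_test``, the core of a single round is roughly equivalent to the sketch below
+(``folder``, ``reader_name``, ``composite``, ``area_def`` and the various keyword arguments stand in for
+the values of the current test condition):
+
+.. code-block:: python
+
+    from satpy import Scene, find_files_and_readers
+
+    files = find_files_and_readers(base_dir=folder, reader=reader_name)
+    scn = Scene(filenames=files, reader_kwargs=reader_kwargs)
+    scn.load([composite], generate=generate)
+
+    # Resample only when a resampler was requested for this round
+    scn2 = scn if resampler == "none" else scn.resample(area_def, resampler=resampler, **resampler_kwargs)
+
+    scn2.save_dataset(composite, writer="geotiff", filename="test.tif",
+                      base_dir=work_dir, fill_value=0, compress=None)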
+
+
+Preparations
+============
+1. Additional packages required
+-------------------------------
+- **psutil:** Record CPU/memory usage.
+- **pandas:** Analyze test result.
+- **matplotlib**: (Either ``matplotlib`` or ``matplotlib-base``) Plot the charts for report.
+- **py-cpuinfo**: Get the CPU model for report.
+
+
+2. Choose the composite and get corresponding datasets
+------------------------------------------------------
+Usually the composite should be one that involves a lot of computation, like atmospheric correction. For most
+earth-observing satellites, this could be ``true_color`` or something similar.
+
+Although one scene is enough to run the test, 3-5 scenes give a more reliable average.
+
+- For geostationary satellites, it is recommended to use scenes around **solar noon** in **full-disk** scan mode.
+- For polar-orbiting satellites, scenes should cover roughly the **same area** so the computational load is similar.
+
+
+3. Organize the datasets
+------------------------
+One scene per folder. All the dataset folders should follow the same naming pattern, e.g.:
+
+.. code-block:: batch
+
+ 2024/06/29 09:06 G16_s20241691700214_e20241691709522_FLDK
+ 2024/06/29 09:06 G16_s20241701700215_e20241701709523_FLDK
+ 2024/06/29 09:06 G16_s20241711700217_e20241711709525_FLDK
+ 2024/06/29 09:06 G16_s20241721700219_e20241721709527_FLDK
+ 2024/06/29 09:06 G16_s20241731700220_e20241731709528_FLDK
+
+
+4. Do I have enough swap memory?
+--------------------------------
+Some conditions or resamplers may consume an enormous amount of physical memory and then spill into swap. When both
+are at their limits, the OS may simply kill the test process without any warnings or errors recorded.
+
+
+5. Arrange your time and work
+-----------------------------
+The whole test run can last for hours, depending on the reader, the datasets and the conditions.
+Keep the machine free during this period and avoid any unnecessary background jobs like software updates.
+
+
+Usage
+=====
+.. note::
+
+ Both ``simple_test`` and ``resampler_test`` collect all the results under ``work_dir`` and produce the report
+ in the same format. So if you already have some previous tests, just keep them in the same directory and the
+ test will merge them into one and refresh the report automatically.
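+
+If you only need to rebuild the report from csv files already on disk, you can call the module-level helper
+directly (a minimal sketch; the arguments must match the ``work_dir`` and reader used for the original runs):
+
+.. code-block:: python
+
+    from performance_test import html_report
+
+    # Re-collect all matching csv files under work_dir and regenerate the HTML report
+    html_report("C:/Users/ABC/Downloads/Sat/Geo/ABI pef test", "abi_l1b")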
+
+Initialize
+----------
+.. autofunction:: performance_test.SatpyPerformanceTest.__init__
+
+.. code-block:: python
+
+ import os
+ from performance_test import SatpyPerformanceTest
+
+ tester = SatpyPerformanceTest(work_dir="C:/Users/ABC/Downloads/Sat/Geo/ABI pef test",
+ folder_pattern="G16_s*_e*_FLDK",
+ reader_name="abi_l1b",
+ composite="true_color",
+ chunk_size_opts=[16, 64],
+ worker_opts=[8, 16])
+
+simple_test
+-----------
+.. autofunction:: performance_test.SatpyPerformanceTest.simple_test
+
+.. code-block:: python
+
+ # You can set some system environments related to satpy before running the test.
+ os.environ["PSP_CONFIG_FILE"] = "D:/satpy_config/pyspectral/pyspectral.yaml"
+
+ tester.simple_test(diff_res=True)
+
+resampler_test
+--------------
+.. autofunction:: performance_test.SatpyPerformanceTest.resampler_test
+
+.. code-block:: python
+
+ from pyresample.geometry import AreaDefinition
+
+ proj = "+proj=lcc +lon_0=-96 +lat_1=20 +lat_2=60 +datum=WGS84 +ellps=WGS84"
+ width = 8008
+ height = 8008
+ area_extent = (-106000, 2635000, 3898000, 6639000)
+    nprocs = 8
+
+ area_def = AreaDefinition(area_id="NorthAmerica", proj_id="lcc", description="na",
+ projection=proj, width=width, height=height, area_extent=area_extent, nprocs=nprocs)
+    tester.resampler_test(resamplers=["bilinear", "ewa"],
+                          area_def=area_def,
+                          resampler_kwargs={
+                              "bilinear": {"cache_dir": "C:/Users/ABC/Downloads/Sat/Geo/ABI pef test/cache"},
+                              "ewa": {"weight_delta_max": 40, "weight_distance_max": 2},
+                          })
+
+.. note::
+
+   When you test the ``bilinear`` or ``nearest`` resampler on geostationary datasets and want to both accelerate
+   the test and exclude the impact of the resampling cache, it is recommended to pre-build the cache with just one
+   scene and one condition. That also gives you a chance to see how big the difference between running with and
+   without the cache is (sometimes it is huge, especially for ``bilinear``).
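+
+   A minimal pre-build run could look like the sketch below. It assumes the ``area_def`` from the example
+   above, a hypothetical ``folder_pattern`` that matches only one scene, and a single chunk-size/worker
+   combination:
+
+   .. code-block:: python
+
+       warmup = SatpyPerformanceTest(work_dir="C:/Users/ABC/Downloads/Sat/Geo/ABI pef test",
+                                     folder_pattern="G16_s20241691700214_*_FLDK",
+                                     reader_name="abi_l1b",
+                                     composite="true_color",
+                                     chunk_size_opts=[64],
+                                     worker_opts=[8])
+       warmup.resampler_test(resamplers="bilinear",
+                             area_def=area_def,
+                             resampler_kwargs={"cache_dir": "C:/Users/ABC/Downloads/Sat/Geo/ABI pef test/cache"})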
+
+How to test ``OMP_NUM_THREADS``?
+--------------------------------
+``OMP_NUM_THREADS`` should be set outside the Python script. On **Linux**, you can set it temporarily by
+
+.. code-block:: shell
+
+ OMP_NUM_THREADS=4 python your_test_script.py
+
+On **Windows**:
+
+.. code-block:: batch
+
+ set OMP_NUM_THREADS=4 && python your_test_script.py
+
+You can also choose not to set it; normally the program will use as many logical cores as are available. Either way,
+the test will pick up the correct value and record it in the report.