From d4a5f4e3a7eb7acc42ea383fda700c3c28d40bf5 Mon Sep 17 00:00:00 2001
From: Daniel Standish <15932138+dstandish@users.noreply.github.com>
Date: Thu, 16 May 2024 15:07:01 -0700
Subject: [PATCH] Rename `telemetry-collection` to `usage-data-collection`
(#39673)
The point here is to avoid confusion with the _other_ (and arguably of greater importance to users) telemetry concept, namely OTEL / metrics / stats.
While at it, I made the code a little bit more provider-agnostic.
---
airflow/cli/commands/scheduler_command.py | 4 ++--
airflow/config_templates/config.yml | 10 ++++-----
airflow/settings.py | 6 +++---
.../{scarf.py => usage_data_collection.py} | 12 +++++++++--
airflow/www/views.py | 21 +++++++++++--------
docs/apache-airflow/faq.rst | 8 +++----
.../installation/installing-from-pypi.rst | 5 ++---
tests/core/test_settings.py | 10 ++++-----
...scarf.py => test_usage_data_collection.py} | 20 +++++++++---------
tests/www/views/test_views.py | 12 +++++------
tests/www/views/test_views_home.py | 2 +-
11 files changed, 60 insertions(+), 50 deletions(-)
rename airflow/utils/{scarf.py => usage_data_collection.py} (90%)
rename tests/utils/{test_scarf.py => test_usage_data_collection.py} (76%)
diff --git a/airflow/cli/commands/scheduler_command.py b/airflow/cli/commands/scheduler_command.py
index 4f943e961b454..2b7c77fda906d 100644
--- a/airflow/cli/commands/scheduler_command.py
+++ b/airflow/cli/commands/scheduler_command.py
@@ -33,8 +33,8 @@
from airflow.utils import cli as cli_utils
from airflow.utils.cli import process_subdir
from airflow.utils.providers_configuration_loader import providers_configuration_loaded
-from airflow.utils.scarf import scarf_analytics
from airflow.utils.scheduler_health import serve_health_check
+from airflow.utils.usage_data_collection import usage_data_collection
log = logging.getLogger(__name__)
@@ -56,7 +56,7 @@ def scheduler(args: Namespace):
"""Start Airflow Scheduler."""
print(settings.HEADER)
- scarf_analytics()
+ usage_data_collection()
run_command_with_daemon_option(
args=args,
diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml
index edfe56b45cee6..36fb176e95d6c 100644
--- a/airflow/config_templates/config.yml
+++ b/airflow/config_templates/config.yml
@@ -2591,10 +2591,10 @@ sensors:
type: float
example: ~
default: "604800"
-telemetry_collection:
+usage_data_collection:
description: |
- Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic telemetry data during operation.
- This data assists Airflow maintainers in better understanding how Airflow is used.
+ Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic platform and usage data
+ during operation. This data assists Airflow maintainers in better understanding how Airflow is used.
Insights gained from this telemetry are critical for prioritizing patches, minor releases, and
security fixes. Additionally, this information supports key decisions related to the development road map.
Check the FAQ doc for more information on what data is collected.
@@ -2607,9 +2607,9 @@ telemetry_collection:
options:
enabled:
description: |
- Enable or disable telemetry data collection and sending via Scarf.
+ Enable or disable usage data collection and sending.
version_added: 2.10.0
type: boolean
example: ~
default: "True"
- see_also: ":ref:`Airflow telemetry FAQ <airflow-telemetry-faq>`"
+ see_also: ":ref:`Usage data collection FAQ <usage-data-collection>`"
diff --git a/airflow/settings.py b/airflow/settings.py
index 176d06270eb97..50c195f7fd3ac 100644
--- a/airflow/settings.py
+++ b/airflow/settings.py
@@ -576,9 +576,9 @@ def initialize():
atexit.register(dispose_orm)
-def is_telemetry_collection_enabled() -> bool:
- """Check if scarf analytics is enabled."""
- return conf.getboolean("telemetry_collection", "enabled", fallback=True) and (
+def is_usage_data_collection_enabled() -> bool:
+ """Check if data collection is enabled."""
+ return conf.getboolean("usage_data_collection", "enabled", fallback=True) and (
os.getenv("SCARF_ANALYTICS", "").strip().lower() != "false"
)
diff --git a/airflow/utils/scarf.py b/airflow/utils/usage_data_collection.py
similarity index 90%
rename from airflow/utils/scarf.py
rename to airflow/utils/usage_data_collection.py
index ec19480ee78c7..3736ba22cbffd 100644
--- a/airflow/utils/scarf.py
+++ b/airflow/utils/usage_data_collection.py
@@ -15,6 +15,14 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+"""
+This module is for management of Airflow's usage data collection.
+
+This module is not part of the public interface and is subject to change at any time.
+
+:meta private:
+"""
+
from __future__ import annotations
import platform
@@ -27,8 +35,8 @@
from airflow.configuration import conf
-def scarf_analytics():
- if not settings.is_telemetry_collection_enabled():
+def usage_data_collection():
+ if not settings.is_usage_data_collection_enabled():
return
# Exclude pre-releases and dev versions
diff --git a/airflow/www/views.py b/airflow/www/views.py
index 606d48e99c2be..9c4d7355470fb 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -117,7 +117,7 @@
from airflow.timetables._cron import CronMixin
from airflow.timetables.base import DataInterval, TimeRestriction
from airflow.timetables.simple import ContinuousTimetable
-from airflow.utils import json as utils_json, scarf, timezone, yaml
+from airflow.utils import json as utils_json, timezone, usage_data_collection, yaml
from airflow.utils.airflow_flask_app import get_airflow_app
from airflow.utils.dag_edges import dag_edges
from airflow.utils.db import get_query_count
@@ -218,17 +218,20 @@ def get_safe_url(url):
def build_scarf_url(dags_count: int) -> str:
- """Build the URL for the Scarf telemetry collection."""
- if not settings.is_telemetry_collection_enabled():
+ """
+ Build the URL for the Scarf usage data collection.
+
+ :meta private:
+ """
+ if not settings.is_usage_data_collection_enabled():
return ""
scarf_domain = "https://apacheairflow.gateway.scarf.sh"
-
- platform_sys, platform_arch = scarf.get_platform_info()
- db_version = scarf.get_database_version()
- db_name = scarf.get_database_name()
- executor = scarf.get_executor()
- python_version = scarf.get_python_version()
+ platform_sys, platform_arch = usage_data_collection.get_platform_info()
+ db_version = usage_data_collection.get_database_version()
+ db_name = usage_data_collection.get_database_name()
+ executor = usage_data_collection.get_executor()
+ python_version = usage_data_collection.get_python_version()
# Path Format:
# /{version}/{python_version}/{platform}/{arch}/{database}/{db_version}/{executor}/{num_dags}
diff --git a/docs/apache-airflow/faq.rst b/docs/apache-airflow/faq.rst
index 31ec98b9ff90d..af45139fd5e88 100644
--- a/docs/apache-airflow/faq.rst
+++ b/docs/apache-airflow/faq.rst
@@ -526,14 +526,14 @@ This means ``explicit_defaults_for_timestamp`` is disabled in your mysql server
Does Airflow collect any telemetry data?
----------------------------------------
-.. _airflow-telemetry-faq:
+.. _usage-data-collection:
-Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic telemetry data during operation.
+Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic usage data during operation.
This data assists Airflow maintainers in better understanding how Airflow is used.
-Insights gained from this telemetry are critical for prioritizing patches, minor releases, and
+Insights gained from this data are helpful for prioritizing patches, minor releases, and
security fixes. Additionally, this information supports key decisions related to the development road map.
-Deployments can opt-out of analytics by setting the :ref:`[telemetry_collection] enabled <config:telemetry_collection__enabled>`
+Deployments can opt-out of data collection by setting the :ref:`[usage_data_collection] enabled <config:usage_data_collection__enabled>`
option to ``False``, or the ``SCARF_ANALYTICS=false`` environment variable.
Individual users can easily opt-out of analytics in various ways documented in the
`Scarf Do Not Track docs <https://docs.scarf.sh/gateway/#do-not-track>`__.
diff --git a/docs/apache-airflow/installation/installing-from-pypi.rst b/docs/apache-airflow/installation/installing-from-pypi.rst
index 4751b54112601..96758e34e79cd 100644
--- a/docs/apache-airflow/installation/installing-from-pypi.rst
+++ b/docs/apache-airflow/installation/installing-from-pypi.rst
@@ -333,9 +333,8 @@ dependencies compatible with just airflow core at the moment Airflow was release
.. note::
- Airflow uses `Scarf <https://about.scarf.sh/>`__ to collect basic telemetry data during operation.
- Check the :ref:`Airflow telemetry FAQ <airflow-telemetry-faq>` for more information about the data collected
- and how to opt-out.
+ Airflow uses `Scarf <https://about.scarf.sh/>`__ to collect basic usage data during operation.
+ Check the :ref:`Usage data collection FAQ <usage-data-collection>` for more information about the data collected and how to opt-out.
Troubleshooting
'''''''''''''''
diff --git a/tests/core/test_settings.py b/tests/core/test_settings.py
index c2b4938421667..c7df4e8d6451d 100644
--- a/tests/core/test_settings.py
+++ b/tests/core/test_settings.py
@@ -28,7 +28,7 @@
from airflow.api_internal.internal_api_call import InternalApiConfig
from airflow.exceptions import AirflowClusterPolicyViolation, AirflowConfigException
-from airflow.settings import _ENABLE_AIP_44, TracebackSession, is_telemetry_collection_enabled
+from airflow.settings import _ENABLE_AIP_44, TracebackSession, is_usage_data_collection_enabled
from airflow.utils.session import create_session
from tests.test_utils.config import conf_vars
@@ -338,12 +338,12 @@ def test_create_session_ctx_mgr_no_call_methods(mock_new, clear_internal_api):
(None, "False", False), # Default env, conf disables
],
)
-def test_telemetry_collection_disabled(env_var, conf_setting, is_enabled):
- conf_patch = conf_vars({("telemetry_collection", "enabled"): conf_setting})
+def test_usage_data_collection_disabled(env_var, conf_setting, is_enabled):
+ conf_patch = conf_vars({("usage_data_collection", "enabled"): conf_setting})
if env_var is not None:
with conf_patch, patch.dict(os.environ, {"SCARF_ANALYTICS": env_var}):
- assert is_telemetry_collection_enabled() == is_enabled
+ assert is_usage_data_collection_enabled() == is_enabled
else:
with conf_patch:
- assert is_telemetry_collection_enabled() == is_enabled
+ assert is_usage_data_collection_enabled() == is_enabled
diff --git a/tests/utils/test_scarf.py b/tests/utils/test_usage_data_collection.py
similarity index 76%
rename from tests/utils/test_scarf.py
rename to tests/utils/test_usage_data_collection.py
index 507ce0357b8d5..bb7710e88f016 100644
--- a/tests/utils/test_scarf.py
+++ b/tests/utils/test_usage_data_collection.py
@@ -24,27 +24,27 @@
from airflow import __version__ as airflow_version
from airflow.configuration import conf
-from airflow.utils.scarf import get_database_version, scarf_analytics
+from airflow.utils.usage_data_collection import get_database_version, usage_data_collection
@pytest.mark.parametrize("is_enabled, is_prerelease", [(False, True), (True, True)])
@mock.patch("httpx.get")
def test_scarf_analytics_disabled(mock_get, is_enabled, is_prerelease):
- with mock.patch("airflow.settings.is_telemetry_collection_enabled", return_value=is_enabled), mock.patch(
- "airflow.utils.scarf._version_is_prerelease", return_value=is_prerelease
+ with mock.patch("airflow.settings.is_usage_data_collection_enabled", return_value=is_enabled), mock.patch(
+ "airflow.utils.usage_data_collection._version_is_prerelease", return_value=is_prerelease
):
- scarf_analytics()
+ usage_data_collection()
mock_get.assert_not_called()
-@mock.patch("airflow.settings.is_telemetry_collection_enabled", return_value=True)
-@mock.patch("airflow.utils.scarf._version_is_prerelease", return_value=False)
-@mock.patch("airflow.utils.scarf.get_database_version", return_value="12.3")
-@mock.patch("airflow.utils.scarf.get_database_name", return_value="postgres")
+@mock.patch("airflow.settings.is_usage_data_collection_enabled", return_value=True)
+@mock.patch("airflow.utils.usage_data_collection._version_is_prerelease", return_value=False)
+@mock.patch("airflow.utils.usage_data_collection.get_database_version", return_value="12.3")
+@mock.patch("airflow.utils.usage_data_collection.get_database_name", return_value="postgres")
@mock.patch("httpx.get")
def test_scarf_analytics(
mock_get,
- mock_is_telemetry_collection_enabled,
+ mock_is_usage_data_collection_enabled,
mock_version_is_prerelease,
get_database_version,
get_database_name,
@@ -54,7 +54,7 @@ def test_scarf_analytics(
python_version = platform.python_version()
executor = conf.get("core", "EXECUTOR")
scarf_endpoint = "https://apacheairflow.gateway.scarf.sh/scheduler"
- scarf_analytics()
+ usage_data_collection()
expected_scarf_url = (
f"{scarf_endpoint}?version={airflow_version}"
diff --git a/tests/www/views/test_views.py b/tests/www/views/test_views.py
index 527e3ff5e4550..067f556bb7fee 100644
--- a/tests/www/views/test_views.py
+++ b/tests/www/views/test_views.py
@@ -531,11 +531,11 @@ def test_invalid_dates(app, admin_client, url, content):
@pytest.mark.parametrize("enabled, dags_count", [(False, 5), (True, 5)])
-@patch("airflow.utils.scarf.get_platform_info", return_value=("Linux", "x86_64"))
-@patch("airflow.utils.scarf.get_database_version", return_value="12.3")
-@patch("airflow.utils.scarf.get_database_name", return_value="postgres")
-@patch("airflow.utils.scarf.get_executor", return_value="SequentialExecutor")
-@patch("airflow.utils.scarf.get_python_version", return_value="3.8.5")
+@patch("airflow.utils.usage_data_collection.get_platform_info", return_value=("Linux", "x86_64"))
+@patch("airflow.utils.usage_data_collection.get_database_version", return_value="12.3")
+@patch("airflow.utils.usage_data_collection.get_database_name", return_value="postgres")
+@patch("airflow.utils.usage_data_collection.get_executor", return_value="SequentialExecutor")
+@patch("airflow.utils.usage_data_collection.get_python_version", return_value="3.8.5")
def test_build_scarf_url(
get_platform_info,
get_database_version,
@@ -545,7 +545,7 @@ def test_build_scarf_url(
enabled,
dags_count,
):
- with patch("airflow.settings.is_telemetry_collection_enabled", return_value=enabled):
+ with patch("airflow.settings.is_usage_data_collection_enabled", return_value=enabled):
result = build_scarf_url(dags_count)
expected_url = (
"https://apacheairflow.gateway.scarf.sh/webserver/"
diff --git a/tests/www/views/test_views_home.py b/tests/www/views/test_views_home.py
index 52011c96cf11c..23f0a80210f79 100644
--- a/tests/www/views/test_views_home.py
+++ b/tests/www/views/test_views_home.py
@@ -458,7 +458,7 @@ def test_analytics_pixel(user_client, is_enabled, should_have_pixel):
"""
Test that the analytics pixel is not included when the feature is disabled
"""
- with mock.patch("airflow.settings.is_telemetry_collection_enabled", return_value=is_enabled):
+ with mock.patch("airflow.settings.is_usage_data_collection_enabled", return_value=is_enabled):
resp = user_client.get("home", follow_redirects=True)
if should_have_pixel: