Skip to content

Commit

Permalink
Rename telemetry-collection to usage-data-collection (apache#39673)
Browse files Browse the repository at this point in the history
The point here is to avoid confusion with the _other_ telemetry concept (arguably the more important one to users), namely OTEL / metrics / stats.

While at it, I made the code a little bit more provider-agnostic.
  • Loading branch information
dstandish authored May 16, 2024
1 parent 4d0c724 commit d4a5f4e
Show file tree
Hide file tree
Showing 11 changed files with 60 additions and 50 deletions.
4 changes: 2 additions & 2 deletions airflow/cli/commands/scheduler_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
from airflow.utils import cli as cli_utils
from airflow.utils.cli import process_subdir
from airflow.utils.providers_configuration_loader import providers_configuration_loaded
from airflow.utils.scarf import scarf_analytics
from airflow.utils.scheduler_health import serve_health_check
from airflow.utils.usage_data_collection import usage_data_collection

log = logging.getLogger(__name__)

Expand All @@ -56,7 +56,7 @@ def scheduler(args: Namespace):
"""Start Airflow Scheduler."""
print(settings.HEADER)

scarf_analytics()
usage_data_collection()

run_command_with_daemon_option(
args=args,
Expand Down
10 changes: 5 additions & 5 deletions airflow/config_templates/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2591,10 +2591,10 @@ sensors:
type: float
example: ~
default: "604800"
telemetry_collection:
usage_data_collection:
description: |
Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic telemetry data during operation.
This data assists Airflow maintainers in better understanding how Airflow is used.
Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic platform and usage data
during operation. This data assists Airflow maintainers in better understanding how Airflow is used.
Insights gained from this data are critical for prioritizing patches, minor releases, and
security fixes. Additionally, this information supports key decisions related to the development road map.
Check the FAQ doc for more information on what data is collected.
Expand All @@ -2607,9 +2607,9 @@ telemetry_collection:
options:
enabled:
description: |
Enable or disable telemetry data collection and sending via Scarf.
Enable or disable usage data collection and sending.
version_added: 2.10.0
type: boolean
example: ~
default: "True"
see_also: ":ref:`Airflow telemetry FAQ <airflow-telemetry-faq>`"
see_also: ":ref:`Usage data collection FAQ <usage-data-collection>`"
6 changes: 3 additions & 3 deletions airflow/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,9 +576,9 @@ def initialize():
atexit.register(dispose_orm)


def is_telemetry_collection_enabled() -> bool:
"""Check if scarf analytics is enabled."""
return conf.getboolean("telemetry_collection", "enabled", fallback=True) and (
def is_usage_data_collection_enabled() -> bool:
    """
    Check if data collection is enabled.

    Returns ``True`` only when both of the following hold:

    * the ``[usage_data_collection] enabled`` config option (read with ``fallback=True``) is true, and
    * the ``SCARF_ANALYTICS`` environment variable, after stripping whitespace and lower-casing,
      is not equal to ``"false"`` (an unset variable is read as the empty string).
    """
    return conf.getboolean("usage_data_collection", "enabled", fallback=True) and (
        os.getenv("SCARF_ANALYTICS", "").strip().lower() != "false"
    )

Expand Down
12 changes: 10 additions & 2 deletions airflow/utils/scarf.py → airflow/utils/usage_data_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
This module is for management of Airflow's usage data collection.
This module is not part of the public interface and is subject to change at any time.
:meta private:
"""

from __future__ import annotations

import platform
Expand All @@ -27,8 +35,8 @@
from airflow.configuration import conf


def scarf_analytics():
if not settings.is_telemetry_collection_enabled():
def usage_data_collection():
if not settings.is_usage_data_collection_enabled():
return

# Exclude pre-releases and dev versions
Expand Down
21 changes: 12 additions & 9 deletions airflow/www/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@
from airflow.timetables._cron import CronMixin
from airflow.timetables.base import DataInterval, TimeRestriction
from airflow.timetables.simple import ContinuousTimetable
from airflow.utils import json as utils_json, scarf, timezone, yaml
from airflow.utils import json as utils_json, timezone, usage_data_collection, yaml
from airflow.utils.airflow_flask_app import get_airflow_app
from airflow.utils.dag_edges import dag_edges
from airflow.utils.db import get_query_count
Expand Down Expand Up @@ -218,17 +218,20 @@ def get_safe_url(url):


def build_scarf_url(dags_count: int) -> str:
"""Build the URL for the Scarf telemetry collection."""
if not settings.is_telemetry_collection_enabled():
"""
Build the URL for the Scarf usage data collection.
:meta private:
"""
if not settings.is_usage_data_collection_enabled():
return ""

scarf_domain = "https://apacheairflow.gateway.scarf.sh"

platform_sys, platform_arch = scarf.get_platform_info()
db_version = scarf.get_database_version()
db_name = scarf.get_database_name()
executor = scarf.get_executor()
python_version = scarf.get_python_version()
platform_sys, platform_arch = usage_data_collection.get_platform_info()
db_version = usage_data_collection.get_database_version()
db_name = usage_data_collection.get_database_name()
executor = usage_data_collection.get_executor()
python_version = usage_data_collection.get_python_version()

# Path Format:
# /{version}/{python_version}/{platform}/{arch}/{database}/{db_version}/{executor}/{num_dags}
Expand Down
8 changes: 4 additions & 4 deletions docs/apache-airflow/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -526,14 +526,14 @@ This means ``explicit_defaults_for_timestamp`` is disabled in your mysql server
Does Airflow collect any usage data?
------------------------------------

.. _airflow-telemetry-faq:
.. _usage-data-collection:

Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic telemetry data during operation.
Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic usage data during operation.
This data assists Airflow maintainers in better understanding how Airflow is used.
Insights gained from this telemetry are critical for prioritizing patches, minor releases, and
Insights gained from this data are helpful for prioritizing patches, minor releases, and
security fixes. Additionally, this information supports key decisions related to the development road map.

Deployments can opt-out of analytics by setting the :ref:`[telemetry_collection] enabled <config:telemetry_collection__enabled>`
Deployments can opt out of data collection by setting the :ref:`[usage_data_collection] enabled <config:usage_data_collection__enabled>`
option to ``False``, or by setting the ``SCARF_ANALYTICS=false`` environment variable.
Individual users can easily opt out of analytics in various ways documented in the
`Scarf Do Not Track docs <https://docs.scarf.sh/gateway/#do-not-track>`__.
Expand Down
5 changes: 2 additions & 3 deletions docs/apache-airflow/installation/installing-from-pypi.rst
Original file line number Diff line number Diff line change
Expand Up @@ -333,9 +333,8 @@ dependencies compatible with just airflow core at the moment Airflow was release
.. note::

Airflow uses `Scarf <https://about.scarf.sh/>`__ to collect basic telemetry data during operation.
Check the :ref:`Airflow telemetry FAQ <airflow-telemetry-faq>` for more information about the data collected
and how to opt-out.
Airflow uses `Scarf <https://about.scarf.sh/>`__ to collect basic usage data during operation.
Check the :ref:`Usage data collection FAQ <usage-data-collection>` for more information about the data collected and how to opt out.

Troubleshooting
'''''''''''''''
Expand Down
10 changes: 5 additions & 5 deletions tests/core/test_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

from airflow.api_internal.internal_api_call import InternalApiConfig
from airflow.exceptions import AirflowClusterPolicyViolation, AirflowConfigException
from airflow.settings import _ENABLE_AIP_44, TracebackSession, is_telemetry_collection_enabled
from airflow.settings import _ENABLE_AIP_44, TracebackSession, is_usage_data_collection_enabled
from airflow.utils.session import create_session
from tests.test_utils.config import conf_vars

Expand Down Expand Up @@ -338,12 +338,12 @@ def test_create_session_ctx_mgr_no_call_methods(mock_new, clear_internal_api):
(None, "False", False), # Default env, conf disables
],
)
def test_telemetry_collection_disabled(env_var, conf_setting, is_enabled):
conf_patch = conf_vars({("telemetry_collection", "enabled"): conf_setting})
def test_usage_data_collection_disabled(env_var, conf_setting, is_enabled):
conf_patch = conf_vars({("usage_data_collection", "enabled"): conf_setting})

if env_var is not None:
with conf_patch, patch.dict(os.environ, {"SCARF_ANALYTICS": env_var}):
assert is_telemetry_collection_enabled() == is_enabled
assert is_usage_data_collection_enabled() == is_enabled
else:
with conf_patch:
assert is_telemetry_collection_enabled() == is_enabled
assert is_usage_data_collection_enabled() == is_enabled
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,27 @@

from airflow import __version__ as airflow_version
from airflow.configuration import conf
from airflow.utils.scarf import get_database_version, scarf_analytics
from airflow.utils.usage_data_collection import get_database_version, usage_data_collection


@pytest.mark.parametrize("is_enabled, is_prerelease", [(False, True), (True, True)])
@mock.patch("httpx.get")
def test_scarf_analytics_disabled(mock_get, is_enabled, is_prerelease):
with mock.patch("airflow.settings.is_telemetry_collection_enabled", return_value=is_enabled), mock.patch(
"airflow.utils.scarf._version_is_prerelease", return_value=is_prerelease
with mock.patch("airflow.settings.is_usage_data_collection_enabled", return_value=is_enabled), mock.patch(
"airflow.utils.usage_data_collection._version_is_prerelease", return_value=is_prerelease
):
scarf_analytics()
usage_data_collection()
mock_get.assert_not_called()


@mock.patch("airflow.settings.is_telemetry_collection_enabled", return_value=True)
@mock.patch("airflow.utils.scarf._version_is_prerelease", return_value=False)
@mock.patch("airflow.utils.scarf.get_database_version", return_value="12.3")
@mock.patch("airflow.utils.scarf.get_database_name", return_value="postgres")
@mock.patch("airflow.settings.is_usage_data_collection_enabled", return_value=True)
@mock.patch("airflow.utils.usage_data_collection._version_is_prerelease", return_value=False)
@mock.patch("airflow.utils.usage_data_collection.get_database_version", return_value="12.3")
@mock.patch("airflow.utils.usage_data_collection.get_database_name", return_value="postgres")
@mock.patch("httpx.get")
def test_scarf_analytics(
mock_get,
mock_is_telemetry_collection_enabled,
mock_is_usage_data_collection_enabled,
mock_version_is_prerelease,
get_database_version,
get_database_name,
Expand All @@ -54,7 +54,7 @@ def test_scarf_analytics(
python_version = platform.python_version()
executor = conf.get("core", "EXECUTOR")
scarf_endpoint = "https://apacheairflow.gateway.scarf.sh/scheduler"
scarf_analytics()
usage_data_collection()

expected_scarf_url = (
f"{scarf_endpoint}?version={airflow_version}"
Expand Down
12 changes: 6 additions & 6 deletions tests/www/views/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,11 +531,11 @@ def test_invalid_dates(app, admin_client, url, content):


@pytest.mark.parametrize("enabled, dags_count", [(False, 5), (True, 5)])
@patch("airflow.utils.scarf.get_platform_info", return_value=("Linux", "x86_64"))
@patch("airflow.utils.scarf.get_database_version", return_value="12.3")
@patch("airflow.utils.scarf.get_database_name", return_value="postgres")
@patch("airflow.utils.scarf.get_executor", return_value="SequentialExecutor")
@patch("airflow.utils.scarf.get_python_version", return_value="3.8.5")
@patch("airflow.utils.usage_data_collection.get_platform_info", return_value=("Linux", "x86_64"))
@patch("airflow.utils.usage_data_collection.get_database_version", return_value="12.3")
@patch("airflow.utils.usage_data_collection.get_database_name", return_value="postgres")
@patch("airflow.utils.usage_data_collection.get_executor", return_value="SequentialExecutor")
@patch("airflow.utils.usage_data_collection.get_python_version", return_value="3.8.5")
def test_build_scarf_url(
get_platform_info,
get_database_version,
Expand All @@ -545,7 +545,7 @@ def test_build_scarf_url(
enabled,
dags_count,
):
with patch("airflow.settings.is_telemetry_collection_enabled", return_value=enabled):
with patch("airflow.settings.is_usage_data_collection_enabled", return_value=enabled):
result = build_scarf_url(dags_count)
expected_url = (
"https://apacheairflow.gateway.scarf.sh/webserver/"
Expand Down
2 changes: 1 addition & 1 deletion tests/www/views/test_views_home.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ def test_analytics_pixel(user_client, is_enabled, should_have_pixel):
"""
Test that the analytics pixel is not included when the feature is disabled
"""
with mock.patch("airflow.settings.is_telemetry_collection_enabled", return_value=is_enabled):
with mock.patch("airflow.settings.is_usage_data_collection_enabled", return_value=is_enabled):
resp = user_client.get("home", follow_redirects=True)

if should_have_pixel:
Expand Down

0 comments on commit d4a5f4e

Please sign in to comment.