Skip to content

Commit

Permalink
Remove Scarf tracking (apache#45865)
Browse files Browse the repository at this point in the history
  • Loading branch information
kaxil authored Jan 22, 2025
1 parent 6751c96 commit 70a8a8a
Show file tree
Hide file tree
Showing 11 changed files with 2 additions and 318 deletions.
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,3 @@ The CI infrastructure for Apache Airflow has been sponsored by:

<a href="https://astronomer.io"><img src="https://assets2.astronomer.io/logos/logoForLIGHTbackground.png" alt="astronomer.io" width="250px"></a>
<a href="https://aws.amazon.com/opensource/"><img src="docs/integration-logos/aws/AWS-Cloud-alt_light-bg@4x.png" alt="AWS OpenSource" width="130px"></a>

<!-- telemetry/analytics pixel: -->
<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=1b5a5e3c-da81-42f5-befa-42d836bf1b54" alt="Tracking Pixel" />
1 change: 0 additions & 1 deletion RELEASE_NOTES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,6 @@ Scarf based telemetry: Airflow now collects telemetry data (#39510)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Airflow integrates Scarf to collect basic usage data during operation. Deployments can opt-out of data collection by
setting the ``[usage_data_collection]enabled`` option to ``False``, or the ``SCARF_ANALYTICS=false`` environment variable.
See :ref:`Usage data collection FAQ <usage-data-collection>` for more information.

Datasets no longer trigger inactive DAGs (#38891)
"""""""""""""""""""""""""""""""""""""""""""""""""
Expand Down
3 changes: 0 additions & 3 deletions airflow/cli/commands/local_commands/scheduler_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from airflow.utils import cli as cli_utils
from airflow.utils.providers_configuration_loader import providers_configuration_loaded
from airflow.utils.scheduler_health import serve_health_check
from airflow.utils.usage_data_collection import usage_data_collection

log = logging.getLogger(__name__)

Expand All @@ -50,8 +49,6 @@ def scheduler(args: Namespace):
"""Start Airflow Scheduler."""
print(settings.HEADER)

usage_data_collection()

run_command_with_daemon_option(
args=args,
process_name="scheduler",
Expand Down
22 changes: 0 additions & 22 deletions airflow/config_templates/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2625,28 +2625,6 @@ sensors:
type: float
example: ~
default: "604800"
usage_data_collection:
description: |
Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic platform and usage data
during operation. This data assists Airflow maintainers in better understanding how Airflow is used.
Insights gained from this telemetry are critical for prioritizing patches, minor releases, and
security fixes. Additionally, this information supports key decisions related to the development road map.
Check the FAQ doc for more information on what data is collected.
Deployments can opt-out of analytics by setting the ``enabled`` option
to ``False``, or the ``SCARF_ANALYTICS=false`` environment variable.
Individual users can easily opt-out of analytics in various ways documented in the
`Scarf Do Not Track docs <https://docs.scarf.sh/gateway/#do-not-track>`__.
options:
enabled:
description: |
Enable or disable usage data collection and sending.
version_added: 2.10.0
type: boolean
example: ~
default: "True"
see_also: ":ref:`Usage data collection FAQ <usage-data-collection>`"
dag_bundles:
description: |
Configuration for the DAG bundles. This allows Airflow to load DAGs from different sources.
Expand Down
4 changes: 2 additions & 2 deletions airflow/reproducible_build.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
release-notes-hash: f1d91d32ade6da6eedd24362610d5f84
source-date-epoch: 1734354109
release-notes-hash: ab7a935709e7a13d5587b7eb727ae2bd
source-date-epoch: 1737531923
7 changes: 0 additions & 7 deletions airflow/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,13 +652,6 @@ def initialize():
atexit.register(dispose_orm)


def is_usage_data_collection_enabled() -> bool:
    """
    Report whether usage data collection is switched on.

    Collection counts as enabled only when both of the following hold:

    * the ``[usage_data_collection] enabled`` config option is true (it is
      read with ``fallback=True``, so a missing option counts as enabled);
    * the ``SCARF_ANALYTICS`` environment variable, after stripping
      whitespace and lower-casing, is anything other than ``"false"``
      (an unset variable is read as the empty string).

    :return: ``True`` when collection is enabled, ``False`` otherwise.
    """
    # The config switch is consulted first; when it is off the environment
    # variable is never read.
    if not conf.getboolean("usage_data_collection", "enabled", fallback=True):
        return False

    env_flag = os.getenv("SCARF_ANALYTICS", "")
    return env_flag.strip().lower() != "false"


# Const stuff

KILOBYTE = 1024
Expand Down
123 changes: 0 additions & 123 deletions airflow/utils/usage_data_collection.py

This file was deleted.

23 changes: 0 additions & 23 deletions docs/apache-airflow/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -519,26 +519,3 @@ This means ``explicit_defaults_for_timestamp`` is disabled in your mysql server

#. Set ``explicit_defaults_for_timestamp = 1`` under the ``mysqld`` section in your ``my.cnf`` file.
#. Restart the MySQL server.

Does Airflow collect any telemetry data?
----------------------------------------

.. _usage-data-collection:

Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic usage data during operation.
This data assists Airflow maintainers in better understanding how Airflow is used.
Insights gained from this data are helpful for prioritizing patches, minor releases, and
security fixes. Additionally, this information supports key decisions related to the development road map.

Deployments can opt-out of data collection by setting the :ref:`[usage_data_collection] enabled <config:usage_data_collection__enabled>`
option to ``False``, or the ``SCARF_ANALYTICS=false`` environment variable.
Individual users can easily opt-out of analytics in various ways documented in the
`Scarf Do Not Track docs <https://docs.scarf.sh/gateway/#do-not-track>`__.

The telemetry data collected is limited to the following:

- Airflow version
- Python version
- Operating system & machine architecture
- Executor
- Metadata DB type & its version
6 changes: 0 additions & 6 deletions docs/apache-airflow/installation/installing-from-pypi.rst
Original file line number Diff line number Diff line change
Expand Up @@ -330,12 +330,6 @@ dependencies compatible with just airflow core at the moment Airflow was release
# For example: https://raw.githubusercontent.com/apache/airflow/constraints-|version|/constraints-no-providers-3.9.txt
pip install "apache-airflow==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"
.. note::

Airflow uses `Scarf <https://about.scarf.sh/>`__ to collect basic usage data during operation.
Check the :ref:`Usage data collection FAQ <usage-data-collection>` for more information about the data collected and how to opt-out.

Troubleshooting
'''''''''''''''

Expand Down
24 changes: 0 additions & 24 deletions tests/core/test_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import pytest

from airflow.exceptions import AirflowClusterPolicyViolation, AirflowConfigException
from airflow.settings import is_usage_data_collection_enabled

from tests_common.test_utils.config import conf_vars

Expand Down Expand Up @@ -294,26 +293,3 @@ def test_encoding_absent_in_v2(is_v1, mock_conf):
engine_args = settings.prepare_engine_args()

assert "encoding" not in engine_args


@pytest.mark.parametrize(
    "env_var, conf_setting, is_enabled",
    [
        ("false", "True", False),  # env forces disable
        ("false", "False", False),  # Both force disable
        ("False ", "False", False),  # Both force disable
        ("true", "True", True),  # Both enable
        ("true", "False", False),  # Conf forces disable
        (None, "True", True),  # Default env, conf enables
        (None, "False", False),  # Default env, conf disables
    ],
)
def test_usage_data_collection_disabled(env_var, conf_setting, is_enabled):
    """
    Check ``is_usage_data_collection_enabled`` against config/env combinations.

    ``env_var`` is the value given to ``SCARF_ANALYTICS``; ``None`` means the
    environment is left unpatched for that case. ``conf_setting`` is the value
    of ``[usage_data_collection] enabled`` and ``is_enabled`` is the expected
    result.
    """
    config_override = conf_vars({("usage_data_collection", "enabled"): conf_setting})

    if env_var is None:
        # Only the config option is overridden; os.environ is used as-is.
        with config_override:
            assert is_usage_data_collection_enabled() == is_enabled
    else:
        with config_override:
            with patch.dict(os.environ, {"SCARF_ANALYTICS": env_var}):
                assert is_usage_data_collection_enabled() == is_enabled
104 changes: 0 additions & 104 deletions tests/utils/test_usage_data_collection.py

This file was deleted.

0 comments on commit 70a8a8a

Please sign in to comment.