diff --git a/.buildkite/pipeline.build.yml b/.buildkite/pipeline.build.yml index 32f9538100793..67f79624f55ad 100644 --- a/.buildkite/pipeline.build.yml +++ b/.buildkite/pipeline.build.yml @@ -84,26 +84,6 @@ # # Upload to latest directory. # - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination wheels --path ./.whl; fi -- label: ":docker: Build Images: py36 (1/2)" - conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"] - instance_size: medium - commands: - - LINUX_WHEELS=1 ./ci/ci.sh build - - pip install -q docker aws_requests_auth boto3 - - ./ci/env/env_info.sh - - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi - - python ./ci/build/build-docker-images.py --py-versions py36 --device-types cpu cu101 cu102 cu110 --build-type BUILDKITE --build-base - -- label: ":docker: Build Images: py36 (2/2)" - conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"] - instance_size: medium - commands: - - LINUX_WHEELS=1 ./ci/ci.sh build - - pip install -q docker aws_requests_auth boto3 - - ./ci/env/env_info.sh - - if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi - - python ./ci/build/build-docker-images.py --py-versions py36 --device-types cu111 cu112 --build-type BUILDKITE --build-base - - label: ":docker: Build Images: py37 (1/2)" conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"] instance_size: medium @@ -612,7 +592,7 @@ - label: ":cold_face: :python: Ray Python 3.6 ML compatibility tests" conditions: ["ALWAYS", "RAY_CI_TUNE_AFFECTED", "RAY_CI_TRAIN_AFFECTED", "RAY_CI_ML_AFFECTED", ] - instance_size: medium + instance_size: large commands: - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT - ./ci/env/install-minimal.sh 3.6 @@ -620,8 +600,31 @@ - pip install -U typing-extensions - HOROVOD_WITH_GLOO=1 HOROVOD_WITHOUT_MPI=1 HOROVOD_WITHOUT_MXNET=1 
HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 pip install horovod - ./ci/env/env_info.sh + - bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=compat_py36 + python/ray/tests/horovod/... + python/ray/tests/lightgbm/... + python/ray/tests/ml_py36_compat/... + python/ray/tests/xgboost/... + + +- label: ":cold_face: :python: Ray Python legacy dependency ML compatibility tests" + conditions: + ["RAY_CI_PYTHON_DEPENDENCIES_AFFECTED", "RAY_CI_TUNE_AFFECTED", "RAY_CI_TRAIN_AFFECTED", "RAY_CI_ML_AFFECTED"] + instance_size: large + commands: + - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT + - ./ci/env/install-minimal.sh 3.7 + - DATA_PROCESSING_TESTING=1 TUNE_TESTING=1 TRAIN_TESTING=1 ./ci/env/install-dependencies.sh + - pip install -r python/requirements/ml/requirements_legacy_compat.txt + - pip install -U typing-extensions + - HOROVOD_WITH_GLOO=1 HOROVOD_WITHOUT_MPI=1 HOROVOD_WITHOUT_MXNET=1 HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 pip install horovod + - ./ci/env/env_info.sh - bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=compat python/ray/tests/horovod/... python/ray/tests/lightgbm/... python/ray/tests/ml_py36_compat/... python/ray/tests/xgboost/... + python/ray/tests/ray_lightning/... + - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=-gpu,-mosaic,-needs_credentials python/ray/air/... + - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=ray_air,-mosaic,-gpu_only,-gpu,-needs_credentials python/ray/train/... + - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=ray_air,-mosaic python/ray/data/... 
diff --git a/ci/build/build-docker-images.py b/ci/build/build-docker-images.py index c3a907b2b09ca..1888ec220715f 100644 --- a/ci/build/build-docker-images.py +++ b/ci/build/build-docker-images.py @@ -60,9 +60,9 @@ # The CUDA version to use for the ML Docker image. # If changing the CUDA version in the below line, you should also change the base Docker -# image being used in ~/.buildkite/Dockerfile.gpu to match the same image being used +# image being used in ~/ci/docker/base.gpu.Dockerfile to match the same image being used # here. -ML_CUDA_VERSION = "cu112" +ML_CUDA_VERSION = "cu116" DEFAULT_PYTHON_VERSION = "py37" @@ -361,12 +361,16 @@ def prep_ray_ml(): ml_requirements_files = [ "python/requirements/ml/requirements_ml_docker.txt", "python/requirements/ml/requirements_dl.txt", - "python/requirements/ml/requirements_py36_compat.txt", "python/requirements/ml/requirements_tune.txt", "python/requirements/ml/requirements_rllib.txt", "python/requirements/ml/requirements_train.txt", "python/requirements/ml/requirements_upstream.txt", ] + # We don't need these in the ml docker image + ignore_requirements = [ + "python/requirements/ml/requirements_legacy_compat.txt", + "python/requirements/ml/requirements_py36_compat.txt", + ] files_on_disk = glob.glob(f"{root_dir}/python/**/requirements*.txt", recursive=True) for file_on_disk in files_on_disk: @@ -374,7 +378,7 @@ def prep_ray_ml(): print(rel) if not rel.startswith("python/requirements/ml"): continue - elif rel not in ml_requirements_files: + elif rel not in ml_requirements_files and rel not in ignore_requirements: raise RuntimeError( f"A new requirements file was found in the repository, but it has " f"not been added to `build-docker-images.py` " diff --git a/ci/docker/base.gpu.Dockerfile b/ci/docker/base.gpu.Dockerfile index 6d33fd2d9b25c..b3f79557f8315 100644 --- a/ci/docker/base.gpu.Dockerfile +++ b/ci/docker/base.gpu.Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:11.2.0-cudnn8-devel-ubuntu20.04 +FROM 
nvidia/cuda:11.6.1-cudnn8-devel-ubuntu20.04 ARG REMOTE_CACHE_URL ARG BUILDKITE_PULL_REQUEST diff --git a/ci/env/install-dependencies.sh b/ci/env/install-dependencies.sh index e53ef3a608d5f..25755e4758fbf 100755 --- a/ci/env/install-dependencies.sh +++ b/ci/env/install-dependencies.sh @@ -413,11 +413,6 @@ install_pip_packages() { fi fi - # RLlib testing with TF 1.x. - if [ "${RLLIB_TESTING-}" = 1 ] && { [ -n "${TF_VERSION-}" ] || [ -n "${TFP_VERSION-}" ]; }; then - pip install --upgrade tensorflow-probability=="${TFP_VERSION}" tensorflow=="${TF_VERSION}" - fi - # Inject our own mirror for the CIFAR10 dataset if [ "${TRAIN_TESTING-}" = 1 ] || [ "${TUNE_TESTING-}" = 1 ] || [ "${DOC_TESTING-}" = 1 ]; then SITE_PACKAGES=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())') diff --git a/python/ray/tests/horovod/BUILD b/python/ray/tests/horovod/BUILD index 2a1a17e61a110..cacba4d5f40f5 100644 --- a/python/ray/tests/horovod/BUILD +++ b/python/ray/tests/horovod/BUILD @@ -3,7 +3,7 @@ py_test( size = "medium", srcs = ["test_horovod.py"], deps = ["//:ray_lib"], - tags = ["team:ml", "compat", "exclusive"] + tags = ["team:ml", "compat", "compat_py36", "exclusive"] ) diff --git a/python/ray/tests/lightgbm/BUILD b/python/ray/tests/lightgbm/BUILD index 04570b9aabd11..81124dc850e04 100644 --- a/python/ray/tests/lightgbm/BUILD +++ b/python/ray/tests/lightgbm/BUILD @@ -3,7 +3,7 @@ py_test( size = "small", srcs = ["simple_example.py"], deps = ["//:ray_lib"], - tags = ["team:ml", "compat", "exclusive"], + tags = ["team:ml", "compat", "compat_py36", "exclusive"], ) py_test( @@ -11,7 +11,7 @@ py_test( size="small", srcs = ["simple_tune.py"], deps = ["//:ray_lib"], - tags = ["team:ml", "compat", "exclusive"] + tags = ["team:ml", "compat", "compat_py36", "exclusive"] ) diff --git a/python/ray/tests/ml_py36_compat/BUILD b/python/ray/tests/ml_py36_compat/BUILD index ba32738a80717..d353d5ee2e908 100644 --- a/python/ray/tests/ml_py36_compat/BUILD +++ 
b/python/ray/tests/ml_py36_compat/BUILD @@ -3,7 +3,7 @@ py_test( size = "medium", srcs = ["tune_hvd_keras.py"], deps = ["//:ray_lib"], - tags = ["team:ml", "compat", "exclusive"], + tags = ["team:ml", "compat", "compat_py36", "exclusive"], ) py_test( @@ -11,5 +11,5 @@ py_test( size = "medium", srcs = ["tune_hvd_torch.py"], deps = ["//:ray_lib"], - tags = ["team:ml", "compat", "exclusive"], + tags = ["team:ml", "compat", "compat_py36", "exclusive"], ) diff --git a/python/ray/tests/ray_lightning/BUILD b/python/ray/tests/ray_lightning/BUILD index 618c3ab482eaa..6b73fc9e18394 100644 --- a/python/ray/tests/ray_lightning/BUILD +++ b/python/ray/tests/ray_lightning/BUILD @@ -3,7 +3,7 @@ py_test( size = "small", srcs = ["simple_example.py"], deps = ["//:ray_lib"], - tags = ["team:ml", "exclusive"], + tags = ["team:ml", "compat", "exclusive"], ) py_test( @@ -11,5 +11,5 @@ py_test( size="small", srcs = ["simple_tune.py"], deps = ["//:ray_lib"], - tags = ["team:ml", "exclusive"] + tags = ["team:ml", "compat", "exclusive"] ) \ No newline at end of file diff --git a/python/ray/tests/xgboost/BUILD b/python/ray/tests/xgboost/BUILD index be39dec7738b6..5a1db458e3648 100644 --- a/python/ray/tests/xgboost/BUILD +++ b/python/ray/tests/xgboost/BUILD @@ -7,7 +7,7 @@ py_test( size = "small", srcs = ["simple_example.py"], deps = ["//:ray_lib"], - tags = ["team:ml", "compat", "exclusive"], + tags = ["team:ml", "compat", "compat_py36", "exclusive"], ) py_test( @@ -15,7 +15,7 @@ py_test( size="small", srcs = ["simple_tune.py"], deps = ["//:ray_lib"], - tags = ["team:ml", "compat", "exclusive"] + tags = ["team:ml", "compat", "compat_py36", "exclusive"] ) diff --git a/python/ray/tune/BUILD b/python/ray/tune/BUILD index e87f47a66197a..206cf53bcd517 100644 --- a/python/ray/tune/BUILD +++ b/python/ray/tune/BUILD @@ -679,7 +679,7 @@ py_test( py_test( name = "mnist_ptl_mini", - size = "small", + size = "medium", srcs = ["examples/mnist_ptl_mini.py"], deps = [":tune_lib"], tags = ["team:ml", 
"exclusive", "example", "pytorch"], diff --git a/python/ray/tune/tests/test_sample.py b/python/ray/tune/tests/test_sample.py index 24e2655645c94..a1b1b5e8d8293 100644 --- a/python/ray/tune/tests/test_sample.py +++ b/python/ray/tune/tests/test_sample.py @@ -921,8 +921,8 @@ def testConvertHEBO(self): self.assertEqual(config1, config2) self.assertIn(config1["a"], [2, 3, 4]) self.assertIn(config1["b"]["x"], list(range(5))) - self.assertLess(1e-4, config1["b"]["z"]) - self.assertLess(config1["b"]["z"], 1e-2) + self.assertLessEqual(1e-4, config1["b"]["z"]) + self.assertLessEqual(config1["b"]["z"], 1e-2) searcher = HEBOSearch(metric="a", mode="max", random_state_seed=123) analysis = tune.run( diff --git a/python/ray/tune/tests/test_tune_restore.py b/python/ray/tune/tests/test_tune_restore.py index 3a7baf7ae44de..333aca7f1d133 100644 --- a/python/ray/tune/tests/test_tune_restore.py +++ b/python/ray/tune/tests/test_tune_restore.py @@ -462,7 +462,7 @@ def tearDown(self): def testPBTKeras(self): from ray.tune.examples.pbt_tune_cifar10_with_keras import Cifar10Model - from tensorflow.python.keras.datasets import cifar10 + from tensorflow.keras.datasets import cifar10 cifar10.load_data() validate_save_restore(Cifar10Model) diff --git a/python/requirements/ml/requirements_dl.txt b/python/requirements/ml/requirements_dl.txt index 04d41897a8be6..b62c2bb8ae2e1 100644 --- a/python/requirements/ml/requirements_dl.txt +++ b/python/requirements/ml/requirements_dl.txt @@ -1,15 +1,15 @@ # These requirements are used for the CI and CPU-only Docker images so we install CPU only versions of torch. # For GPU Docker images, you should install requirements_ml_docker.txt afterwards. -tensorflow==2.6.2 -tensorflow-probability==0.14.1 +tensorflow==2.9.0 +tensorflow-probability==0.17.0 # If you make changes to the torch versions below, please also make the corresponding changes to `requirements_ml_docker.txt`! 
-torch==1.9.0;sys_platform=="darwin" -torchvision==0.10.0;sys_platform=="darwin" +torch==1.12.1;sys_platform=="darwin" +torchvision==0.13.1;sys_platform=="darwin" # On non-OSX machines only install CPU version of torch and torchvision -f https://download.pytorch.org/whl/torch_stable.html -torch==1.9.0+cpu;sys_platform!="darwin" -torchvision==0.10.0+cpu;sys_platform!="darwin" +torch==1.12.1+cpu;sys_platform!="darwin" +torchvision==0.13.1+cpu;sys_platform!="darwin" diff --git a/python/requirements/ml/requirements_legacy_compat.txt b/python/requirements/ml/requirements_legacy_compat.txt new file mode 100644 index 0000000000000..90021db8f802e --- /dev/null +++ b/python/requirements/ml/requirements_legacy_compat.txt @@ -0,0 +1,31 @@ +# ATTENTION: THESE DEPENDENCIES SHOULD USUALLY NOT BE UPDATED! + +# Updating these dependencies means we remove official support for dependency releases older than +# the specified version. + +# These are compatibility requirements to make sure certain workflows continue to work +# with these dependency versions. They thus act as a lower bound for compatibility +# with ML libraries. +# Concretely, we set up a fresh Python 3.7 environment and +# run the pipeline job in `Ray Python legacy dependency ML compatibility tests` with these dependencies installed. 
+ +# ML libraries +torch==1.9.0 +tensorflow==2.7.0 +tensorflow-probability==0.14.1 +keras==2.7.0 + +# Torch addons +torchvision==0.10.0 + +pytorch-lightning==1.5.10 + +# Upstream libraries +lightgbm_ray==0.1.5 +xgboost_ray==0.1.10 +ray_lightning==0.2.0 + +# Datasets +pyarrow==6.0.1 + +ray[tune,data] diff --git a/python/requirements/ml/requirements_ml_docker.txt b/python/requirements/ml/requirements_ml_docker.txt index c38150ad56c2a..1f11e096ccd41 100644 --- a/python/requirements/ml/requirements_ml_docker.txt +++ b/python/requirements/ml/requirements_ml_docker.txt @@ -5,12 +5,12 @@ tblib # If you make changes to the torch versions, please also make the corresponding changes to `requirements_dl.txt`! -f https://download.pytorch.org/whl/torch_stable.html -torch==1.9.0+cu111 -torchvision==0.10.0+cu111 +torch==1.12.1+cu116 +torchvision==0.13.1+cu116 --f https://data.pyg.org/whl/torch-1.9.0+cu111.html +-f https://data.pyg.org/whl/torch-1.12.1+cu116.html torch-scatter==2.0.9 -torch-sparse==0.6.12 +torch-sparse==0.6.15 # torch-geometric has to be installed after torch-scatter and torch-sparse. torch-geometric==2.0.3; python_version < '3.7' diff --git a/python/requirements/ml/requirements_rllib.txt b/python/requirements/ml/requirements_rllib.txt index 66491196841a5..278cddf73d9ff 100644 --- a/python/requirements/ml/requirements_rllib.txt +++ b/python/requirements/ml/requirements_rllib.txt @@ -22,7 +22,7 @@ supersuit==3.3.3; python_version >= '3.7' pybullet==3.2.0 # For tests on RecSim and Kaggle envs. 
recsim==0.2.4 -tensorflow_estimator==2.6.0 +tensorflow_estimator==2.9.0 # DeepMind's OpenSpiel open-spiel==1.0.2 @@ -34,8 +34,8 @@ higher==0.2.1 pyglet==1.5.15 imageio-ffmpeg==0.4.5 # ONNX -onnx==1.9.0 -onnxruntime==1.9.0 -tf2onnx==1.8.5 +onnx==1.12.0 +onnxruntime==1.12.0 +tf2onnx==1.12.1 typer==0.6.1 rich==12.0.1 diff --git a/python/requirements/ml/requirements_tune.txt b/python/requirements/ml/requirements_tune.txt index b2e4e0c19f238..b4b3362fb0c72 100644 --- a/python/requirements/ml/requirements_tune.txt +++ b/python/requirements/ml/requirements_tune.txt @@ -28,7 +28,7 @@ pytest-remotedata==0.3.2 lightning-bolts==0.4.0 pytorch-lightning==1.5.10 shortuuid==1.0.1 -scikit-optimize==0.8.1 +scikit-optimize==0.9.0 sigopt==7.5.0 timm==0.4.5 transformers==4.18.0; python_version <= '3.6' diff --git a/rllib/BUILD b/rllib/BUILD index 58c1c988d1998..6e6a09b9be134 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -84,7 +84,7 @@ py_test( name = "learning_tests_cartpole_a2c_microbatch", main = "tests/run_regression_tests.py", tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete"], - size = "medium", + size = "large", srcs = ["tests/run_regression_tests.py"], data = ["tuned_examples/a2c/cartpole-a2c-microbatch.yaml"], args = ["--yaml-dir=tuned_examples/a2c"] diff --git a/rllib/algorithms/a3c/tests/test_a3c.py b/rllib/algorithms/a3c/tests/test_a3c.py index 49fdff3327af2..1d9aa54025811 100644 --- a/rllib/algorithms/a3c/tests/test_a3c.py +++ b/rllib/algorithms/a3c/tests/test_a3c.py @@ -27,7 +27,7 @@ def test_a3c_compilation(self): num_iterations = 2 # Test against all frameworks. 
- for _ in framework_iterator(config, with_eager_tracing=True): + for _ in framework_iterator(config, with_eager_tracing=False): for env in ["CartPole-v1", "Pendulum-v1", "PongDeterministic-v0"]: print("env={}".format(env)) config.model["use_lstm"] = env == "CartPole-v1" diff --git a/rllib/tests/run_regression_tests.py b/rllib/tests/run_regression_tests.py index 996d9708a3caa..a4e6923af5184 100644 --- a/rllib/tests/run_regression_tests.py +++ b/rllib/tests/run_regression_tests.py @@ -55,6 +55,7 @@ "is particularly useful for timed tests." ), ) + # Obsoleted arg, use --framework=torch instead. parser.add_argument( "--torch", action="store_true", help="Runs all tests with PyTorch enabled." @@ -108,7 +109,13 @@ continue # Always run with eager-tracing when framework=tf2 if not in local-mode. - if args.framework == "tf2" and not args.local_mode: + # Ignore this if the yaml explicitly tells us to disable eager tracing. + if ( + args.framework == "tf2" + and not args.local_mode + and not exp["config"].get("eager_tracing") is False + ): + exp["config"]["eager_tracing"] = True # Print out the actual config. diff --git a/rllib/tuned_examples/a2c/cartpole-a2c-microbatch.yaml b/rllib/tuned_examples/a2c/cartpole-a2c-microbatch.yaml index c72ea3d749a1c..ac6b6ef688af5 100644 --- a/rllib/tuned_examples/a2c/cartpole-a2c-microbatch.yaml +++ b/rllib/tuned_examples/a2c/cartpole-a2c-microbatch.yaml @@ -12,3 +12,5 @@ cartpole-a2c-microbatch: rollout_fragment_length: 20 microbatch_size: 40 train_batch_size: 120 + # When using tf>=2.8, eager tracing cannot be used + eager_tracing: False