From 86d65565b843bf42a7df3c85c1755b97dcb27781 Mon Sep 17 00:00:00 2001
From: Kaiyue Wen
Date: Sun, 31 Dec 2023 23:32:40 -0800
Subject: [PATCH 01/56] logs

---
 logs/log-t1v-n-d3fbbfef-w-0.log | 386 ++++++++++++++++++++++++++++++++
 1 file changed, 386 insertions(+)
 create mode 100644 logs/log-t1v-n-d3fbbfef-w-0.log

diff --git a/logs/log-t1v-n-d3fbbfef-w-0.log b/logs/log-t1v-n-d3fbbfef-w-0.log
new file mode 100644
index 000000000..f9bf6d3a7
--- /dev/null
+++ b/logs/log-t1v-n-d3fbbfef-w-0.log
@@ -0,0 +1,386 @@
+ config.json: 0%|          | 0.00/665 [00:00<?, ?B/s]
+Traceback (most recent call last):
+  File "/home/kaiyue/levanter/src/levanter/main/train_lm.py", line 195, in <module>
+    levanter.config.main(main)()
+  File "/home/kaiyue/levanter/src/levanter/config.py", line 84, in wrapper_inner
+    response = fn(cfg, *args, **kwargs)
+  File "/home/kaiyue/levanter/src/levanter/main/train_lm.py", line 105, in main
+    eval_datasets = config.data.validation_sets(Pos.size)
+  File "/home/kaiyue/levanter/src/levanter/data/text.py", line 540, in validation_sets
+    validation_set = self.validation_set(seq_len, monitors)
+  File "/home/kaiyue/levanter/src/levanter/data/text.py", line 535, in validation_set
+    return self.token_seq_dataset("validation", seq_len, monitors)
+  File "/home/kaiyue/levanter/src/levanter/data/text.py", line 564, in token_seq_dataset
+    cache = self.build_or_load_cache(split, monitors=monitors)
+  File "/home/kaiyue/levanter/src/levanter/data/text.py", line 574, in build_or_load_cache
+    return TokenizedDocumentCache.load(split_cache_dir, flatten_docs=True)
+  File "/home/kaiyue/levanter/src/levanter/data/text.py", line 270, in load
+    cache = ShardCache.load(cache_dir, batch_size=batch_size)
+  File "/home/kaiyue/levanter/src/levanter/data/shard_cache.py", line 1267, in load
+    ledger = _load_cache_ledger(cache_dir)
+  File "/home/kaiyue/levanter/src/levanter/data/shard_cache.py", line 442, in _load_cache_ledger
+    with fsspec.open(ledger_path) as file:
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/core.py", line 100, in __enter__
+    f = self.fs.open(self.path, mode=mode)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/spec.py", line 1309, in open
+    f = self._open(
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 1519, in _open
+    return GCSFile(
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 1678, in __init__
+    super().__init__(
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/spec.py", line 1665, in __init__
+    self.size = self.details["size"]
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 1714, in details
+    self._details = self.fs.info(self.path, generation=self.generation)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/asyn.py", line 118, in wrapper
+    return sync(self.loop, func, *args, **kwargs)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/asyn.py", line 103, in sync
+    raise return_result
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/asyn.py", line 56, in _runner
+    result[0] = await coro
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 962, in _info
+    exact = await self._get_object(path)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 522, in _get_object
+    resp = await self._call(
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 437, in _call
+    status, headers, info, contents = await self._request(
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/decorator.py", line 221, in fun
+    return await caller(func, *(extras + args), **kw)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/retry.py", line 122, in retry_request
+    return await func(*args, **kwargs)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 430, in _request
+    validate_response(status, contents, path, args)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/retry.py", line 103, in validate_response
+    raise OSError(f"Forbidden: {path}\n{msg}")
+OSError: Forbidden: b/levanter-data/o
+544074808685-compute@developer.gserviceaccount.com does not have storage.objects.list access to the Google Cloud Storage bucket. Permission 'storage.objects.list' denied on resource (or it may not exist).
+Traceback (most recent call last):
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 516, in _get_object
+    res = await self._call(
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 437, in _call
+    status, headers, info, contents = await self._request(
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/decorator.py", line 221, in fun
+    return await caller(func, *(extras + args), **kw)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/retry.py", line 122, in retry_request
+    return await func(*args, **kwargs)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 430, in _request
+    validate_response(status, contents, path, args)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/retry.py", line 103, in validate_response
+    raise OSError(f"Forbidden: {path}\n{msg}")
+OSError: Forbidden: b/levanter-data/o/tokenized%2Fopenwebtext%2Fvalidation%2Fcache_ledger.json
+544074808685-compute@developer.gserviceaccount.com does not have storage.objects.get access to the Google Cloud Storage object. Permission 'storage.objects.get' denied on resource (or it may not exist).
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/kaiyue/levanter/src/levanter/main/train_lm.py", line 195, in <module>
+    levanter.config.main(main)()
+  File "/home/kaiyue/levanter/src/levanter/config.py", line 84, in wrapper_inner
+    response = fn(cfg, *args, **kwargs)
+  File "/home/kaiyue/levanter/src/levanter/main/train_lm.py", line 105, in main
+    eval_datasets = config.data.validation_sets(Pos.size)
+  File "/home/kaiyue/levanter/src/levanter/data/text.py", line 540, in validation_sets
+    validation_set = self.validation_set(seq_len, monitors)
+  File "/home/kaiyue/levanter/src/levanter/data/text.py", line 535, in validation_set
+    return self.token_seq_dataset("validation", seq_len, monitors)
+  File "/home/kaiyue/levanter/src/levanter/data/text.py", line 564, in token_seq_dataset
+    cache = self.build_or_load_cache(split, monitors=monitors)
+  File "/home/kaiyue/levanter/src/levanter/data/text.py", line 574, in build_or_load_cache
+    return TokenizedDocumentCache.load(split_cache_dir, flatten_docs=True)
+  File "/home/kaiyue/levanter/src/levanter/data/text.py", line 270, in load
+    cache = ShardCache.load(cache_dir, batch_size=batch_size)
+  File "/home/kaiyue/levanter/src/levanter/data/shard_cache.py", line 1267, in load
+    ledger = _load_cache_ledger(cache_dir)
+  File "/home/kaiyue/levanter/src/levanter/data/shard_cache.py", line 442, in _load_cache_ledger
+    with fsspec.open(ledger_path) as file:
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/core.py", line 100, in __enter__
+    f = self.fs.open(self.path, mode=mode)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/spec.py", line 1309, in open
+    f = self._open(
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 1519, in _open
+    return GCSFile(
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 1678, in __init__
+    super().__init__(
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/spec.py", line 1665, in __init__
+    self.size = self.details["size"]
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 1714, in details
+    self._details = self.fs.info(self.path, generation=self.generation)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/asyn.py", line 118, in wrapper
+    return sync(self.loop, func, *args, **kwargs)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/asyn.py", line 103, in sync
+    raise return_result
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/asyn.py", line 56, in _runner
+    result[0] = await coro
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 962, in _info
+    exact = await self._get_object(path)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 522, in _get_object
+    resp = await self._call(
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 437, in _call
+    status, headers, info, contents = await self._request(
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/decorator.py", line 221, in fun
+    return await caller(func, *(extras + args), **kw)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/retry.py", line 122, in retry_request
+    return await func(*args, **kwargs)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 430, in _request
+    validate_response(status, contents, path, args)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/retry.py", line 103, in validate_response
+    raise OSError(f"Forbidden: {path}\n{msg}")
+OSError: Forbidden: b/levanter-data/o
+544074808685-compute@developer.gserviceaccount.com does not have storage.objects.list access to the Google Cloud Storage bucket. Permission 'storage.objects.list' denied on resource (or it may not exist).
+wandb: 0.015 MB of 0.015 MB uploaded
+wandb:
+wandb: Run summary:
+wandb: backend tpu
+wandb: num_devices 8
+wandb: num_hosts 1
+wandb:
+wandb: 🚀 View run charmed-leaf-1 at: https://wandb.ai/understanding-sam/levanter/runs/8eyi8adv
+wandb: ⚡ View job at https://wandb.ai/understanding-sam/levanter/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjEyNjY1ODAzOQ==/version_details/v0
+wandb: Synced 5 W&B file(s), 0 media file(s), 4 artifact file(s) and 0 other file(s)
+wandb: Find logs at: ./wandb/run-20240101_072752-8eyi8adv/logs
+2024-01-01 07:28:06,900 VINFO scripts.py:1085 -- Killed `/home/kaiyue/venv310/lib/python3.10/site-packages/ray/core/src/ray/raylet/raylet --raylet_socket_name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/raylet --store_socket_name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/plasma_store --object_manager_port=0 --min_worker_port=10002 --max_worker_port=19999 --node_manager_port=0 --node_ip_address=10.164.0.78 --maximum_startup_concurrency=96 --static_resource_list=node:10.164.0.78,1.0,node:__internal_head__,1.0,TPU,4,accelerator_type:TPU-V3,1,debug-8,1,TPU-v3-8-head,1,CPU,96,memory,237984248423,object_store_memory,106278963609 "--python_worker_command=/home/kaiyue/venv310/bin/python3.10 /home/kaiyue/venv310/lib/python3.10/site-packages/ray/_private/workers/setup_worker.py /home/kaiyue/venv310/lib/python3.10/site-packages/ray/_private/workers/default_worker.py --node-ip-address=10.164.0.78 --node-manager-port=RAY_NODE_MANAGER_PORT_PLACEHOLDER --object-store-name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/plasma_store --raylet-name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/raylet --redis-address=None --temp-dir=/tmp/ray --metrics-agent-port=53820 --runtime-env-agent-port=56966 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --runtime-env-agent-port=56966 --gcs-address=10.164.0.78:61964 --session-name=session_2024-01-01_07-27-46_779910_13059 --temp-dir=/tmp/ray --webui=10.164.0.78:8265 --cluster-id=3fceadd8168d8843aa4e8c1b4b06513d7b706b38e3056706a925c9f0 RAY_WORKER_DYNAMIC_OPTION_PLACEHOLDER" --java_worker_command= --cpp_worker_command= --native_library_path=/home/kaiyue/venv310/lib/python3.10/site-packages/ray/cpp/lib --temp_dir=/tmp/ray --session_dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059 --log_dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --resource_dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/runtime_resources --metrics-agent-port=53820 --metrics_export_port=64689 --runtime_env_agent_port=56966 --object_store_memory=106278963609 --plasma_directory=/dev/shm --ray-debugger-external=0 --gcs-address=10.164.0.78:61964 --session-name=session_2024-01-01_07-27-46_779910_13059 --labels= --cluster-id=3fceadd8168d8843aa4e8c1b4b06513d7b706b38e3056706a925c9f0 --head --num_prestart_python_workers=96 "--dashboard_agent_command=/home/kaiyue/venv310/bin/python3.10 -u /home/kaiyue/venv310/lib/python3.10/site-packages/ray/dashboard/agent.py
--node-ip-address=10.164.0.78 --metrics-export-port=64689 --dashboard-agent-port=53820 --listen-port=52365 --node-manager-port=RAY_NODE_MANAGER_PORT_PLACEHOLDER --object-store-name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/plasma_store --raylet-name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/raylet --temp-dir=/tmp/ray --session-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059 --log-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --session-name=session_2024-01-01_07-27-46_779910_13059 --gcs-address=10.164.0.78:61964" "--runtime_env_agent_command=/home/kaiyue/venv310/bin/python3.10 -u /home/kaiyue/venv310/lib/python3.10/site-packages/ray/_private/runtime_env/agent/main.py --node-ip-address=10.164.0.78 --runtime-env-agent-port=56966 --gcs-address=10.164.0.78:61964 --runtime-env-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/runtime_resources --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --log-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --temp-dir=/tmp/ray"` (via SIGKILL)
+2024-01-01 07:28:06,915 INFO scripts.py:1121 -- 1/1 stopped.
+2024-01-01 07:28:07,324 VINFO scripts.py:1085 -- Killed `/home/kaiyue/venv310/bin/python3.10 -u /home/kaiyue/venv310/lib/python3.10/site-packages/ray/autoscaler/_private/monitor.py --logs-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=10.164.0.78:61964 --monitor-ip=10.164.0.78` (via SIGKILL)
+2024-01-01 07:28:07,325 VINFO scripts.py:1085 -- Killed `/home/kaiyue/venv310/bin/python3.10 -u /home/kaiyue/venv310/lib/python3.10/site-packages/ray/_private/log_monitor.py --session-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059 --logs-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --gcs-address=10.164.0.78:61964 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5` (via SIGKILL)
+2024-01-01 07:28:07,410 VINFO scripts.py:1085 -- Killed `/home/kaiyue/venv310/bin/python3.10 -m ray.util.client.server --address=10.164.0.78:61964 --host=0.0.0.0 --port=10001 --mode=proxy --runtime-env-agent-address=http://10.164.0.78:56966` (via SIGKILL)
+2024-01-01 07:28:07,578 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,580 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,581 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,583 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,585 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,586 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,588 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,589 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,591 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,592 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,594 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,595 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,597 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,598 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,600 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,602 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,603 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,605 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,606 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,608 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,609 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,611 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,612 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,616 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,617 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,619 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,620 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,622 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,623 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,625 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,627 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,628 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,630 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,631 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,633 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,634 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,636 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,637 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
+2024-01-01 07:28:07,639 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,640 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,642 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,643 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,645 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,647 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,648 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,650 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,651 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,653 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,654 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,656 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,658 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,660 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,661 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,663 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,665 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+Exception in thread NetStatThr:
+Traceback (most recent call last):
+  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
+    self.run()
+  File "/usr/lib/python3.10/threading.py", line 953, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 268, in check_network_status
+    self._loop_check_status(
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 224, in _loop_check_status
+    local_handle = request()
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface.py", line 756, in deliver_network_status
+    return self._deliver_network_status(status)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface_shared.py", line 484, in _deliver_network_status
+    return self._deliver_record(record)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface_shared.py", line 437, in _deliver_record
+    handle = mailbox._deliver_record(record, interface=self)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/mailbox.py", line 455, in _deliver_record
+    interface._publish(record)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface_sock.py", line 51, in _publish
+    self._sock_client.send_record_publish(record)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 221, in send_record_publish
+    self.send_server_request(server_req)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 155, in send_server_request
+    self._send_message(msg)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 152, in _send_message
+    self._sendall_with_error_handle(header + data)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 130, in _sendall_with_error_handle
+    sent = self._sock.send(data)
+BrokenPipeError: [Errno 32] Broken pipe
+Exception in thread IntMsgThr:
+Traceback (most recent call last):
+  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
+    self.run()
+  File "/usr/lib/python3.10/threading.py", line 953, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 300, in check_internal_messages
+    self._loop_check_status(
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 224, in _loop_check_status
+    local_handle = request()
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface.py", line 764, in deliver_internal_messages
+    return self._deliver_internal_messages(internal_message)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface_shared.py", line 490, in _deliver_internal_messages
+    return self._deliver_record(record)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface_shared.py", line 437, in _deliver_record
+    handle = mailbox._deliver_record(record, interface=self)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/mailbox.py", line 455, in _deliver_record
+    interface._publish(record)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface_sock.py", line 51, in _publish
+    self._sock_client.send_record_publish(record)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 221, in send_record_publish
+    self.send_server_request(server_req)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 155, in send_server_request
+    self._send_message(msg)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 152, in _send_message
+    self._sendall_with_error_handle(header + data)
+  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 130, in _sendall_with_error_handle
+    sent = self._sock.send(data)
+BrokenPipeError: [Errno 32] Broken pipe
+2024-01-01 07:28:07,666 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
+2024-01-01 07:28:07,833 VINFO scripts.py:1099 -- Attempted to stop `/home/kaiyue/venv310/bin/python3.10 -u /home/kaiyue/venv310/lib/python3.10/site-packages/ray/_private/log_monitor.py --session-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059 --logs-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --gcs-address=10.164.0.78:61964 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5`, but process was already dead.
+2024-01-01 07:28:08,000 VINFO scripts.py:1099 -- Attempted to stop `/home/kaiyue/venv310/bin/python3.10 -u /home/kaiyue/venv310/lib/python3.10/site-packages/ray/dashboard/agent.py --node-ip-address=10.164.0.78 --metrics-export-port=64689 --dashboard-agent-port=53820 --listen-port=52365 --node-manager-port=43807 --object-store-name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/plasma_store --raylet-name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/raylet --temp-dir=/tmp/ray --session-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059 --log-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --session-name=session_2024-01-01_07-27-46_779910_13059 --gcs-address=10.164.0.78:61964 --agent-id 424238335`, but process was already dead.
+2024-01-01 07:28:08,084 VINFO scripts.py:1085 -- Killed `/home/kaiyue/venv310/bin/python3.10 /home/kaiyue/venv310/lib/python3.10/site-packages/ray/dashboard/dashboard.py --host=0.0.0.0 --port=8265 --port-retries=0 --temp-dir=/tmp/ray --log-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --session-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=10.164.0.78:61964 --node-ip-address=10.164.0.78` (via SIGKILL)
+2024-01-01 07:28:08,168 VINFO scripts.py:1099 -- Attempted to stop `/home/kaiyue/venv310/bin/python3.10 -u /home/kaiyue/venv310/lib/python3.10/site-packages/ray/_private/runtime_env/agent/main.py --node-ip-address=10.164.0.78 --runtime-env-agent-port=56966 --gcs-address=10.164.0.78:61964 --runtime-env-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/runtime_resources --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --log-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --temp-dir=/tmp/ray`, but process was already dead.
+2024-01-01 07:28:08,252 INFO scripts.py:1121 -- 1/4 stopped.
+2024-01-01 07:28:08,252 INFO scripts.py:1121 -- 2/4 stopped.
+2024-01-01 07:28:08,252 INFO scripts.py:1121 -- 3/4 stopped.
+2024-01-01 07:28:08,252 INFO scripts.py:1121 -- 4/4 stopped.
+2024-01-01 07:28:08,546 VINFO scripts.py:1085 -- Killed `/home/kaiyue/venv310/lib/python3.10/site-packages/ray/core/src/ray/gcs/gcs_server --log_dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --config_list=eyJvYmplY3Rfc3BpbGxpbmdfY29uZmlnIjogIntcInR5cGVcIjogXCJmaWxlc3lzdGVtXCIsIFwicGFyYW1zXCI6IHtcImRpcmVjdG9yeV9wYXRoXCI6IFwiL3RtcC9yYXkvc2Vzc2lvbl8yMDI0LTAxLTAxXzA3LTI3LTQ2Xzc3OTkxMF8xMzA1OVwifX0iLCAiaXNfZXh0ZXJuYWxfc3RvcmFnZV90eXBlX2ZzIjogdHJ1ZX0= --gcs_server_port=61964 --metrics-agent-port=53820 --node-ip-address=10.164.0.78 --session-name=session_2024-01-01_07-27-46_779910_13059` (via SIGKILL)
+2024-01-01 07:28:08,599 INFO scripts.py:1121 -- 1/1 stopped.
+2024-01-01 07:28:08,599 SUCC scripts.py:1166 -- Stopped all 6 Ray processes.
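The twin "BrokenPipeError: [Errno 32] Broken pipe" tracebacks above come from wandb's background status threads writing to the local wandb service socket after that process has already exited during shutdown. As a minimal standalone illustration of the failure mode (my own sketch, not wandb or levanter code): writing to a pipe whose read end is closed raises exactly this error, because CPython ignores SIGPIPE at interpreter startup and surfaces EPIPE as an exception instead of a signal.

import os

# Hypothetical repro: close the read end, then write to the pipe.
r, w = os.pipe()
os.close(r)
try:
    os.write(w, b"status update")
except BrokenPipeError as e:
    print(e)  # [Errno 32] Broken pipe
finally:
    os.close(w)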
From 8b0900f4e0707853f67e8f686ecc62bddf5aef1e Mon Sep 17 00:00:00 2001
From: Kaiyue
Date: Mon, 1 Jan 2024 15:39:40 +0800
Subject: [PATCH 02/56] try dataset path

---
 config/gpt2_small_pile.yaml | 25 +++++++++++++++++++++++++
 debug.sh                    |  1 +
 2 files changed, 26 insertions(+)
 create mode 100644 config/gpt2_small_pile.yaml
 create mode 100644 debug.sh

diff --git a/config/gpt2_small_pile.yaml b/config/gpt2_small_pile.yaml
new file mode 100644
index 000000000..6a139674d
--- /dev/null
+++ b/config/gpt2_small_pile.yaml
@@ -0,0 +1,25 @@
+data:
+  cache_dir: "gs://levanter-data-new/tokenized/pile-old/"
+  tokenizer: "EleutherAI/gpt-neox-20b"
+model:
+  type: gpt2
+  hidden_dim: 768
+  num_heads: 12
+  num_layers: 12
+  seq_len: 1024
+  gradient_checkpointing: true
+  scale_attn_by_inverse_layer_idx: true
+trainer:
+  wandb:
+    project: "levanter"
+    tags: [ "pile", "gpt2"]
+
+  mp: p=f32,c=bfloat16
+  model_axis_size: 1
+  per_device_parallelism: 4
+
+  train_batch_size: 512
+optimizer:
+  learning_rate: 6E-4
+  weight_decay: 0.1
+  min_lr_ratio: 0.1
diff --git a/debug.sh b/debug.sh
new file mode 100644
index 000000000..9f4a67ef4
--- /dev/null
+++ b/debug.sh
@@ -0,0 +1 @@
+gcloud compute tpus tpu-vm ssh debug-8 --zone europe-west4-a --worker=all --command 'WANDB_API_KEY= levanter/infra/launch.sh python levanter/src/levanter/main/train_lm.py --config_path levanter/config/gpt2_small.yaml --trainer.checkpointer.base_path gs://levanter-checkpoints-new/gpt'
\ No newline at end of file

From 5bf8cd03130f12c623a51867ca1bac50a37065ad Mon Sep 17 00:00:00 2001
From: David Hall
Date: Sat, 12 Oct 2024 12:40:41 -0700
Subject: [PATCH 03/56] use the jax serialization manager for deser in an
 attempt to fix crash. (it doesn't)

---
 src/levanter/tensorstore_serialization.py | 162 ++++++++------------
 tests/test_tensorstore_serialization.py   | 170 +++++++++++-----------
 2 files changed, 150 insertions(+), 182 deletions(-)

diff --git a/src/levanter/tensorstore_serialization.py b/src/levanter/tensorstore_serialization.py
index 462c1cf2c..ba89fd423 100644
--- a/src/levanter/tensorstore_serialization.py
+++ b/src/levanter/tensorstore_serialization.py
@@ -1,7 +1,5 @@
 # References:
 # * Orbax: https://github.com/google/orbax/blob/11d2934ecfff77e86b5e07d0fef02b67eff4511b/orbax/checkpoint/pytree_checkpoint_handler.py#L312
-import asyncio
-import functools
 import logging
 import os
 from functools import partial
@@ -13,12 +11,11 @@
 import jax.numpy as jnp
 import jax.tree_util as jtu
 import numpy as np
-import tensorstore
-from jax.sharding import Mesh
-from tensorstore import TensorStore
+from jax.sharding import Mesh, Sharding
+from jaxtyping import PyTree

 import haliax as hax
-import haliax.tree_util as htu
+from haliax.jax_utils import is_jax_array_like
 from haliax.partitioning import ResourceMapping
 from haliax.util import is_named_array
@@ -45,14 +42,11 @@ def tree_serialize_leaves_tensorstore(
     else:
         manager_was_none = False

-    leaf_key_paths = jax_utils.leaf_key_paths(pytree, is_leaf=_is_named_or_none)
-
-    def path_from_key_path(key_path):
-        return os.path.join(checkpoint_dir, *key_path.split("."))
+    leaf_key_paths = jax_utils.leaf_key_paths(pytree, is_leaf=is_named_array)

-    paths = jtu.tree_map(path_from_key_path, leaf_key_paths, is_leaf=lambda x: x is None)
-    paths = jtu.tree_leaves(paths, is_leaf=lambda x: x is None)
-    leaves = jtu.tree_leaves(pytree, is_leaf=lambda x: x is None)
+    paths = _fs_paths_from_key_paths(checkpoint_dir, leaf_key_paths)
+    paths = jtu.tree_leaves(paths)
+    leaves = jtu.tree_leaves(pytree)

     assert len(leaves) == len(paths)
     # ok, not all of these are arrays, but we'll deal with that in the async function
@@ -79,88 +73,38 @@ def _ensure_is_array(x):
     manager.wait_until_finished()

-def _tensorstore_spec_for(checkpoint_dir, key_path: str):
-    checkpoint_path = os.path.join(checkpoint_dir, *key_path.split("."))
-    ts_spec = array_ser.get_tensorstore_spec(checkpoint_path)
-    return ts_spec
-
-
-async def _serialize_one_leaf(x, spec):
-    if isinstance(x, hax.NamedArray):
-        # we don't need to do anything special for named arrays to serialize, though we will for deserialization.
-        return await _serialize_one_leaf(x.array, spec)
-    elif isinstance(x, jax.Array):
-        if not x.is_fully_addressable:
-            return await array_ser.async_serialize(x, spec)
-        else:
-            return await save_array_to_tensorstore(x, spec)
-    elif isinstance(x, (bool, float, complex, int)):
-        return await save_array_to_tensorstore(np.array(x), spec)
-    elif x is None:
-        return
-    elif isinstance(x, jnp.ndarray):
-        return await save_array_to_tensorstore(x, spec)
-    elif isinstance(x, np.ndarray):
-        return await save_array_to_tensorstore(x, spec)
-    else:
-        raise TypeError(f"Can't serialize {type(x)}")
-
-
-async def save_array_to_tensorstore(x, spec):
-    if jax.process_index() == 0:
-        if x.dtype == jnp.bfloat16:
-            # Tensorstore uses 'bfloat16', not '
+def _sharding_from_leaf(leaf, axis_mapping, mesh) -> Optional[jax.sharding.Sharding]:
+    if is_named_array(leaf):
+        return hax.partitioning.sharding_for_axis(leaf.axes, axis_mapping, mesh)
+    elif hasattr(leaf, "sharding") and getattr(leaf, "sharding") is not None:
+        return leaf.sharding
+    elif is_jax_array_like(leaf):
+        return _fully_replicated_sharding(mesh)
+    elif isinstance(leaf, (bool, float, complex, int, np.ndarray)):
+        return _fully_replicated_sharding(mesh)
     else:
-        raise TypeError(f"Can't deserialize {type(like)}")
+        print(f"Unknown leaf type {type(leaf)}")
+        return None

-async def _deserialize_named_array(like, spec, axis_mapping, mesh):
-    # the main thing we're worried about is deserialized NamedArrays that are not yet arrays but are ShapedDtypeStructs.
-    # These don't (currently) have sharding info, but we can infer it from the axes
-    if isinstance(like.array, jax.ShapeDtypeStruct):
-        sharding = hax.partitioning.sharding_for_axis(like.axes, axis_mapping, mesh)
-        array = await array_ser.async_deserialize(sharding, spec, global_shape=like.array.shape, dtype=like.dtype)
-        assert sharding.is_equivalent_to(array.sharding, len(like.array.shape))
-        return hax.NamedArray(array, like.axes)
-    else:
-        array = await _deserialize_one_leaf(like.array, spec, axis_mapping, mesh)
-        return hax.NamedArray(array, like.axes)
+def _fully_replicated_sharding(mesh):
+    return hax.partitioning.sharding_for_axis((), {}, mesh)


 def tree_deserialize_leaves_tensorstore(
-    checkpoint_dir, pytree, axis_mapping: Optional[ResourceMapping] = None, mesh: Optional[Mesh] = None
+    checkpoint_dir,
+    pytree,
+    axis_mapping: Optional[ResourceMapping] = None,
+    mesh: Optional[Mesh] = None,
+    manager: Optional[array_ser.GlobalAsyncCheckpointManager] = None,
 ):
     """
     Deserializes a PyTree of Arrays and NamedArrays from a Tensorstore checkpoint, returning a pytree with the same shape
@@ -168,24 +112,42 @@ def tree_deserialize_leaves_tensorstore(
     (i.e. they are not yet arrays but are ShapedDtypeStructs), provided you pass in the axis_mapping and mesh (or they are available by context)

-    :param checkpoint_dir: the directory containing the tensorstore checkpoint, can be a local path or a GCS path
-    :param pytree: the exemplar pytree
-    :param axis_mapping: optional, the axis mapping for the NamedArrays (if they are not yet arrays)
-    :param mesh: optional, the mesh for the NamedArrays (if they are not yet arrays)
+    Args:
+        checkpoint_dir: the directory containing the tensorstore checkpoint, can be a local path or a GCS path
+        pytree: the exemplar pytree
+        axis_mapping: optional, the axis mapping for the NamedArrays (if they are not yet arrays)
+        mesh: optional, the mesh for the NamedArrays (if they are not yet arrays)
+        manager: optional, the checkpoint manager to use. If not provided, a new one will be created

-    :return: a pytree with the same shape as the exemplar pytree, but with the arrays deserialized from the checkpoint
+    Returns:
+        A pytree with the same shape as the exemplar pytree, but with the arrays deserialized from the checkpoint
     """
+    if manager is None:
+        manager = array_ser.GlobalAsyncCheckpointManager()
+
+    shardings: PyTree[Optional[Sharding]] = jtu.tree_map(
+        partial(_sharding_from_leaf, axis_mapping=axis_mapping, mesh=mesh), pytree, is_leaf=is_named_array
+    )
+
     # TODO: support ShapeDtypeStructs that are not NamedArrays
-    leaf_key_paths = jax_utils.leaf_key_paths(pytree, is_leaf=is_named_array)
-    specs = htu.tree_map(partial(_tensorstore_spec_for, checkpoint_dir), leaf_key_paths)
+    leaf_key_paths = jax_utils.leaf_key_paths(shardings, is_leaf=is_named_array)
+    paths = _fs_paths_from_key_paths(checkpoint_dir, leaf_key_paths)
+    paths = jtu.tree_leaves(paths)

-    deser_partial = functools.partial(_deserialize_one_leaf, axis_mapping=axis_mapping, mesh=mesh)
+    shardings_leaves, shardings_structure = jtu.tree_flatten(shardings)

-    futures = jtu.tree_map(deser_partial, pytree, specs, is_leaf=is_named_array)
-    leaves, structure = jtu.tree_flatten(futures, is_leaf=is_named_array)
+    assert len(shardings_leaves) == len(paths)

-    async def _do_deserialize():
-        values = await asyncio.gather(*leaves)
-        return jtu.tree_unflatten(structure, values)
+    ret_leaves = manager.deserialize_with_paths(shardings=shardings_leaves, paths=paths)
+
+    deser_arrays = jtu.tree_unflatten(shardings_structure, ret_leaves)
+
+    # deser_arrays only has arrays, but we need named arrays for at least some.
+    # The original pytree has the structure we want, so we'll use that to rebuild the named arrays
+    def _rebuild_named_array(like, array):
+        if is_named_array(like):
+            return hax.NamedArray(array, like.axes)
+        else:
+            return array

-    return asyncio.run(_do_deserialize())
+    return jtu.tree_map(_rebuild_named_array, pytree, deser_arrays, is_leaf=_is_named_or_none)
diff --git a/tests/test_tensorstore_serialization.py b/tests/test_tensorstore_serialization.py
index 4c1854df0..77d63d656 100644
--- a/tests/test_tensorstore_serialization.py
+++ b/tests/test_tensorstore_serialization.py
@@ -46,109 +46,115 @@ def make_state(key):
 def test_checkpoint_steps():
-    key0 = jax.random.PRNGKey(0)
-    key1 = jax.random.PRNGKey(1)
+    mesh = jax.sharding.Mesh(jax.devices(), ("device",))
+    with mesh:
+        key0 = jax.random.PRNGKey(0)
+        key1 = jax.random.PRNGKey(1)

-    optim = optax.adam(1e-4)
+        optim = optax.adam(1e-4)

-    def make_state(key):
-        model = MLP(in_size=2, out_size=1, width_size=2, depth=3, key=key)
-        opt_state = optim.init(arrays_only(model))
+        def make_state(key):
+            model = MLP(in_size=2, out_size=1, width_size=2, depth=3, key=key)
+            opt_state = optim.init(arrays_only(model))

-        return model, opt_state, key
+            return model, opt_state, key

-    initial_model, initial_opt_state, initial_key = make_state(key0)
-    data = jax.random.uniform(key0, (2, 2))
+        initial_model, initial_opt_state, initial_key = make_state(key0)
+        data = jax.random.uniform(key0, (2, 2))

-    @eqx.filter_grad
-    def loss_fn(model, data):
-        m = jax.vmap(model)
-        return jnp.mean(jnp.square(m(data)))
+        @eqx.filter_grad
+        def loss_fn(model, data):
+            m = jax.vmap(model)
+            return jnp.mean(jnp.square(m(data)))

-    model, state = initial_model, initial_opt_state
-    for i in range(3):
-        grad = loss_fn(model, data)
-        updates, state = optim.update(grad, state)
-        model = eqx.apply_updates(model, updates)
+        model, state = initial_model, initial_opt_state
+        for i in range(3):
+            grad = loss_fn(model, data)
+            updates, state = optim.update(grad, state)
+            model = eqx.apply_updates(model, updates)

-    assert_trees_not_close(model, initial_model)
-    assert_trees_not_close(state, initial_opt_state)
+        assert_trees_not_close(model, initial_model)
+        assert_trees_not_close(state, initial_opt_state)

-    rep_model, rep_state, rep_key = make_state(key1)
-    assert_trees_not_close(model, rep_model)
-    assert_trees_not_close(state, rep_state)
+        rep_model, rep_state, rep_key = make_state(key1)
+        assert_trees_not_close(model, rep_model)
+        assert_trees_not_close(state, rep_state)

-    with TemporaryDirectory() as tmpdir:
-        tree_serialize_leaves_tensorstore(tmpdir, (model, state, initial_key, 3))
-        restored_model, restored_state, rkey, step = tree_deserialize_leaves_tensorstore(
-            tmpdir,
-            (rep_model, rep_state, rep_key, 0),
-        )
-        assert step == 3
+        with TemporaryDirectory() as tmpdir:
+            tree_serialize_leaves_tensorstore(tmpdir, (model, state, initial_key, 3))
+            restored_model, restored_state, rkey, step = tree_deserialize_leaves_tensorstore(
+                tmpdir,
+                (rep_model, rep_state, rep_key, 0),
+            )
+            assert step == 3

-        assert_trees_all_close(
-            jax.tree_util.tree_leaves(arrays_only(restored_model)),
-            jax.tree_util.tree_leaves(arrays_only(model)),
-        )
-        assert_trees_all_close(
+            assert_trees_all_close(
+                jax.tree_util.tree_leaves(arrays_only(restored_model)),
+                jax.tree_util.tree_leaves(arrays_only(model)),
+            )
+            assert_trees_all_close(
jax.tree_util.tree_leaves(arrays_only(restored_state)), + jax.tree_util.tree_leaves(arrays_only(state)), + ) + assert step == 3 def test_tensorstore_gpt2_mlp(): - from levanter.models.gpt2 import Gpt2Mlp + mesh = jax.sharding.Mesh(jax.devices(), ("device",)) + with mesh: + from levanter.models.gpt2 import Gpt2Mlp - key0 = jax.random.PRNGKey(0) - key1 = jax.random.PRNGKey(1) + key0 = jax.random.PRNGKey(0) + key1 = jax.random.PRNGKey(1) - Embed = hax.Axis("embed", 64) - Intermediate = hax.Axis("intermediate", 128) + Embed = hax.Axis("embed", 64) + Intermediate = hax.Axis("intermediate", 128) - def make_state(key): - model = Gpt2Mlp.init(Embed, Intermediate, jax.nn.relu, key=key) - optim = optax.adam(1e-4) - opt_state = optim.init(arrays_only(model)) + def make_state(key): + model = Gpt2Mlp.init(Embed, Intermediate, jax.nn.relu, key=key) + optim = optax.adam(1e-4) + opt_state = optim.init(arrays_only(model)) - return arrays_only(model), arrays_only(opt_state), key + return arrays_only(model), arrays_only(opt_state), key - initial_model, initial_opt_state, initial_key = make_state(key0) - rep_model, rep_state, rep_key = make_state(key1) + initial_model, initial_opt_state, initial_key = make_state(key0) + rep_model, rep_state, rep_key = make_state(key1) - assert_trees_not_close(initial_model, rep_model) + assert_trees_not_close(initial_model, rep_model) - with TemporaryDirectory() as tmpdir: - tree_serialize_leaves_tensorstore(tmpdir, (initial_model, initial_opt_state, initial_key)) - restored_model, restored_optstate, rkey = tree_deserialize_leaves_tensorstore( - tmpdir, - (rep_model, rep_state, rep_key), - ) + with TemporaryDirectory() as tmpdir: + tree_serialize_leaves_tensorstore(tmpdir, (initial_model, initial_opt_state, initial_key)) + restored_model, restored_optstate, rkey = tree_deserialize_leaves_tensorstore( + tmpdir, + (rep_model, rep_state, rep_key), + ) - assert_trees_all_close( - jax.tree_util.tree_leaves(arrays_only(restored_model)), - jax.tree_util.tree_leaves(arrays_only(initial_model)), - ) + assert_trees_all_close( + jax.tree_util.tree_leaves(arrays_only(restored_model)), + jax.tree_util.tree_leaves(arrays_only(initial_model)), + ) def test_tensorstore_ok_with_nones(): - A = hax.Axis("A", 10) - - class MyModule(eqx.Module): - a: Any - b: Any - - m = MyModule(a=None, b=hax.zeros(A)) - m2 = MyModule(a=None, b=hax.ones(A)) - - with TemporaryDirectory() as tmpdir: - tree_serialize_leaves_tensorstore(tmpdir, m) - m3 = tree_deserialize_leaves_tensorstore(tmpdir, m2) - assert m3.a is None - assert hax.all(m3.b == hax.zeros(A)) - - m3 = MyModule(a=hax.zeros(A), b=hax.ones(A)) - with TemporaryDirectory() as tmpdir: - tree_serialize_leaves_tensorstore(tmpdir, m2) - with pytest.raises(ValueError): - tree_deserialize_leaves_tensorstore(tmpdir, m3) + mesh = jax.sharding.Mesh(jax.devices(), ("device",)) + with mesh: + A = hax.Axis("A", 10) + + class MyModule(eqx.Module): + a: Any + b: Any + + m = MyModule(a=None, b=hax.zeros(A)) + m2 = MyModule(a=None, b=hax.ones(A)) + + with TemporaryDirectory() as tmpdir: + tree_serialize_leaves_tensorstore(tmpdir, m) + m3 = tree_deserialize_leaves_tensorstore(tmpdir, m2) + assert m3.a is None + assert hax.all(m3.b == hax.zeros(A)) + + m3 = MyModule(a=hax.zeros(A), b=hax.ones(A)) + with TemporaryDirectory() as tmpdir: + tree_serialize_leaves_tensorstore(tmpdir, m2) + with pytest.raises(ValueError): + tree_deserialize_leaves_tensorstore(tmpdir, m3) From 20a45680593781d1e6f14d2d56435bd97c71859a Mon Sep 17 00:00:00 2001 From: David Hall Date: Sat, 12 Oct 
Subject: [PATCH 04/56] cleaner data loader but it doesn't help :(

---
 src/levanter/data/loader.py | 176 +++++++++++++++++++-----------
 1 file changed, 94 insertions(+), 82 deletions(-)

diff --git a/src/levanter/data/loader.py b/src/levanter/data/loader.py
index ab97e0827..6e1932d02 100644
--- a/src/levanter/data/loader.py
+++ b/src/levanter/data/loader.py
@@ -21,7 +21,8 @@
 from levanter.data.utils import batched
 from levanter.shapes import NamedShapeSpec, ShapeSpec, to_raw_shape
 from levanter.utils.background_iterable import BackgroundIterable
-from levanter.utils.thread_utils import blocking_wait
+from levanter.utils.jax_utils import local_cpu_mesh
+from levanter.utils.thread_utils import AsyncIteratorWrapper, blocking_wait

 Ex = TypeVar("Ex")

@@ -109,31 +110,32 @@ def __init__(self, data_loader: DataLoader, start_from_batch: Optional[int] = No
         if self.mapping is None:
             self.mapping = hax.partitioning.current_thread_local_mapping()

-        # TODO: bring back non-prefetching version
         buffered_batches = self.dl.max_buffered_batches
-        self._batches = iter(BackgroundIterable(self._produce_batches, max_capacity=buffered_batches))
+        if buffered_batches == 0:
+            self._batches = AsyncIteratorWrapper(self._produce_batches())
+        else:
+            self._batches = iter(BackgroundIterable(self._produce_batches, max_capacity=buffered_batches))

     def __next__(self):
         time_start = time.time()
-        out = next(self._batches)
+        individual_data_batch = next(self._batches)
+        data_for_this_batch = {index: datum for index, datum in zip(self.dl._local_indices, individual_data_batch)}
+        batch = self._batchify_local_data(data_for_this_batch)
+
         time_end = time.time()
         if (time_end - time_start) > 0.5:
             logger.info(f"Prefetch wasn't fast enough: {time_end - time_start:.3f}")
-        return out
+        return batch

     async def _produce_batches(self):
         batch_number = self._start_from_batch or 0
-        total_ex_loaded = 0
         done = False
         while not done:
             next_batch_numbers = []
             for i in range(self.dl.prefetch_size):
-                if self.dl.data_store.is_finite():
-                    next_end = (batch_number + 1) * self.dl.batch_size
-                    available_len = await self.dl.data_store.wait_until_len_at_least(next_end)
-                    if available_len < next_end:
-                        done = True
-                        break
+                if await self._dataset_has_enough_examples_left(batch_number):
+                    done = True
+                    break

                 next_batch_numbers.append(batch_number)
                 batch_number += 1

@@ -141,83 +143,93 @@ async def _produce_batches(self):
             async for batch in self._retrieve_batches(next_batch_numbers):
                 yield batch

-            total_ex_loaded += self.dl.batch_size * len(next_batch_numbers)
+    async def _dataset_has_enough_examples_left(self, batch_number):
+        past_the_end = False
+        if self.dl.data_store.is_finite():
+            next_end = (batch_number + 1) * self.dl.batch_size
+            available_len = await self.dl.data_store.wait_until_len_at_least(next_end)
+            past_the_end = available_len < next_end
+        return past_the_end

     async def _retrieve_batches(self, batch_numbers: list[int]):
-        with hax.axis_mapping(self.mapping), self.dl.mesh:
-            indices_for_this_batch_of_batches: list[int] = []
-            for bn in batch_numbers:
-                indices_this_batch = range(bn * self.dl.batch_size, (bn + 1) * self.dl.batch_size, 1)
-                indices_this_batch_this_process = [indices_this_batch[i] for i in self.dl._local_indices]
-                indices_for_this_batch_of_batches.extend(indices_this_batch_this_process)
-
+        with local_cpu_mesh():
             time_start = time.time()
-            individual_datums = await self.dl.data_store.get_batch(indices_for_this_batch_of_batches)
+            individual_datums_for_each_batch = await self._do_retrieve_batch_of_batches(batch_numbers)
+            # reshape to be per batch

             time_end = time.time()
             logger.debug(f"Time to get {len(batch_numbers)} batches: {time_end - time_start:.3f}")

-            time_start = time.time()
-            # reshape to be per batch
-            individual_datums = list(batched(individual_datums, len(self.dl._local_indices)))
-
-            # below we're gonna get the indices relative to this batch (i.e. 0 to batch_size)
-            index_to_datum = [
-                {index: datum for index, datum in zip(self.dl._local_indices, individual_data_batch)}
-                for individual_data_batch in individual_datums
-            ]
-
-            def get_local_batch(bn: int, begin: int, end: int) -> list:
-                # TODO: if we ever do "big data" (i.e. huge examples) we might want to be able to load part of an example
-                # which will require support from the datastore (i.e. tensorstore)
-                device_batch = _stack_tree(self.dl.Batch.name, [index_to_datum[bn][i] for i in range(begin, end)])
-                batch_leaves = hax.tree_util.tree_leaves(device_batch)
-                return batch_leaves
-
-            def get_local_data_for_leaf(bn, indices: _TensorSliceIndex, leaf_index: int) -> Array:
-                batch_slice = indices[0]
-                begin, end, stride = batch_slice.indices(self.dl.batch_size)
-                if stride != 1:
-                    raise ValueError("Stride must be 1")
-
-                leaf_data = (get_local_batch(bn, begin, end))[leaf_index]
-
-                if isinstance(leaf_data, hax.NamedArray):
-                    # select out the batch axis
-                    batch_index = index_where(lambda ax: ax.name == self.dl.Batch.name, leaf_data.axes)
-                    new_indices = list(indices)
-                    new_indices[batch_index] = slice(None)
-                    return leaf_data.array[tuple(new_indices)]
+            for data in individual_datums_for_each_batch:
+                yield data
+
+    def _batchify_local_data(self, data_for_this_batch: dict[int, Array]):
+        cache: dict[tuple[int, int], list[Array | hax.NamedArray]] = {}
+
+        def get_local_batch(begin: int, end: int) -> list:
+            if (begin, end) in cache:
+                return cache[(begin, end)]
+
+            # TODO: if we ever do "big data" (i.e. huge examples) we might want to be able to load part of an example
+            # which will require support from the datastore (i.e. tensorstore)
+            device_batch = _stack_tree(self.dl.Batch.name, [data_for_this_batch[i] for i in range(begin, end)])
+            batch_leaves = hax.tree_util.tree_leaves(device_batch)
+
+            cache[(begin, end)] = batch_leaves
+
+            return batch_leaves
+
+        def get_local_data_for_leaf(indices: _TensorSliceIndex, leaf_index: int) -> Array:
+            batch_slice = indices[0]
+            begin, end, stride = batch_slice.indices(self.dl.batch_size)
+            if stride != 1:
+                raise ValueError("Stride must be 1")
+
+            leaf_data = get_local_batch(begin, end)[leaf_index]
+
+            if isinstance(leaf_data, hax.NamedArray):
+                # select out the batch axis
+                batch_index = index_where(lambda ax: ax.name == self.dl.Batch.name, leaf_data.axes)
+                new_indices = list(indices)
+                new_indices[batch_index] = slice(None)
+                return leaf_data.array[tuple(new_indices)]
+            else:
+                other_indices = indices[1:]
+                if all(idx == slice(None) for idx in other_indices):
+                    return leaf_data
                 else:
-                    other_indices = indices[1:]
-                    if all(idx == slice(None) for idx in other_indices):
-                        return leaf_data
-                    else:
-                        # TODO: this doesn't work with named axes
-                        return leaf_data[(..., *other_indices)]
-
-            for batch_offset, bn in enumerate(batch_numbers):
-
-                def make_global_array_for_leaf(leaf_index, item_leaf_shape: ShapeSpec | NamedShapeSpec):
-                    def get_data(indices):
-                        return get_local_data_for_leaf(batch_offset, indices, leaf_index)
-
-                    raw_array = jax.make_array_from_callback(
-                        to_raw_shape(item_leaf_shape),
-                        jax.sharding.NamedSharding(self.dl.mesh, self._pspec_for(item_leaf_shape)),
-                        get_data,
-                    )
-                    if isinstance(item_leaf_shape, NamedShapeSpec):
-                        return hax.NamedArray(raw_array, item_leaf_shape.shape)
-                    else:
-                        return raw_array
-
-                gda_leaves = [
-                    make_global_array_for_leaf(leaf_index, _batchified_shape(self.dl.Batch, item_leaf))
-                    for leaf_index, item_leaf in enumerate(self.dl._ex_leaves)
-                ]
-
-                gda_tree = jax.tree.unflatten(self.dl._ex_structure, gda_leaves)
-                yield gda_tree
+                    # TODO: this doesn't work with named axes
+                    return leaf_data[(..., *other_indices)]
+
+        def make_global_array_for_leaf(leaf_index, item_leaf_shape: ShapeSpec | NamedShapeSpec):
+            def get_data(indices):
+                return get_local_data_for_leaf(indices, leaf_index)
+
+            raw_array = jax.make_array_from_callback(
+                to_raw_shape(item_leaf_shape),
+                jax.sharding.NamedSharding(self.dl.mesh, self._pspec_for(item_leaf_shape)),
+                get_data,
+            )
+            if isinstance(item_leaf_shape, NamedShapeSpec):
+                return hax.NamedArray(raw_array, item_leaf_shape.shape)
+            else:
+                return raw_array
+
+        gda_leaves = [
+            make_global_array_for_leaf(leaf_index, _batchified_shape(self.dl.Batch, item_leaf))
+            for leaf_index, item_leaf in enumerate(self.dl._ex_leaves)
+        ]
+        gda_tree = jax.tree.unflatten(self.dl._ex_structure, gda_leaves)
+        return gda_tree
+
+    async def _do_retrieve_batch_of_batches(self, batch_numbers):
+        indices_for_this_batch_of_batches: list[int] = []
+        for bn in batch_numbers:
+            indices_this_batch = range(bn * self.dl.batch_size, (bn + 1) * self.dl.batch_size, 1)
+            indices_this_batch_this_process = [indices_this_batch[i] for i in self.dl._local_indices]
+            indices_for_this_batch_of_batches.extend(indices_this_batch_this_process)
+        individual_datums = await self.dl.data_store.get_batch(indices_for_this_batch_of_batches)
+        individual_datums_for_each_batch = list(batched(individual_datums, len(self.dl._local_indices)))
+        return individual_datums_for_each_batch

     def _pspec_for(self, shape_spec: ShapeSpec | NamedShapeSpec) -> PartitionSpec:
         if isinstance(shape_spec, ShapeSpec):  # type: ignore
From 2747705db159cb70f486ea5fda347d7821cc2246 Mon Sep 17 00:00:00 2001
From: David Hall
Date: Sun, 13 Oct 2024 23:15:24 -0700
Subject: [PATCH 05/56] ok this maybe fixed it?

---
 src/levanter/data/loader.py | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/src/levanter/data/loader.py b/src/levanter/data/loader.py
index 6e1932d02..826367c2e 100644
--- a/src/levanter/data/loader.py
+++ b/src/levanter/data/loader.py
@@ -131,25 +131,30 @@ async def _produce_batches(self):
         batch_number = self._start_from_batch or 0
         done = False
         while not done:
-            next_batch_numbers = []
-            for i in range(self.dl.prefetch_size):
-                if await self._dataset_has_enough_examples_left(batch_number):
-                    done = True
-                    break
+            target_next_batch_number = batch_number + self.dl.prefetch_size
+            max_achievable_batch_number = await self._dataset_get_available_batch_number(target_next_batch_number)
+            if max_achievable_batch_number < target_next_batch_number:
+                done = True

-                next_batch_numbers.append(batch_number)
-                batch_number += 1
+            next_batch_numbers = list(range(batch_number, min(target_next_batch_number, max_achievable_batch_number)))
+
+            if len(next_batch_numbers) == 0:
+                break
+
+            batch_number = next_batch_numbers[-1] + 1

             async for batch in self._retrieve_batches(next_batch_numbers):
                 yield batch

-    async def _dataset_has_enough_examples_left(self, batch_number):
-        past_the_end = False
+    async def _dataset_get_available_batch_number(self, target_max_batch_number: int) -> int:
         if self.dl.data_store.is_finite():
-            next_end = (batch_number + 1) * self.dl.batch_size
+            next_end = (target_max_batch_number + 1) * self.dl.batch_size
             available_len = await self.dl.data_store.wait_until_len_at_least(next_end)
-            past_the_end = available_len < next_end
-        return past_the_end
+            max_achievable_batch_number = available_len // self.dl.batch_size
+
+            return max_achievable_batch_number
+
+        return target_max_batch_number

     async def _retrieve_batches(self, batch_numbers: list[int]):
         with local_cpu_mesh():

From 538f0ede03c60418ff2b2e47af4f14d11d9145ec Mon Sep 17 00:00:00 2001
From: David Hall
Date: Sun, 13 Oct 2024 23:22:41 -0700
Subject: [PATCH 06/56] cleanup

---
 src/levanter/data/loader.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/levanter/data/loader.py b/src/levanter/data/loader.py
index 826367c2e..320e8266d 100644
--- a/src/levanter/data/loader.py
+++ b/src/levanter/data/loader.py
@@ -263,14 +263,6 @@ def _batchified_shape(Batch, leaf: hax.NamedArray | Array) -> ShapeSpec | NamedS
     return ShapeSpec((Batch.size,) + leaf.shape, leaf.dtype)


-def _pspec_for(self, shape_spec: ShapeSpec | NamedShapeSpec) -> PartitionSpec:
-    if isinstance(shape_spec, ShapeSpec):  # type: ignore
-        batch_name = hax.partitioning.physical_axis_name(self.Batch, self.axis_resources)
-        return PartitionSpec(batch_name, *((None,) * (len(shape_spec.shape) - 1)))
-    else:
-        return hax.partitioning.pspec_for_axis(shape_spec.shape, self.axis_resources)  # type: ignore
-
-
 @functools.partial(jax.jit, static_argnums=(0,))
 def _stack_tree(batch_name, individual_datums):
     def _stack_leaves_unchecked(*leaves):

From 2c5ee4b015c84789e6a810fbf62f24b3bc08c0e7 Mon Sep 17 00:00:00 2001
From: David Hall
Date: Sun, 13 Oct 2024 23:54:06 -0700
Subject: [PATCH 07/56] fix tests

---
 src/levanter/tensorstore_serialization.py | 4 ++--
 src/levanter/utils/jax_utils.py           | 8 +++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/levanter/tensorstore_serialization.py b/src/levanter/tensorstore_serialization.py
index ba89fd423..9a602a1bb 100644
--- a/src/levanter/tensorstore_serialization.py
+++ b/src/levanter/tensorstore_serialization.py
@@ -45,8 +45,8 @@ def tree_serialize_leaves_tensorstore(
     leaf_key_paths = jax_utils.leaf_key_paths(pytree, is_leaf=is_named_array)
     paths = _fs_paths_from_key_paths(checkpoint_dir, leaf_key_paths)
-    paths = jtu.tree_leaves(paths)
-    leaves = jtu.tree_leaves(pytree)
+    paths = jtu.tree_leaves(paths, is_leaf=lambda x: x is None)
+    leaves = jtu.tree_leaves(pytree, is_leaf=lambda x: x is None)

     assert len(leaves) == len(paths)

     # ok, not all of these are arrays, but we'll deal with that in the async function
diff --git a/src/levanter/utils/jax_utils.py b/src/levanter/utils/jax_utils.py
index 1d7205365..39fbf438c 100644
--- a/src/levanter/utils/jax_utils.py
+++ b/src/levanter/utils/jax_utils.py
@@ -170,13 +170,13 @@ def leaf_key_paths(
             if field.metadata.get("static", False):
                 continue
             field_name = field.name
-            field = getattr(pytree, field_name)
+            field_value = getattr(pytree, field_name)
             names.append(field_name)

             if use_state_dict_keys and hasattr(pytree, "_state_dict_key_map"):
                 field_name = pytree._state_dict_key_map().get(field_name, field_name)

-            rec_value = rec(field, field_name)
+            rec_value = rec(field_value, field_name)
             rec_values.append(rec_value)

         _, tree_def = eqx.tree_flatten_one_level(pytree)
@@ -186,7 +186,9 @@ def leaf_key_paths(
         # return eqx.tree_at(lambda m: [getattr(m, name) for name in names], pytree, rec_values, is_leaf=lambda x: x is None)
     else:
         leaves, treedef = jax.tree_util.tree_flatten(pytree, is_leaf=is_leaf)
-        if len(leaves) == 1:
+        if len(leaves) == 0:
+            return None
+        elif len(leaves) == 1:
             return jax.tree_util.tree_unflatten(treedef, [f"{prefix}"])
         else:
             return jax.tree_util.tree_unflatten(treedef, [join_key(prefix, str(i)) for i in range(len(leaves))])

From ab543d6ec30ee8d278e66ba775bc7f130bdd72ca Mon Sep 17 00:00:00 2001
From: David Hall
Date: Mon, 14 Oct 2024 00:08:02 -0700
Subject: [PATCH 08/56] fix what is probably the underlying problem

---
 src/levanter/data/loader.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/levanter/data/loader.py b/src/levanter/data/loader.py
index 320e8266d..fdecfa245 100644
--- a/src/levanter/data/loader.py
+++ b/src/levanter/data/loader.py
@@ -99,6 +99,8 @@ def __iter__(self):
         return self.iter_from_step(None)

     def iter_from_step(self, start_from_batch: Optional[int] = None):
+        # sometimes we pass in an array for the start_from_batch, so we need to check for that
+        start_from_batch = int(start_from_batch) if start_from_batch is not None else None
         return DataLoaderIterator(self, start_from_batch=start_from_batch)

From 6395305a1e48969d6ca59f0588651e6e529e0922 Mon Sep 17 00:00:00 2001
From: David Hall
Date: Mon, 14 Oct 2024 00:10:12 -0700
Subject: [PATCH 09/56] wip

---
 src/levanter/tensorstore_serialization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/levanter/tensorstore_serialization.py b/src/levanter/tensorstore_serialization.py
index 9a602a1bb..e5c651df8 100644
--- a/src/levanter/tensorstore_serialization.py
+++ b/src/levanter/tensorstore_serialization.py
@@ -132,7 +132,7 @@ def tree_deserialize_leaves_tensorstore(
     # TODO: support ShapeDtypeStructs that are not NamedArrays
     leaf_key_paths = jax_utils.leaf_key_paths(shardings, is_leaf=is_named_array)
     paths = _fs_paths_from_key_paths(checkpoint_dir, leaf_key_paths)
-    paths = jtu.tree_leaves(paths)
+    paths = jtu.tree_leaves(paths, is_leaf=lambda x: x is None)

     shardings_leaves, shardings_structure = jtu.tree_flatten(shardings)
From f98e37604abffa19cd582a86cf7514be62bf785b Mon Sep 17 00:00:00 2001
From: Kaiyue Wen
Date: Mon, 2 Dec 2024 20:44:50 -0800
Subject: [PATCH 10/56] Implement MARS (tested) and Muon (has a bug in saving), example configs in config/llama2_100M_{mars/muon}.yaml

---
 config/llama2_100M_mars.yaml   |  34 +++++++
 config/llama2_100M_muon.yaml   |  34 +++++++
 src/levanter/optim/__init__.py |   8 ++
 src/levanter/optim/mars.py     | 135 +++++++++++++++++++++++++++
 src/levanter/optim/muon.py     | 162 +++++++++++++++++++++++++++++++++
 5 files changed, 373 insertions(+)
 create mode 100644 config/llama2_100M_mars.yaml
 create mode 100644 config/llama2_100M_muon.yaml
 create mode 100644 src/levanter/optim/mars.py
 create mode 100644 src/levanter/optim/muon.py

diff --git a/config/llama2_100M_mars.yaml b/config/llama2_100M_mars.yaml
new file mode 100644
index 000000000..2c062d816
--- /dev/null
+++ b/config/llama2_100M_mars.yaml
@@ -0,0 +1,34 @@
+data: !include data/dclm_gpt_neo.yaml
+model:
+  type: llama
+  seq_len: 4096
+  hidden_dim: 768
+  intermediate_dim: 3072
+  num_layers: 12
+  num_heads: 12
+  num_kv_heads: 12
+trainer:
+  tracker:
+    project: "levanter"
+    tags: ["pile", "llama"]
+  mp: p=f32,c=bfloat16
+  model_axis_size: 1
+  checkpointer:
+    keep:
+      - every: 1000
+    save_interval: 30m
+
+
+  train_batch_size: 1024
+  per_device_parallelism: 4  # set for v3 TPU
+  per_device_eval_parallelism: 4  # set a larger batch size for eval
+  num_train_steps: 50001
+optimizer:
+  learning_rate: 4E-3
+  weight_decay: 0.1
+  min_lr_ratio: 0.0
+  warmup: 2000
+  cooldown: 0.4
+  lr_schedule: constant
+  gamma: 0.025
+  type: mars
diff --git a/config/llama2_100M_muon.yaml b/config/llama2_100M_muon.yaml
new file mode 100644
index 000000000..3f8194465
--- /dev/null
+++ b/config/llama2_100M_muon.yaml
@@ -0,0 +1,34 @@
+data: !include data/dclm_gpt_neo.yaml
+model:
+  type: llama
+  seq_len: 4096
+  hidden_dim: 768
+  intermediate_dim: 3072
+  num_layers: 12
+  num_heads: 12
+  num_kv_heads: 12
+trainer:
+  tracker:
+    project: "levanter"
+    tags: ["pile", "llama"]
+  mp: p=f32,c=bfloat16
+  model_axis_size: 1
+  checkpointer:
+    keep:
+      - every: 1000
+    save_interval: 30m
+
+
+  train_batch_size: 1024
+  per_device_parallelism: 4  # set for v3 TPU
+  per_device_eval_parallelism: 4  # set a larger batch size for eval
+  num_train_steps: 50001
+optimizer:
+  learning_rate: 2E-2
+  weight_decay: 0
+  warmup: 0
+  cooldown: 0.1
+  lr_schedule: constant
+  min_lr_ratio: 0.0
+  max_grad_norm: 0.0
+  type: muon
diff --git a/src/levanter/optim/__init__.py b/src/levanter/optim/__init__.py
index 7dec2ebb4..2cd5ad781 100644
--- a/src/levanter/optim/__init__.py
+++ b/src/levanter/optim/__init__.py
@@ -5,3 +5,11 @@
     scale_by_sophia_g,
     scale_by_sophia_h,
 )
+from .muon import (
+    MuonConfig,
+    ScaleByMuonState
+)
+from .mars import (
+    MarsConfig,
+    ScaleByMarsState
+)
\ No newline at end of file
diff --git a/src/levanter/optim/mars.py b/src/levanter/optim/mars.py
new file mode 100644
index 000000000..c117a27f6
--- /dev/null
+++ b/src/levanter/optim/mars.py
@@ -0,0 +1,135 @@
+import abc
+import functools
+from dataclasses import dataclass
+from typing import Any, NamedTuple, Optional, TypeVar
+
+import equinox as eqx
+import jax
+import jaxtyping
+import optax
+from jax import numpy as jnp
+from jax.random import PRNGKey
+from jaxtyping import PRNGKeyArray
+
+import levanter.tracker
+from levanter.optim.config import HessianOptConfig, OptimizerConfig
+from levanter.optim.util import hvp, tree_gaussian_like
+from levanter.utils.jax_utils import parameter_count, tree_filter_like
+
+
+@OptimizerConfig.register_subclass("mars")
+@dataclass
+class MarsConfig(OptimizerConfig):
+    weight_decay: float = 0.1
+    beta1: float = 0.95
+    # cf https://docs.mosaicml.com/projects/composer/en/latest/api_reference/generated/composer.optim.DecoupledAdamW.html
+    # https://x.com/giffmana/status/1692641748445438301
+    beta2: float = 0.99
+    gamma: float = 0.025
+    epsilon: float = 1e-8
+    max_grad_norm: Optional[float] = 1.0
+    haps: Optional[list[int]] = None
+    schedule_list: Optional[list[str]] = None
+
+    def build(self, num_train_steps):
+        """Creates the optimizer"""
+        # indirection makes it work with optax.inject_hyperparams so we can log the learning rate
+        def _optimizer(learning_rate):
+            components = []
+
+            components.append(scale_by_mars(self.beta1, self.beta2, self.gamma, self.epsilon, max_grad_norm=self.max_grad_norm))
+
+            if self.weight_decay > 0:
+                components.append(optax.add_decayed_weights(self.weight_decay, self.build_weight_decay_mask()))
+
+            # - learning rate for descent
+            components.append(optax.scale(-learning_rate))
+
+            optimizer = optax.chain(*components)
+
+            return optimizer
+
+        return optax.inject_hyperparams(_optimizer)(learning_rate=self.lr_scheduler(num_train_steps))
+
+
+from optax import tree_utils as otu
+import jax
+import jax.numpy as jnp
+from jax import jit
+
+
+import chex
+
+
+class ScaleByMarsState(NamedTuple):
+    """State for the Mars algorithm."""
+
+    count: chex.Array  # shape=(), dtype=jnp.int32.
+    mu: optax.Updates
+    nu: optax.Updates
+    mog: optax.Updates
+
+
+def scale_by_mars(
+    b1: float = 0.9,
+    b2: float = 0.999,
+    gamma: float = 0.05,
+    eps: float = 1e-8,
+    eps_root: float = 0.0,
+    max_grad_norm: float = 0.0,
+    mu_dtype: Optional[Any] = None,
+) -> optax.GradientTransformation:
+    r"""Rescale updates according to the MARS algorithm.
+
+    https://arxiv.org/abs/2411.10438
+    See :func:`optax.adam` for more details.
+
+    Args:
+        b1: Decay rate for the exponentially weighted average of grads.
+        b2: Decay rate for the exponentially weighted average of squared grads.
+        gamma: controls the scale of variance reduction
+        eps: Term added to the denominator to improve numerical stability.
+        eps_root: Term added to the denominator inside the square-root to improve
+            numerical stability when backpropagating gradients through the rescaling.
+        mu_dtype: Optional dtype to be used for the first order accumulator; if
+            None then the dtype is inferred from params and updates.
+
+    Returns:
+        A :class:`optax.GradientTransformation` object.
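+
+    Note (a reading of the update_fn below, not an official reference): the
+    variance-reduced gradient is c_t = g_t + (gamma * b1 / (1 - b1)) * (g_t - g_{t-1});
+    if max_grad_norm is set, c_t is rescaled so its global norm stays at most
+    max_grad_norm, and the result is then passed through standard bias-corrected
+    Adam moment estimates.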
+ """ + + mu_dtype = jax.dtypes.canonicalize_dtype(mu_dtype) + + def init_fn(params): + mu = otu.tree_zeros_like(params, dtype=mu_dtype) # First moment + nu = otu.tree_zeros_like(params) # Second moment + mog = otu.tree_zeros_like(params, dtype=mu_dtype) # gradient from + return ScaleByMarsState(count=jnp.zeros([], jnp.int32), mu=mu, nu=nu, mog = mog) + + def update_fn(updates, state, params=None): + c = jax.tree.map( + lambda og, g: None if g is None else g + (gamma * b1 / (1 - b1)) * (g - og), + state.mog, + updates, + is_leaf=lambda x: x is None, + ) + if max_grad_norm: + g_norm = optax.global_norm(c) + scale = jnp.minimum(1.0, max_grad_norm / (g_norm + 1e-6)) + c = jax.tree_map(lambda g: None if g is None else g * scale, + c, + is_leaf=lambda x: x is None + ) + mu = otu.tree_update_moment(c, state.mu, b1, 1) + nu = otu.tree_update_moment_per_elem_norm(c, state.nu, b2, 2) + count_inc = optax.safe_increment(state.count) + mu_hat = otu.tree_bias_correction(mu, b1, count_inc) + # Dozat 2016 https://openreview.net/pdf?id=OM0jvwB8jIp57ZJjtNEZ + # Algorithm 2 further multiplies Adam's standard nu_hat by b2. It is + # unclear why. Other Nadam implementations also omit the extra b2 factor. + nu_hat = otu.tree_bias_correction(nu, b2, count_inc) + adam_updates = jax.tree.map( + lambda m, v: None if m is None else m / (jnp.sqrt(v + eps_root) + eps), + mu_hat, + nu_hat, + is_leaf=lambda x: x is None, + ) + mu = otu.tree_cast(mu, mu_dtype) + return adam_updates, ScaleByMarsState(count=count_inc, mu=mu, nu=nu, mog = updates) + return optax.GradientTransformation(init_fn, update_fn) \ No newline at end of file diff --git a/src/levanter/optim/muon.py b/src/levanter/optim/muon.py new file mode 100644 index 000000000..6ea87af8a --- /dev/null +++ b/src/levanter/optim/muon.py @@ -0,0 +1,162 @@ +import re +from dataclasses import dataclass +from typing import Any, Callable, NamedTuple, Optional, Union + +import chex +import jax +import jax.numpy as jnp +import optax +from levanter.optim.config import OptimizerConfig +from levanter.utils.jax_utils import leaf_key_paths +from optax import tree_utils as otu +import equinox as eqx +import haliax +from functools import partial + +@OptimizerConfig.register_subclass("muon") +@dataclass +class MuonConfig(OptimizerConfig): + """ + Muon optimizer configuration: Momentum Orthogonalized by Newton-Schulz. + """ + lr: float = 0.02 + muon_to_adam_lr: float = 0.18 # Scaling factor between AdamW and Muon learning rates + momentum: float = 0.95 + nesterov: bool = True + backend_steps: int = 10 # Number of steps for Newton-Schulz orthogonalization + weight_decay: float = 0.0 + beta1: float = 0.9 + beta2: float = 0.95 + epsilon: float = 1e-8 + max_grad_norm: float = 1.0 + # adam_modules: Optional[list[str] | str] = None + # """A regex or a list of strings to identify where to mask weight. + # For nano-GPT, this field can be set as `r".*attn.*weight|.*mlp.*weight|.*token_embeddings|.*position_embeddings"`""" + # default_adam_mask: Optional[bool] = None + # """Whether to apply a default reasonable weight decay to modules not explicitly masked. None means it will if + # no weight_decay_modules are set. False means it will not. True means it will regardless of weight_decay_modules.""" + + def build(self, num_train_steps): + """ + Creates the optimizer. 
+ """ + learning_rate_schedule = self.lr_scheduler(num_train_steps) + + def optimizer(learning_rate): + adam_lr = learning_rate * self.muon_to_adam_lr + + def muon_transform(): + components = [] + # Muon seems incompatible with gradient clipping, need to investigate + # if self.max_grad_norm: + # components.append(optax.clip_by_global_norm(self.max_grad_norm)) + components.append(scale_with_muon(self.momentum, self.nesterov, self.backend_steps)) + if self.weight_decay > 0: + components.append(optax.add_decayed_weights(self.weight_decay, self.build_weight_decay_mask())) + components.append(optax.scale(-learning_rate)) + optimizer = optax.chain(*components) + return optimizer + + def adamw_transform(): + components = [] + if self.max_grad_norm: + components.append(optax.clip_by_global_norm(self.max_grad_norm)) + components.append(optax.scale_by_adam(self.beta1, self.beta2, self.epsilon)) + if self.weight_decay > 0: + components.append(optax.add_decayed_weights(self.weight_decay, self.build_weight_decay_mask())) + components.append(optax.scale(-adam_lr)) + optimizer = optax.chain(*components) + return optimizer + transformations = { + 'muon': muon_transform(), + 'adamw': adamw_transform(), + } + + return optax.multi_transform(transformations, self.create_mask) + + return optax.inject_hyperparams(optimizer)(learning_rate=learning_rate_schedule) + + def create_mask(self, params): + """ + Creates a mask that labels parameters as 'muon' or 'adamw' based on their + dimensionality and module path, using AdamW for Embedding and lm_head parameters. + """ + paths = leaf_key_paths(params) + + def mask_fn(param, path): + path_str = '.'.join(path) if isinstance(path, (list, tuple)) else str(path) + if 'Embedding' in path_str or 'lm_head' in path_str: + return 'adamw' + elif param.ndim == 2: + return 'muon' + else: + return 'adamw' + + return jax.tree_util.tree_map(mask_fn, params, paths) + + +class ScaleByMuonState(NamedTuple): + """State for the Mars algorithm.""" + momentum_buffer: optax.Updates + +def scale_with_muon(momentum=0.95, nesterov=True, steps=5): + def init_fn(params): + momentum_buffer = otu.tree_zeros_like(params) # First moment + return ScaleByMuonState(momentum_buffer=momentum_buffer) + + + def update_fn(updates, state, params=None): + buf = state.momentum_buffer + buf = jax.tree.map( + lambda m, g: None if g is None else momentum * m + g, + buf, + updates, + is_leaf=lambda x: x is None, + ) + if nesterov: + updates = jax.tree.map( + lambda m, g: None if g is None else momentum * m + g, + buf, + updates, + is_leaf=lambda x: x is None, + ) + else: + updates = buf + + + updates = jax.tree.map( + lambda g: None if g is None else zeropower_via_newtonschulz5(g, steps=steps), + updates, + is_leaf=lambda x: x is None, + ) + + updates = jax.tree.map( + lambda g: None if g is None else jnp.sqrt(jnp.maximum(1, g.shape[0] / g.shape[1])) * g, + updates, + is_leaf=lambda x: x is None, + ) + + return updates, ScaleByMuonState(momentum_buffer=buf) + + return optax.GradientTransformation(init_fn, update_fn) + + + +def zeropower_via_newtonschulz5(X, steps=10, eps=1e-7): + """ + Newton-Schulz iteration to compute the zeroth power / orthogonalization of G. 
+ """ + chex.assert_rank(X, 2) + a, b, c = (3.4445, -4.7750, 2.0315) + X /= (jnp.linalg.norm(X) + eps) # Ensure top singular value <= 1 + transpose = False + if X.shape[0] > X.shape[1]: + X = X.T + transpose = True + for _ in range(steps): + A = X @ X.T + B = b * A + c * A @ A + X = a * X + B @ X + if transpose: + X = X.T + return X From e961c95aac839f0ee722b7cde01d0adea53a40e0 Mon Sep 17 00:00:00 2001 From: Kaiyue Wen Date: Mon, 2 Dec 2024 20:47:32 -0800 Subject: [PATCH 11/56] Implement MARS (tested) and Muon (have bug in saving), example config in config/llama2_100M_{mars/muon}.yaml --- debug.sh | 1 - logs/log-t1v-n-d3fbbfef-w-0.log | 386 -------------------------------- 2 files changed, 387 deletions(-) delete mode 100644 debug.sh delete mode 100644 logs/log-t1v-n-d3fbbfef-w-0.log diff --git a/debug.sh b/debug.sh deleted file mode 100644 index 9f4a67ef4..000000000 --- a/debug.sh +++ /dev/null @@ -1 +0,0 @@ -gcloud compute tpus tpu-vm ssh debug-8 --zone europe-west4-a --worker=all --command 'WANDB_API_KEY= levanter/infra/launch.sh python levanter/src/levanter/main/train_lm.py --config_path levanter/config/gpt2_small.yaml --trainer.checkpointer.base_path gs://levanter-checkpoints-new/gpt' \ No newline at end of file diff --git a/logs/log-t1v-n-d3fbbfef-w-0.log b/logs/log-t1v-n-d3fbbfef-w-0.log deleted file mode 100644 index f9bf6d3a7..000000000 --- a/logs/log-t1v-n-d3fbbfef-w-0.log +++ /dev/null @@ -1,386 +0,0 @@ - config.json: 0%| | 0.00/665 [00:00 - levanter.config.main(main)() - File "/home/kaiyue/levanter/src/levanter/config.py", line 84, in wrapper_inner - response = fn(cfg, *args, **kwargs) - File "/home/kaiyue/levanter/src/levanter/main/train_lm.py", line 105, in main - eval_datasets = config.data.validation_sets(Pos.size) - File "/home/kaiyue/levanter/src/levanter/data/text.py", line 540, in validation_sets - validation_set = self.validation_set(seq_len, monitors) - File "/home/kaiyue/levanter/src/levanter/data/text.py", line 535, in validation_set - return self.token_seq_dataset("validation", seq_len, monitors) - File "/home/kaiyue/levanter/src/levanter/data/text.py", line 564, in token_seq_dataset - cache = self.build_or_load_cache(split, monitors=monitors) - File "/home/kaiyue/levanter/src/levanter/data/text.py", line 574, in build_or_load_cache - return TokenizedDocumentCache.load(split_cache_dir, flatten_docs=True) - File "/home/kaiyue/levanter/src/levanter/data/text.py", line 270, in load - cache = ShardCache.load(cache_dir, batch_size=batch_size) - File "/home/kaiyue/levanter/src/levanter/data/shard_cache.py", line 1267, in load - ledger = _load_cache_ledger(cache_dir) - File "/home/kaiyue/levanter/src/levanter/data/shard_cache.py", line 442, in _load_cache_ledger - with fsspec.open(ledger_path) as file: - File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/core.py", line 100, in __enter__ - f = self.fs.open(self.path, mode=mode) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/spec.py", line 1309, in open - f = self._open( - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 1519, in _open - return GCSFile( - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 1678, in __init__ - super().__init__( - File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/spec.py", line 1665, in __init__ - self.size = self.details["size"] - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 1714, in details - self._details = self.fs.info(self.path, generation=self.generation) - 
File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/asyn.py", line 118, in wrapper - return sync(self.loop, func, *args, **kwargs) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/asyn.py", line 103, in sync - raise return_result - File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/asyn.py", line 56, in _runner - result[0] = await coro - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 962, in _info - exact = await self._get_object(path) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 522, in _get_object - resp = await self._call( - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 437, in _call - status, headers, info, contents = await self._request( - File "/home/kaiyue/venv310/lib/python3.10/site-packages/decorator.py", line 221, in fun - return await caller(func, *(extras + args), **kw) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/retry.py", line 122, in retry_request - return await func(*args, **kwargs) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 430, in _request - validate_response(status, contents, path, args) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/retry.py", line 103, in validate_response - raise OSError(f"Forbidden: {path}\n{msg}") -OSError: Forbidden: b/levanter-data/o -544074808685-compute@developer.gserviceaccount.com does not have storage.objects.list access to the Google Cloud Storage bucket. Permission 'storage.objects.list' denied on resource (or it may not exist). -Traceback (most recent call last): - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 516, in _get_object - res = await self._call( - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 437, in _call - status, headers, info, contents = await self._request( - File "/home/kaiyue/venv310/lib/python3.10/site-packages/decorator.py", line 221, in fun - return await caller(func, *(extras + args), **kw) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/retry.py", line 122, in retry_request - return await func(*args, **kwargs) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 430, in _request - validate_response(status, contents, path, args) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/retry.py", line 103, in validate_response - raise OSError(f"Forbidden: {path}\n{msg}") -OSError: Forbidden: b/levanter-data/o/tokenized%2Fopenwebtext%2Fvalidation%2Fcache_ledger.json -544074808685-compute@developer.gserviceaccount.com does not have storage.objects.get access to the Google Cloud Storage object. Permission 'storage.objects.get' denied on resource (or it may not exist). 
- -During handling of the above exception, another exception occurred: - -Traceback (most recent call last): - File "/home/kaiyue/levanter/src/levanter/main/train_lm.py", line 195, in - levanter.config.main(main)() - File "/home/kaiyue/levanter/src/levanter/config.py", line 84, in wrapper_inner - response = fn(cfg, *args, **kwargs) - File "/home/kaiyue/levanter/src/levanter/main/train_lm.py", line 105, in main - eval_datasets = config.data.validation_sets(Pos.size) - File "/home/kaiyue/levanter/src/levanter/data/text.py", line 540, in validation_sets - validation_set = self.validation_set(seq_len, monitors) - File "/home/kaiyue/levanter/src/levanter/data/text.py", line 535, in validation_set - return self.token_seq_dataset("validation", seq_len, monitors) - File "/home/kaiyue/levanter/src/levanter/data/text.py", line 564, in token_seq_dataset - cache = self.build_or_load_cache(split, monitors=monitors) - File "/home/kaiyue/levanter/src/levanter/data/text.py", line 574, in build_or_load_cache - return TokenizedDocumentCache.load(split_cache_dir, flatten_docs=True) - File "/home/kaiyue/levanter/src/levanter/data/text.py", line 270, in load - cache = ShardCache.load(cache_dir, batch_size=batch_size) - File "/home/kaiyue/levanter/src/levanter/data/shard_cache.py", line 1267, in load - ledger = _load_cache_ledger(cache_dir) - File "/home/kaiyue/levanter/src/levanter/data/shard_cache.py", line 442, in _load_cache_ledger - with fsspec.open(ledger_path) as file: - File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/core.py", line 100, in __enter__ - f = self.fs.open(self.path, mode=mode) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/spec.py", line 1309, in open - f = self._open( - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 1519, in _open - return GCSFile( - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 1678, in __init__ - super().__init__( - File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/spec.py", line 1665, in __init__ - self.size = self.details["size"] - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 1714, in details - self._details = self.fs.info(self.path, generation=self.generation) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/asyn.py", line 118, in wrapper - return sync(self.loop, func, *args, **kwargs) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/asyn.py", line 103, in sync - raise return_result - File "/home/kaiyue/venv310/lib/python3.10/site-packages/fsspec/asyn.py", line 56, in _runner - result[0] = await coro - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 962, in _info - exact = await self._get_object(path) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 522, in _get_object - resp = await self._call( - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 437, in _call - status, headers, info, contents = await self._request( - File "/home/kaiyue/venv310/lib/python3.10/site-packages/decorator.py", line 221, in fun - return await caller(func, *(extras + args), **kw) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/retry.py", line 122, in retry_request - return await func(*args, **kwargs) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/core.py", line 430, in _request - validate_response(status, contents, path, args) - File "/home/kaiyue/venv310/lib/python3.10/site-packages/gcsfs/retry.py", line 
103, in validate_response - raise OSError(f"Forbidden: {path}\n{msg}") -OSError: Forbidden: b/levanter-data/o -544074808685-compute@developer.gserviceaccount.com does not have storage.objects.list access to the Google Cloud Storage bucket. Permission 'storage.objects.list' denied on resource (or it may not exist). -wandb: - 0.015 MB of 0.015 MB uploaded wandb: \ 0.015 MB of 0.015 MB uploaded wandb: | 0.015 MB of 0.015 MB uploaded wandb: / 0.015 MB of 0.015 MB uploaded wandb: - 0.015 MB of 0.015 MB uploaded wandb: \ 0.015 MB of 0.015 MB uploaded wandb: | 0.015 MB of 0.015 MB uploaded wandb: -wandb: Run summary: -wandb: backend tpu -wandb: num_devices 8 -wandb: num_hosts 1 -wandb: -wandb: 🚀 View run charmed-leaf-1 at: https://wandb.ai/understanding-sam/levanter/runs/8eyi8adv -wandb: ️⚡ View job at https://wandb.ai/understanding-sam/levanter/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjEyNjY1ODAzOQ==/version_details/v0 -wandb: Synced 5 W&B file(s), 0 media file(s), 4 artifact file(s) and 0 other file(s) -wandb: Find logs at: ./wandb/run-20240101_072752-8eyi8adv/logs -2024-01-01 07:28:06,900 VINFO scripts.py:1085 -- Killed `/home/kaiyue/venv310/lib/python3.10/site-packages/ray/core/src/ray/raylet/raylet --raylet_socket_name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/raylet --store_socket_name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/plasma_store --object_manager_port=0 --min_worker_port=10002 --max_worker_port=19999 --node_manager_port=0 --node_ip_address=10.164.0.78 --maximum_startup_concurrency=96 --static_resource_list=node:10.164.0.78,1.0,node:__internal_head__,1.0,TPU,4,accelerator_type:TPU-V3,1,debug-8,1,TPU-v3-8-head,1,CPU,96,memory,237984248423,object_store_memory,106278963609 "--python_worker_command=/home/kaiyue/venv310/bin/python3.10 /home/kaiyue/venv310/lib/python3.10/site-packages/ray/_private/workers/setup_worker.py /home/kaiyue/venv310/lib/python3.10/site-packages/ray/_private/workers/default_worker.py --node-ip-address=10.164.0.78 --node-manager-port=RAY_NODE_MANAGER_PORT_PLACEHOLDER --object-store-name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/plasma_store --raylet-name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/raylet --redis-address=None --temp-dir=/tmp/ray --metrics-agent-port=53820 --runtime-env-agent-port=56966 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --runtime-env-agent-port=56966 --gcs-address=10.164.0.78:61964 --session-name=session_2024-01-01_07-27-46_779910_13059 --temp-dir=/tmp/ray --webui=10.164.0.78:8265 --cluster-id=3fceadd8168d8843aa4e8c1b4b06513d7b706b38e3056706a925c9f0 RAY_WORKER_DYNAMIC_OPTION_PLACEHOLDER" --java_worker_command= --cpp_worker_command= --native_library_path=/home/kaiyue/venv310/lib/python3.10/site-packages/ray/cpp/lib --temp_dir=/tmp/ray --session_dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059 --log_dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --resource_dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/runtime_resources --metrics-agent-port=53820 --metrics_export_port=64689 --runtime_env_agent_port=56966 --object_store_memory=106278963609 --plasma_directory=/dev/shm --ray-debugger-external=0 --gcs-address=10.164.0.78:61964 --session-name=session_2024-01-01_07-27-46_779910_13059 --labels= --cluster-id=3fceadd8168d8843aa4e8c1b4b06513d7b706b38e3056706a925c9f0 --head --num_prestart_python_workers=96 "--dashboard_agent_command=/home/kaiyue/venv310/bin/python3.10 -u /home/kaiyue/venv310/lib/python3.10/site-packages/ray/dashboard/agent.py 
--node-ip-address=10.164.0.78 --metrics-export-port=64689 --dashboard-agent-port=53820 --listen-port=52365 --node-manager-port=RAY_NODE_MANAGER_PORT_PLACEHOLDER --object-store-name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/plasma_store --raylet-name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/raylet --temp-dir=/tmp/ray --session-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059 --log-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --session-name=session_2024-01-01_07-27-46_779910_13059 --gcs-address=10.164.0.78:61964" "--runtime_env_agent_command=/home/kaiyue/venv310/bin/python3.10 -u /home/kaiyue/venv310/lib/python3.10/site-packages/ray/_private/runtime_env/agent/main.py --node-ip-address=10.164.0.78 --runtime-env-agent-port=56966 --gcs-address=10.164.0.78:61964 --runtime-env-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/runtime_resources --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --log-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --temp-dir=/tmp/ray"` (via SIGKILL) -2024-01-01 07:28:06,915 INFO scripts.py:1121 -- 1/1 stopped. 2024-01-01 07:28:07,324 VINFO scripts.py:1085 -- Killed `/home/kaiyue/venv310/bin/python3.10 -u /home/kaiyue/venv310/lib/python3.10/site-packages/ray/autoscaler/_private/monitor.py --logs-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=10.164.0.78:61964 --monitor-ip=10.164.0.78` (via SIGKILL) -2024-01-01 07:28:07,325 VINFO scripts.py:1085 -- Killed `/home/kaiyue/venv310/bin/python3.10 -u /home/kaiyue/venv310/lib/python3.10/site-packages/ray/_private/log_monitor.py --session-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059 --logs-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --gcs-address=10.164.0.78:61964 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5` (via SIGKILL) -2024-01-01 07:28:07,410 VINFO scripts.py:1085 -- Killed `/home/kaiyue/venv310/bin/python3.10 -m ray.util.client.server --address=10.164.0.78:61964 --host=0.0.0.0 --port=10001 --mode=proxy --runtime-env-agent-address=http://10.164.0.78:56966` (via SIGKILL) -2024-01-01 07:28:07,578 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" 
"" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. -2024-01-01 07:28:07,580 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" 
"" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. -2024-01-01 07:28:07,581 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,583 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,585 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,586 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,588 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,589 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,591 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,592 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,594 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,595 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,597 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,598 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,600 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,602 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,603 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,605 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,606 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,608 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,609 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,611 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,612 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,616 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,617 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,619 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
-[2024-01-01 07:28:07,620 through 07:28:07,660: 26 further identical `ray::IDLE` stop attempts, each reporting the process was already dead; the workers' padded empty-string ("") argv titles are elided]
-2024-01-01 07:28:07,661 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,663 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-2024-01-01 07:28:07,665 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""`, but process was already dead. 
-Exception in thread NetStatThr:
-Traceback (most recent call last):
-  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
-    self.run()
-  File "/usr/lib/python3.10/threading.py", line 953, in run
-    self._target(*self._args, **self._kwargs)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 268, in check_network_status
-    self._loop_check_status(
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 224, in _loop_check_status
-    local_handle = request()
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface.py", line 756, in deliver_network_status
-    return self._deliver_network_status(status)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface_shared.py", line 484, in _deliver_network_status
-    return self._deliver_record(record)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface_shared.py", line 437, in _deliver_record
-    handle = mailbox._deliver_record(record, interface=self)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/mailbox.py", line 455, in _deliver_record
-    interface._publish(record)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface_sock.py", line 51, in _publish
-    self._sock_client.send_record_publish(record)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 221, in send_record_publish
-    self.send_server_request(server_req)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 155, in send_server_request
-    self._send_message(msg)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 152, in _send_message
-    self._sendall_with_error_handle(header + data)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 130, in _sendall_with_error_handle
-    sent = self._sock.send(data)
-BrokenPipeError: [Errno 32] Broken pipe
-Exception in thread IntMsgThr:
-Traceback (most recent call last):
-  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
-    self.run()
-  File "/usr/lib/python3.10/threading.py", line 953, in run
-    self._target(*self._args, **self._kwargs)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 300, in check_internal_messages
-    self._loop_check_status(
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 224, in _loop_check_status
-    local_handle = request()
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface.py", line 764, in deliver_internal_messages
-    return self._deliver_internal_messages(internal_message)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface_shared.py", line 490, in _deliver_internal_messages
-    return self._deliver_record(record)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface_shared.py", line 437, in _deliver_record
-    handle = mailbox._deliver_record(record, interface=self)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/mailbox.py", line 455, in _deliver_record
-    interface._publish(record)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/interface/interface_sock.py", line 51, in _publish
-    self._sock_client.send_record_publish(record)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 221, in send_record_publish
-    self.send_server_request(server_req)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 155, in send_server_request
-    self._send_message(msg)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 152, in _send_message
-    self._sendall_with_error_handle(header + data)
-  File "/home/kaiyue/venv310/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 130, in _sendall_with_error_handle
-    sent = self._sock.send(data)
-BrokenPipeError: [Errno 32] Broken pipe
-2024-01-01 07:28:07,666 VINFO scripts.py:1099 -- Attempted to stop `ray::IDLE`, but process was already dead.
-2024-01-01 07:28:07,833 VINFO scripts.py:1099 -- Attempted to stop `/home/kaiyue/venv310/bin/python3.10 -u /home/kaiyue/venv310/lib/python3.10/site-packages/ray/_private/log_monitor.py --session-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059 --logs-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --gcs-address=10.164.0.78:61964 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5`, but process was already dead.
-2024-01-01 07:28:08,000 VINFO scripts.py:1099 -- Attempted to stop `/home/kaiyue/venv310/bin/python3.10 -u /home/kaiyue/venv310/lib/python3.10/site-packages/ray/dashboard/agent.py --node-ip-address=10.164.0.78 --metrics-export-port=64689 --dashboard-agent-port=53820 --listen-port=52365 --node-manager-port=43807 --object-store-name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/plasma_store --raylet-name=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/sockets/raylet --temp-dir=/tmp/ray --session-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059 --log-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --session-name=session_2024-01-01_07-27-46_779910_13059 --gcs-address=10.164.0.78:61964 --agent-id 424238335`, but process was already dead.
-2024-01-01 07:28:08,084 VINFO scripts.py:1085 -- Killed `/home/kaiyue/venv310/bin/python3.10 /home/kaiyue/venv310/lib/python3.10/site-packages/ray/dashboard/dashboard.py --host=0.0.0.0 --port=8265 --port-retries=0 --temp-dir=/tmp/ray --log-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --session-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=10.164.0.78:61964 --node-ip-address=10.164.0.78` (via SIGKILL)
-2024-01-01 07:28:08,168 VINFO scripts.py:1099 -- Attempted to stop `/home/kaiyue/venv310/bin/python3.10 -u /home/kaiyue/venv310/lib/python3.10/site-packages/ray/_private/runtime_env/agent/main.py --node-ip-address=10.164.0.78 --runtime-env-agent-port=56966 --gcs-address=10.164.0.78:61964 --runtime-env-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/runtime_resources --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --log-dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --temp-dir=/tmp/ray`, but process was already dead.
-2024-01-01 07:28:08,252 INFO scripts.py:1121 -- 1/4 stopped.
-2024-01-01 07:28:08,252 INFO scripts.py:1121 -- 2/4 stopped.
-2024-01-01 07:28:08,252 INFO scripts.py:1121 -- 3/4 stopped.
-2024-01-01 07:28:08,252 INFO scripts.py:1121 -- 4/4 stopped.
-2024-01-01 07:28:08,546 VINFO scripts.py:1085 -- Killed `/home/kaiyue/venv310/lib/python3.10/site-packages/ray/core/src/ray/gcs/gcs_server --log_dir=/tmp/ray/session_2024-01-01_07-27-46_779910_13059/logs --config_list=eyJvYmplY3Rfc3BpbGxpbmdfY29uZmlnIjogIntcInR5cGVcIjogXCJmaWxlc3lzdGVtXCIsIFwicGFyYW1zXCI6IHtcImRpcmVjdG9yeV9wYXRoXCI6IFwiL3RtcC9yYXkvc2Vzc2lvbl8yMDI0LTAxLTAxXzA3LTI3LTQ2Xzc3OTkxMF8xMzA1OVwifX0iLCAiaXNfZXh0ZXJuYWxfc3RvcmFnZV90eXBlX2ZzIjogdHJ1ZX0= --gcs_server_port=61964 --metrics-agent-port=53820 --node-ip-address=10.164.0.78 --session-name=session_2024-01-01_07-27-46_779910_13059` (via SIGKILL)
-2024-01-01 07:28:08,599 INFO scripts.py:1121 -- 1/1 stopped.
-2024-01-01 07:28:08,599 SUCC scripts.py:1166 -- Stopped all 6 Ray processes.
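Note on the BrokenPipeError tracebacks above: they come from wandb's background status threads (NetStatThr, IntMsgThr) still writing to the wandb service socket while Ray tears the worker processes down. A minimal sketch of a shutdown order that avoids the race, assuming the training script owns both the wandb run and the Ray cluster (the `main` wrapper and project name here are hypothetical, not Levanter's actual entry point):

    import ray
    import wandb

    def main():
        run = wandb.init(project="levanter-debug")  # hypothetical project name
        try:
            pass  # training loop goes here
        finally:
            # Flush and close the wandb run first, so its NetStatThr/IntMsgThr
            # threads exit cleanly instead of hitting a dead socket (EPIPE).
            run.finish()
            # Only then shut down Ray and its worker processes.
            ray.shutdown()

    if __name__ == "__main__":
        main()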
From 2b80af7f57e4a18fee807376df8dd48626b7ace3 Mon Sep 17 00:00:00 2001
From: David Hall
Date: Mon, 2 Dec 2024 23:23:19 -0800
Subject: [PATCH 12/56] wip

---
 src/levanter/checkpoint.py                | 16 ++++++++--------
 src/levanter/tensorstore_serialization.py | 16 ++++++++--------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/levanter/checkpoint.py b/src/levanter/checkpoint.py
index ba684b8e5..c11dd6f4d 100644
--- a/src/levanter/checkpoint.py
+++ b/src/levanter/checkpoint.py
@@ -152,14 +152,14 @@ def load_model(
 
     def on_step(self, info, force: bool = False):
         step = info.step
-        if step == 0:
-            self._last_save_time = self._dt_now_injection()
-            if not force:
-                return  # don't save checkpoint at step 0 unless forced
-
-        if step == self._last_save_step:
-            # we've already saved a checkpoint at this step
-            return
+        # if step == 0:
+        #     self._last_save_time = self._dt_now_injection()
+        #     if not force:
+        #         return  # don't save checkpoint at step 0 unless forced
+        #
+        # if step == self._last_save_step and not force:
+        #     # we've already saved a checkpoint at this step
+        #     return
 
         # two reasons we can save: time or step
         # they have different behaviors for retention.
diff --git a/src/levanter/tensorstore_serialization.py b/src/levanter/tensorstore_serialization.py
index 462c1cf2c..20c19bb53 100644
--- a/src/levanter/tensorstore_serialization.py
+++ b/src/levanter/tensorstore_serialization.py
@@ -11,7 +11,6 @@
 import jax
 import jax.experimental.array_serialization.serialization as array_ser
 import jax.numpy as jnp
-import jax.tree_util as jtu
 import numpy as np
 import tensorstore
 from jax.sharding import Mesh
@@ -50,10 +49,11 @@ def tree_serialize_leaves_tensorstore(
     def path_from_key_path(key_path):
         return os.path.join(checkpoint_dir, *key_path.split("."))
 
-    paths = jtu.tree_map(path_from_key_path, leaf_key_paths, is_leaf=lambda x: x is None)
-    paths = jtu.tree_leaves(paths, is_leaf=lambda x: x is None)
-    leaves = jtu.tree_leaves(pytree, is_leaf=lambda x: x is None)
-    assert len(leaves) == len(paths)
+    paths = jax.tree.map(path_from_key_path, leaf_key_paths, is_leaf=lambda x: x is None)
+    print(pytree, paths)
+    paths = jax.tree.leaves(paths, is_leaf=lambda x: x is None)
+    leaves = jax.tree.leaves(pytree, is_leaf=lambda x: x is None)
+    assert len(leaves) == len(paths), f"leaves: {leaves}, paths: {paths}"
 
     # ok, not all of these are arrays, but we'll deal with that in the async function
     def _ensure_is_array(x):
@@ -181,11 +181,11 @@ def tree_deserialize_leaves_tensorstore(
 
     deser_partial = functools.partial(_deserialize_one_leaf, axis_mapping=axis_mapping, mesh=mesh)
 
-    futures = jtu.tree_map(deser_partial, pytree, specs, is_leaf=is_named_array)
-    leaves, structure = jtu.tree_flatten(futures, is_leaf=is_named_array)
+    futures = jax.tree.map(deser_partial, pytree, specs, is_leaf=is_named_array)
+    leaves, structure = jax.tree.flatten(futures, is_leaf=is_named_array)
 
     async def _do_deserialize():
         values = await asyncio.gather(*leaves)
-        return jtu.tree_unflatten(structure, values)
+        return jax.tree.unflatten(structure, values)
 
     return asyncio.run(_do_deserialize())
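The `is_leaf=lambda x: x is None` idiom in the patch above matters because `jax.tree.leaves` normally treats `None` as an empty subtree and silently drops it, which is exactly how `paths` and `leaves` can end up with different lengths and trip the assert. A standalone sketch of the difference (illustrative, not Levanter code):

    import jax

    tree = {"bias": None, "weight": 1.0}

    # Default flattening drops None entirely: only one leaf survives.
    print(jax.tree.leaves(tree))
    # [1.0]

    # Marking None as a leaf keeps it, so both trees flatten to the same length.
    print(jax.tree.leaves(tree, is_leaf=lambda x: x is None))
    # [None, 1.0]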
From 074d0ec8e50a9f45b484cd9b36efab2873dc1e86 Mon Sep 17 00:00:00 2001
From: David Hall
Date: Tue, 3 Dec 2024 00:23:07 -0800
Subject: [PATCH 13/56] ok we're good

---
 tests/test_export_to_hf.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/tests/test_export_to_hf.py b/tests/test_export_to_hf.py
index 31d44d34f..2e86900c6 100644
--- a/tests/test_export_to_hf.py
+++ b/tests/test_export_to_hf.py
@@ -41,13 +41,7 @@ def test_export_lm_to_hf():
         config = export_lm_to_hf.ConvertLmConfig(
             checkpoint_path=f"{tmpdir}/ckpt",
             output_dir=f"{tmpdir}/output",
-            model=export_lm_to_hf.Gpt2Config(
-                num_layers=2,
-                num_heads=2,
-                seq_len=64,
-                use_flash_attention=True,
-                hidden_dim=32,
-            ),
+            model=model_config,
         )
 
         export_lm_to_hf.main(config)

From 722edaf9a78ac03537eecdbb83196f36e71e64c0 Mon Sep 17 00:00:00 2001
From: David Hall
Date: Tue, 3 Dec 2024 09:03:45 -0800
Subject: [PATCH 14/56] fix tree leaf stuff

---
 src/levanter/checkpoint.py                | 16 +++++++-------
 src/levanter/tensorstore_serialization.py | 21 ++++++++++++++----
 src/levanter/utils/jax_utils.py           | 26 +++++++++++++----------
 3 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/src/levanter/checkpoint.py b/src/levanter/checkpoint.py
index c11dd6f4d..1b8b7a632 100644
--- a/src/levanter/checkpoint.py
+++ b/src/levanter/checkpoint.py
@@ -152,14 +152,14 @@ def load_model(
 
     def on_step(self, info, force: bool = False):
         step = info.step
-        # if step == 0:
-        #     self._last_save_time = self._dt_now_injection()
-        #     if not force:
-        #         return  # don't save checkpoint at step 0 unless forced
-        #
-        # if step == self._last_save_step and not force:
-        #     # we've already saved a checkpoint at this step
-        #     return
+        if step == 0:
+            self._last_save_time = self._dt_now_injection()
+            if not force:
+                return  # don't save checkpoint at step 0 unless forced
+
+        if step == self._last_save_step and not force:
+            # we've already saved a checkpoint at this step
+            return
 
         # two reasons we can save: time or step
         # they have different behaviors for retention.
diff --git a/src/levanter/tensorstore_serialization.py b/src/levanter/tensorstore_serialization.py
index 8521f1a73..8b3f63362 100644
--- a/src/levanter/tensorstore_serialization.py
+++ b/src/levanter/tensorstore_serialization.py
@@ -2,8 +2,9 @@
 # * Orbax: https://github.com/google/orbax/blob/11d2934ecfff77e86b5e07d0fef02b67eff4511b/orbax/checkpoint/pytree_checkpoint_handler.py#L312
 import logging
 import os
+from dataclasses import dataclass
 from functools import partial
-from typing import Callable, Optional
+from typing import Any, Callable, Optional
 
 import equinox
 import jax
@@ -42,11 +43,23 @@ def tree_serialize_leaves_tensorstore(
         manager_was_none = False
 
     leaf_key_paths = jax_utils.leaf_key_paths(pytree, is_leaf=is_named_array)
+    assert len(jax.tree.leaves(leaf_key_paths, is_leaf=is_named_array)) == len(
+        jax.tree.leaves(pytree, is_leaf=is_named_array)
+    )
     paths = _fs_paths_from_key_paths(checkpoint_dir, leaf_key_paths)
-    paths = jax.tree.leaves(paths, is_leaf=lambda x: x is None)
-    leaves = jax.tree.leaves(pytree, is_leaf=lambda x: x is None)
-    assert len(leaves) == len(paths)
+
+    # make a dataclass since tuples are pytrees
+    @dataclass
+    class Pair:
+        path: str
+        leaf: Any
+
+    zipped = jax.tree.map(lambda x, y: Pair(x, y), paths, pytree, is_leaf=lambda x: x is None)
+    paired_leaves = jax.tree.leaves(zipped)
+    paths = [p.path for p in paired_leaves]
+    leaves = [p.leaf.array if is_named_array(p.leaf) else p.leaf for p in paired_leaves]
+    assert len(leaves) == len(paths), f"{len(leaves)} != {len(paths)}"
 
     # ok, not all of these are arrays, but we'll deal with that in the async function
     def _ensure_is_array(x):
diff --git a/src/levanter/utils/jax_utils.py b/src/levanter/utils/jax_utils.py
index 39fbf438c..734ec930c 100644
--- a/src/levanter/utils/jax_utils.py
+++ b/src/levanter/utils/jax_utils.py
@@ -152,17 +152,21 @@ def leaf_key_paths(
             x, prefix=join_key(prefix, p), is_leaf=is_leaf, use_state_dict_keys=use_state_dict_keys
         )
 
+    out: PyTree[str]
+
     if is_leaf is not None and is_leaf(pytree):
-        return prefix
+        out = prefix
+    elif pytree is None:
+        out = None
     elif isinstance(pytree, dict):
-        return {k: rec(v, k) for k, v in pytree.items()}
+        out = {k: rec(v, k) for k, v in pytree.items()}
     elif _isnamedtupleinstance(pytree):
         d = {k: rec(v, k) for k, v in pytree._asdict().items()}
-        return pytree.__class__(**d)
+        out = pytree.__class__(**d)
     elif isinstance(pytree, list):
-        return [rec(v, str(i)) for i, v in enumerate(pytree)]
+        out = [rec(v, str(i)) for i, v in enumerate(pytree)]
     elif isinstance(pytree, tuple):
-        return tuple(rec(v, str(i)) for i, v in enumerate(pytree))
+        out = tuple(rec(v, str(i)) for i, v in enumerate(pytree))
     elif isinstance(pytree, eqx.Module):
         names = []
         rec_values = []
@@ -181,17 +185,17 @@ def leaf_key_paths(
 
         _, tree_def = eqx.tree_flatten_one_level(pytree)
         out = jax.tree_util.tree_unflatten(tree_def, rec_values)
-        return out
-        # this doesn't work reliably because tree_at doesn't like none values
-        # return eqx.tree_at(lambda m: [getattr(m, name) for name in names], pytree, rec_values, is_leaf=lambda x: x is None)
     else:
         leaves, treedef = jax.tree_util.tree_flatten(pytree, is_leaf=is_leaf)
         if len(leaves) == 0:
-            return None
+            out = None
        elif len(leaves) == 1:
-            return jax.tree_util.tree_unflatten(treedef, [f"{prefix}"])
+            out = jax.tree_util.tree_unflatten(treedef, [f"{prefix}"])
         else:
-            return jax.tree_util.tree_unflatten(treedef, [join_key(prefix, str(i)) for i in range(len(leaves))])
+            out = jax.tree_util.tree_unflatten(treedef, [join_key(prefix, str(i)) for i in range(len(leaves))])
+
+    # assert len(jax.tree.leaves(out, is_leaf=is_leaf)) == len(jax.tree.leaves(pytree, is_leaf=is_leaf)), (out, pytree)
+    return out
 
 
 def join_key(prefix, k):
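The "make a dataclass since tuples are pytrees" comment above is the heart of this fix: pairing each path with its leaf via a tuple would not work, because the tuple is itself a pytree node and flattening would split the pair back apart. An unregistered dataclass, by contrast, is opaque to JAX and survives flattening as a single leaf. A standalone sketch (illustrative, not Levanter code):

    from dataclasses import dataclass
    from typing import Any

    import jax

    # A tuple is a pytree node, so its elements become separate leaves...
    print(jax.tree.leaves({"w": ("path.w", 1.0)}))
    # ['path.w', 1.0]

    @dataclass
    class Pair:  # not registered with jax.tree_util, so JAX treats instances as leaves
        path: str
        leaf: Any

    # ...while the dataclass instance stays intact, keeping path and leaf paired.
    print(jax.tree.leaves({"w": Pair("path.w", 1.0)}))
    # [Pair(path='path.w', leaf=1.0)]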
""" + lr: float = 0.02 muon_to_adam_lr: float = 0.18 # Scaling factor between AdamW and Muon learning rates momentum: float = 0.95 @@ -67,9 +71,10 @@ def adamw_transform(): components.append(optax.scale(-adam_lr)) optimizer = optax.chain(*components) return optimizer + transformations = { - 'muon': muon_transform(), - 'adamw': adamw_transform(), + "muon": muon_transform(), + "adamw": adamw_transform(), } return optax.multi_transform(transformations, self.create_mask) @@ -84,28 +89,30 @@ def create_mask(self, params): paths = leaf_key_paths(params) def mask_fn(param, path): - path_str = '.'.join(path) if isinstance(path, (list, tuple)) else str(path) - if 'Embedding' in path_str or 'lm_head' in path_str: - return 'adamw' - elif param.ndim == 2: - return 'muon' + path_str = ".".join(path) if isinstance(path, (list, tuple)) else str(path) + if "Embedding" in path_str or "lm_head" in path_str: + return "adamw" + elif isinstance(param, Linear): + # muon for linear layers + return dataclasses.replace(param, weight="muon", bias="adamw" if param.bias is not None else None) else: - return 'adamw' + return "adamw" + + return jax.tree_util.tree_map(mask_fn, params, paths, is_leaf=lambda x: isinstance(x, Linear)) + - return jax.tree_util.tree_map(mask_fn, params, paths) - - class ScaleByMuonState(NamedTuple): - """State for the Mars algorithm.""" - momentum_buffer: optax.Updates + """State for the Mars algorithm.""" + + momentum_buffer: optax.Updates + def scale_with_muon(momentum=0.95, nesterov=True, steps=5): def init_fn(params): momentum_buffer = otu.tree_zeros_like(params) # First moment return ScaleByMuonState(momentum_buffer=momentum_buffer) - - def update_fn(updates, state, params=None): + def update_fn(updates, state, params=None): buf = state.momentum_buffer buf = jax.tree.map( lambda m, g: None if g is None else momentum * m + g, @@ -122,33 +129,33 @@ def update_fn(updates, state, params=None): ) else: updates = buf - - - updates = jax.tree.map( - lambda g: None if g is None else zeropower_via_newtonschulz5(g, steps=steps), - updates, - is_leaf=lambda x: x is None, - ) - - updates = jax.tree.map( - lambda g: None if g is None else jnp.sqrt(jnp.maximum(1, g.shape[0] / g.shape[1])) * g, - updates, - is_leaf=lambda x: x is None, - ) - + + def transform_linear_layer(layer: haliax.nn.Linear): + assert layer.weight.ndim == 2 + + updated_weight_array = zeropower_via_newtonschulz5(layer.weight.array, steps=steps) + + scale = jnp.sqrt(jnp.maximum(1, updated_weight_array.shape[0] / updated_weight_array.shape[1])) + updated_weight_array *= scale + + updated_weight = dataclasses.replace(layer.weight, array=updated_weight_array) + + return dataclasses.replace(layer, weight=updated_weight) # type: ignore + + updates = map_flattened_linear_layers(transform_linear_layer, updates) + return updates, ScaleByMuonState(momentum_buffer=buf) return optax.GradientTransformation(init_fn, update_fn) - def zeropower_via_newtonschulz5(X, steps=10, eps=1e-7): """ Newton-Schulz iteration to compute the zeroth power / orthogonalization of G. 
""" chex.assert_rank(X, 2) a, b, c = (3.4445, -4.7750, 2.0315) - X /= (jnp.linalg.norm(X) + eps) # Ensure top singular value <= 1 + X /= jnp.linalg.norm(X) + eps # Ensure top singular value <= 1 transpose = False if X.shape[0] > X.shape[1]: X = X.T diff --git a/src/levanter/optim/util.py b/src/levanter/optim/util.py index 7fd3a41df..5eecf91d1 100644 --- a/src/levanter/optim/util.py +++ b/src/levanter/optim/util.py @@ -1,5 +1,12 @@ +from typing import Callable + import equinox as eqx import jax +from jaxtyping import PyTree + +import haliax +import haliax as hax +from haliax.tree_util import scan_aware_tree_map from levanter.utils.jax_utils import is_inexact_arrayish @@ -21,3 +28,53 @@ def tree_gaussian_like(key, tree): g = jax.tree_util.tree_unflatten(structure, g) return g + + +def map_flattened_linear_layers( + f: Callable[[hax.nn.Linear], hax.nn.Linear], + params: PyTree, + *, + or_else: Callable | None = None, + is_leaf: Callable | None = None, +): + """ + Apply a function to all Linear layers in a PyTree, flattening articulated input/output dims into single dims, then + unflattening them back into the original structure. This method also takes care of vmapping over scan layers. + + The linear layers will be passed to the function `f` and the result will be used to replace the original linear layer. + The linear layers passed to `f` will be flattened into 2D (named) arrays, and the result will be unflattened back into the original shape. + The bias term, if any, will be passed as a 1D named arrays. + The weight array will not be None, but the bias array may be None. + + Args: + f: The function to apply to each Linear layer + params: The PyTree of parameters + or_else: optional function to apply to non-Linear leaves + is_leaf: optional function to determine if a node is a leaf. Linears will always be considered leaves. + + Returns: + The PyTree with the function applied to all Linear layers and the structure preserved otherwise. + returned linear layers will be unfattened back to their original shape. 
From 0f41ebb9e547bf14fa76d80ef642e965f280cd6b Mon Sep 17 00:00:00 2001
From: Evan Walters
Date: Wed, 4 Dec 2024 15:17:42 -0700
Subject: [PATCH 16/56] adding kron file to optim

---
 src/levanter/optim/__init__.py |    3 +-
 src/levanter/optim/kron.py     | 1743 ++++++++++++++++++++++++++++++++
 2 files changed, 1745 insertions(+), 1 deletion(-)
 create mode 100644 src/levanter/optim/kron.py

diff --git a/src/levanter/optim/__init__.py b/src/levanter/optim/__init__.py
index 2cd5ad781..64a51ea2c 100644
--- a/src/levanter/optim/__init__.py
+++ b/src/levanter/optim/__init__.py
@@ -12,4 +12,5 @@
 from .mars import (
     MarsConfig,
     ScaleByMarsState
-)
\ No newline at end of file
+)
+from .kron import KronConfig

diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py
new file mode 100644
index 000000000..6959ce81d
--- /dev/null
+++ b/src/levanter/optim/kron.py
@@ -0,0 +1,1743 @@
+from typing import Any, List, Optional, Union, Callable, Tuple
+from collections import defaultdict
+from functools import partial
+import string
+import numpy as np
+
+import chex
+import jax
+from jax import numpy as jnp, vmap
+from jax.sharding import PartitionSpec
+from jax.lax import with_sharding_constraint
+from jax._src import mesh as mesh_lib
+import flax.linen as nn
+from optax import tree_utils as otu
+from optax._src import base, transform
+from optax._src.numerics import safe_int32_increment
+from optax._src.utils import canonicalize_dtype
+from optax._src.combine import chain
+
+from dataclasses import dataclass
+import optax
+from levanter.optim.config import OptimizerConfig
+
+
+def precond_update_prob_schedule(
+    max_prob=1.0, min_prob=0.03, decay=0.001, flat_start=500
+):
+    """Anneal preconditioner update probability during beginning of training.
+
+    PSGD benefits from more preconditioner updates at the beginning of training,
+    but once the preconditioner is learned the update probability can drop low.
+
+    This schedule is an exponential anneal with a flat start. Default settings keep
+    update probability at 1.0 for 500 steps then exponentially anneal down to
+    `min_prob` by 4000 steps. Default settings work well for most models and
+    training regimes.
+    """
+
+    def _schedule(n):
+        """Exponential anneal with flat start."""
+        return jnp.clip(
+            max_prob * jnp.exp(-decay * (n - flat_start)), min_prob, max_prob
+        )
+
+    return _schedule
+
+
+@OptimizerConfig.register_subclass("kron")
+@dataclass
+class KronConfig(OptimizerConfig):
+    """Configuration for PSGD Kron optimizer.
+
+    Attributes:
+        beta1: Momentum parameter. 0.9 or 0.95 are common values.
+        weight_decay: Weight decay coefficient.
+        max_grad_norm: Optional gradient norm clipping value.
+        normalize_grads: Whether to normalize the incoming gradients to unit norm layer-wise.
+            Can help with stability.
+        preconditioner_update_probability: Final probability of updating the preconditioner. Default
+            is 0.05 (update every 20 steps). The `precond_update_prob_schedule` holds probability at
+            1.0 for `update_prob_flat_start` steps before annealing exponentially down to this
+            value within ~3000 steps. Training is slower while updates are done every step, but
+            training speeds up after update probability decays.
+        update_prob_flat_start: Number of steps to keep update probability at 1.0 before annealing.
+            Default value of 500 works well, but increasing this to 1000 or 2000 can benefit training.
+            However, this slows down training. A good balance is to keep update probability at 1.0 during
+            the initial loss drop, then when you notice loss start to plateau, the preconditioner is mostly
+            learned and update probability can be decayed for faster training.
+        max_size_triangular: Max size for dim's preconditioner to be triangular.
+        min_ndim_triangular: Minimum number of dimensions a layer needs to have triangular preconditioners.
+        memory_save_mode: Memory saving mode for preconditioners. Options:
+            - None: All preconditioners are triangular (default)
+            - 'one_diag': Largest/last dim per layer uses diagonal preconditioner
+            - 'all_diag': All preconditioners are diagonal
+        mu_dtype: Dtype of the momentum buffer. Defaults to same dtype as parameters.
+        precond_dtype: Dtype of the preconditioners. Defaults to 'float32'.
+        precond_update_precision: Precision for matmul during preconditioner update.
+            Options: 'bfloat16', 'tensorfloat32', 'float32'.
+        precond_grads_precision: Precision for matmul during preconditioning grads.
+            Options: 'bfloat16', 'tensorfloat32', 'float32'.
+        scanned_layers: Tree of booleans same structure as params indicating scanned dimensions
+            for each layer. PSGD will vmap over leading dimension.
+        lax_map_scanned_layers: Whether to use lax.map for scanned layers instead of vmap.
+            Useful to save memory with large models.
+        lax_map_batch_size: Batch size for lax.map, see JAX docs for more info.
+        merge_small_dims: Whether to merge small dimensions to improve preconditioner efficiency.
+        target_merged_dim_size: Target size of merged dimensions.
+        partition_grads_into_blocks: Whether to partition grads into chunks of size block_size
+            for efficiency.
+        block_size: Block size to use for partitioning grads.
+        buffer_qq: Whether to buffer p=q@q.T for faster preconditioning. May not be beneficial
+            with sharded preconditioners (default False). Try True to check for speedup if not
+            sharding preconditioners.
+        params_sharding: Pytree same structure as params of jax.sharding.PartitionSpec.
+        preconditioner_sharding: PartitionSpec for preconditioner matrices. Best practice is to
+            shard first dimension across fsdp-like mesh axis, or largest/most common axis in params.
+            Example: PartitionSpec('fsdp') or PartitionSpec('fsdp', 'tp').
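+
+    Example (illustrative sketch only, not from the original file; assumes the
+    defaults above and the `build` signature defined below):
+
+        config = KronConfig(beta1=0.95, weight_decay=0.1)
+        optimizer = config.build(num_train_steps=10_000)  # an optax.GradientTransformation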
+    """
+    beta1: float = 0.9
+    weight_decay: float = 0.1
+    max_grad_norm: Optional[float] = 1.0
+    normalize_grads: bool = False
+    preconditioner_update_probability: float = 0.05
+    update_prob_flat_start: int = 500
+    max_size_triangular: int = 8192
+    min_ndim_triangular: int = 2
+    memory_save_mode: Optional[str] = None
+    mu_dtype: Optional[Union[str, jnp.dtype]] = None
+    precond_dtype: Optional[Union[str, jnp.dtype]] = None
+    precond_update_precision: Optional[str] = "tensorfloat32"
+    precond_grads_precision: Optional[str] = None
+    scanned_layers: Optional[base.Params] = None
+    lax_map_scanned_layers: bool = False
+    lax_map_batch_size: int = 8
+    merge_small_dims: bool = True
+    target_merged_dim_size: int = 4096
+    partition_grads_into_blocks: bool = True
+    block_size: int = 512
+    buffer_qq: bool = False
+    params_sharding: Optional[Any] = None
+    preconditioner_sharding: Optional[PartitionSpec[str, str]] = None
+
+    def build(self, num_train_steps):
+        """Creates the optimizer."""
+
+        def _optimizer(learning_rate) -> optax.GradientTransformation:
+            components = []
+            components.append(
+                scale_by_kron(
+                    b1=self.beta1,
+                    normalize_grads=self.normalize_grads,
+                    preconditioner_update_probability=precond_update_prob_schedule(
+                        min_prob=self.preconditioner_update_probability,
+                        flat_start=self.update_prob_flat_start
+                    ),
+                    max_size_triangular=self.max_size_triangular,
+                    min_ndim_triangular=self.min_ndim_triangular,
+                    memory_save_mode=self.memory_save_mode,
+                    mu_dtype=self.mu_dtype,
+                    precond_dtype=self.precond_dtype,
+                    precond_update_precision=self.precond_update_precision,
+                    precond_grads_precision=self.precond_grads_precision,
+                    scanned_layers=self.scanned_layers,
+                    lax_map_scanned_layers=self.lax_map_scanned_layers,
+                    lax_map_batch_size=self.lax_map_batch_size,
+                    merge_small_dims=self.merge_small_dims,
+                    target_merged_dim_size=self.target_merged_dim_size,
+                    partition_grads_into_blocks=self.partition_grads_into_blocks,
+                    block_size=self.block_size,
+                    buffer_qq=self.buffer_qq,
+                    params_sharding=self.params_sharding,
+                    preconditioner_sharding=self.preconditioner_sharding,
+                )
+            )
+            if self.weight_decay > 0 and not self.normalize_grads:
+                components.append(transform.add_decayed_weights(self.weight_decay, self.build_weight_decay_mask()))
+            components.append(transform.scale_by_learning_rate(learning_rate))
+            return optax.chain(*components)
+
+        return optax.inject_hyperparams(_optimizer)(learning_rate=self.lr_scheduler(num_train_steps))
+
+
+def scale_by_kron(
+    b1: float = 0.9,
+    normalize_grads: bool = False,
+    preconditioner_update_probability: Union[
+        float, Callable[[int], float]
+    ] = precond_update_prob_schedule(),
+    max_size_triangular: int = 8192,
+    min_ndim_triangular: int = 2,
+    memory_save_mode: Optional[str] = None,
+    mu_dtype: Optional[Union[str, jnp.dtype]] = None,
+    precond_dtype: Optional[Union[str, jnp.dtype]] = None,
+    precond_update_precision: Optional[str] = "tensorfloat32",
+    precond_grads_precision: Optional[str] = None,
+    scanned_layers: Optional[base.Params] = None,
+    lax_map_scanned_layers: bool = False,
+    lax_map_batch_size: int = 8,
+    merge_small_dims: bool = False,
+    target_merged_dim_size: int = 2048,
+    partition_grads_into_blocks: bool = False,
+    block_size: int = 256,
+    buffer_qq: bool = False,
+    params_sharding: Optional[Any] = None,
+    preconditioner_sharding: Optional[PartitionSpec[str, str]] = None,
+    **kwargs,
+) -> base.GradientTransformation:
+    """
+    Implements PSGD Kron from https://github.com/lixilinx/psgd_torch.
+
+    Args:
+        b1: float, momentum parameter.
0.9 or 0.95 are common values. + normalize_grads: bool, whether to normalize the incoming gradients to unit + norm layer-wise. Can help with stability. + preconditioner_update_probability: float, probability of updating the + preconditioner. Default anneals from 1.0 to 0.03 by 4000 steps. + max_size_triangular: int, max size for dim's preconditioner to be triangular. + min_ndim_triangular: int, minimum number of dimensions a layer needs to have + triangular preconditioners. + memory_save_mode: optional str, None, 'one_diag', or 'all_diag', None is default + to set all preconditioners to be triangular, 'one_diag' sets the largest + or last dim to be diagonal per layer, and 'all_diag' sets all preconditioners + to be diagonal. + mu_dtype: optional str or jnp.dtype, dtype of the momentum buffer. Defaults to + same dtype as the parameters. + precond_dtype: optional str or jnp.dtype, dtype of the preconditioners. Defaults + to 'float32'. + precond_update_precision: str, precision for matmul during preconditioner update, + 'bfloat16', 'tensorfloat32', 'float32'. + precond_grads_precision: str, precision for matmul during preconditioning grads, + 'bfloat16', 'tensorfloat32', 'float32'. + scanned_layers: optional base.Params, tree of booleans same structure as + params indicating scanned dimensions for each layer. PSGD will vmap over + leading dimension. + lax_map_scanned_layers: bool, whether to use lax.map for scanned layers + instead of vmap. Useful to save memory with large models. + lax_map_batch_size: int, batch size for lax.map, see JAX docs for more info. + merge_small_dims: bool, whether to merge small dimensions to improve + preconditioner efficiency. + target_merged_dim_size: int, target size of merged dimensions. + partition_grads_into_blocks: bool, whether to partition grads into chunks of + size `block_size` for efficiency. + block_size: int, block size to use for partitioning grads. + buffer_qq: bool, whether to buffer p=q@q.T for faster preconditioning. This may + not be beneficial if using sharded preconditioners so default is False. If + not sharding preconditioners, try setting to True to see if there is a speedup. + params_sharding: pytree same structure as params of jax.sharding.PartitionSpec. + preconditioner_sharding: `None` or `PartitionSpec(str | None, str | None)`, + PartitionSpec for preconditioner matrices. `None` infers a strategy + from params_sharding that matches first preconditioner axis to + corresponding axis in params. Best practice, though, is to shard the first + dimension across fsdp-like mesh axis, or the largest, most common axis in + params. For example, PartitionSpec('fsdp') or PartitionSpec('fsdp', 'tp'). + + Returns: + optax.GradientTransformation + """ + mu_dtype = canonicalize_dtype(mu_dtype) + precond_dtype = canonicalize_dtype(precond_dtype) + preconditioner_lr = 0.1 + preconditioner_init_scale = 1.0 + lax_map = lax_map_scanned_layers + bs = lax_map_batch_size + + def init_fn(params, return_partition_specs_only=False): + current_mesh = mesh_lib.thread_resources.env.physical_mesh + if ( + current_mesh.empty + and buffer_qq + and any([params_sharding is not None, preconditioner_sharding is not None]) + and jax.process_index() == 0 + ): + print( + "PSGD Kron WARNING: buffering Q@Q.T with sharding but Mesh is empty. " + "Consider running Kron within a mesh context manager `with mesh:` or " + "setting buffer_qq=False to prevent potential sharding inefficiencies. " + "If only using replicated sharding, you can ignore this warning." 
+ ) + + have_params_sharding = params_sharding is not None + have_qs_sharding = have_params_sharding or preconditioner_sharding is not None + + # unbox if flax style partitioned + params = jax.tree.map( + lambda x: x.unbox() if isinstance(x, nn.Partitioned) else x, + params, + is_leaf=lambda x: isinstance(x, nn.Partitioned), + ) + + # check that there is a PartitionSpec for every param + if params_sharding is not None: + assert len(jax.tree.leaves(params_sharding)) == len( + jax.tree.leaves(params) + ), "There must be a PartitionSpec for every parameter in PSGD Kron." + # check that preconditioner sharding length is at least 1 + if preconditioner_sharding is not None: + assert len(preconditioner_sharding) > 0, ( + "preconditioner_sharding must have length > 0. For example, " + "PartitionSpec(None) or PartitionSpec('fsdp', None) are valid." + ) + + # extend partition specs + params_sharding_ = params_sharding + if have_params_sharding: + params_sharding_ = jax.tree.map( + lambda p, sh: PartitionSpec(*(sh + (None,) * (len(p.shape) - len(sh)))), + params, + params_sharding_, + ) + preconditioner_sharding_ = preconditioner_sharding + if preconditioner_sharding is not None: + if len(preconditioner_sharding) < 2: + preconditioner_sharding_ = PartitionSpec( + preconditioner_sharding[0], None + ) + + # reshape params shaped () to (1,) to make things simpler + params = jax.tree.map(lambda p: p[None] if len(p.shape) == 0 else p, params) + if have_params_sharding: + params_sharding_ = jax.tree.map( + lambda sh: PartitionSpec(None) if sh == PartitionSpec() else sh, + params_sharding_, + ) + + # scanned layers + scanned_layers_ = scanned_layers + if scanned_layers is None: + scanned_layers_ = jax.tree.map(lambda _: False, params) + scanned_sizes = jax.tree.map( + lambda p, s: p.shape[0] if s else 0, params, scanned_layers_ + ) + + # momentum + mu = None + mu_sharding = params_sharding_ + if b1 > 0 and not return_partition_specs_only: + mu = jax.tree.map(lambda x: jnp.zeros_like(x, dtype=mu_dtype), params) + # apply params sharding to momentum buffer + if have_params_sharding: + mu = _safe_sharding_constraint(mu, params_sharding_) + + # which preconditioners will be diagonal + dim_diag = jax.tree.map( + lambda p, s: _get_preconditioner_types( + p.shape[int(s) :], + max_size_triangular, + min_ndim_triangular, + memory_save_mode, + ), + params, + scanned_layers_, + ) + + # split sharding specs + scanned_dim_sharding = None + sharding_without_scan = None + if have_params_sharding: + scanned_dim_sharding = jax.tree.map( + lambda sh, s: PartitionSpec(sh[0]) if s else None, + params_sharding_, + scanned_layers_, + ) + sharding_without_scan = jax.tree.map( + lambda sh, s: PartitionSpec(*(sh[int(s) :])), + params_sharding_, + scanned_layers_, + ) + + # merge small dimensions + nones = jax.tree.map(lambda _: None, params) + merged_shapes = jax.tree.map( + lambda p, s: p.shape[int(s) :], params, scanned_layers_ + ) + if merge_small_dims: + output = jax.tree.map( + lambda p, s, dd, sh: _merge_small_dims( + p.shape[int(s) :], target_merged_dim_size, dd, sh + ), + params, + scanned_layers_, + dim_diag, + sharding_without_scan if have_params_sharding else nones, + ) + merged_shapes, dim_diag, sharding_without_scan = [ + jax.tree.map(lambda _, x: x[i], params, output) for i in range(3) + ] + + # partition grads into blocks + partitioned_shapes = merged_shapes + if partition_grads_into_blocks: + partitioners = jax.tree.map( + lambda _, ps, dd: BlockPartitioner(ps, block_size, dd), + params, + merged_shapes, + dim_diag, 
+ ) + # we can grab resulting shapes from partitioners + partitioned_shapes = jax.tree.map( + lambda _, p_cls: p_cls._padded_stacked_shape, params, partitioners + ) + + # initialize preconditioners + output = jax.tree.map( + lambda _, ps, dd, sh: list( + _init_Q_exprs( + ps[1:] if partition_grads_into_blocks else ps, + preconditioner_init_scale, + dd, + precond_dtype, + existing_Q=True if return_partition_specs_only else None, + precond_sharding=preconditioner_sharding_, + param_sharding=sh, + buffer_qq=buffer_qq, + current_mesh=current_mesh, + ) + ), + params, + partitioned_shapes, + dim_diag, + sharding_without_scan if have_params_sharding else nones, + ) + if return_partition_specs_only: + exprs, Qs_sharding_no_leading_dims = [ + jax.tree.map(lambda _, x: x[i], params, output) for i in range(2) + ] + else: + Qs, exprs, Qs_sharding_no_leading_dims = [ + jax.tree.map(lambda _, x: x[i], params, output) for i in range(3) + ] + Qs_sharding = None + if have_qs_sharding: + # add scan and stack dims to Qs sharding + def add_dims_to_spec(_, qss, sds): + if partition_grads_into_blocks: + qss = jax.tree.map(lambda qs: PartitionSpec(*((None,) + qs)), qss) + if sds is not None: + qss = jax.tree.map(lambda qs: PartitionSpec(*(sds + qs)), qss) + return qss + + Qs_sharding = jax.tree.map( + add_dims_to_spec, + params, + Qs_sharding_no_leading_dims, + scanned_dim_sharding, + ) + + if not return_partition_specs_only: + # broadcast Qs for stacks and scans + def broadcast_qs(_, ps, q, s): + stack_n = ps[0] + if partition_grads_into_blocks: + # add leading dim for stacked partitions + q = jax.tree.map( + lambda x: jnp.repeat(jnp.expand_dims(x, 0), stack_n, axis=0), q + ) + if s > 0: + # add leading dim if we're scanning this layer + q = jax.tree.map( + lambda d: jnp.repeat(jnp.expand_dims(d, 0), s, axis=0), q + ) + return q + + Qs = jax.tree.map( + broadcast_qs, params, partitioned_shapes, Qs, scanned_sizes + ) + if have_qs_sharding: + Qs = _safe_sharding_constraint(Qs, Qs_sharding) + + # Calculate and print sizes for preconditioners and momentum + Qs_n_elements = sum([q.size for q in jax.tree.leaves(Qs)]) + Qs_size_MB = sum( + [q.size * q.dtype.itemsize / (2**20) for q in jax.tree.leaves(Qs)] + ) + if jax.process_index() == 0: + print( + f"PSGD Preconditioners size: {Qs_n_elements} elements, " + f"{Qs_size_MB:.2f} MB" + ) + if mu is not None: + mu_n_elements = sum([p.size for p in jax.tree.leaves(mu)]) + mu_size_MB = sum( + [p.size * p.dtype.itemsize / (2**20) for p in jax.tree.leaves(mu)] + ) + if jax.process_index() == 0: + print( + f"PSGD Momentum size: {mu_n_elements} elements, {mu_size_MB:.2f} MB" + ) + + if return_partition_specs_only: + return dict( + count=PartitionSpec(), + mu=mu_sharding, + Qs_preconditioners=Qs_sharding, + update_counter=PartitionSpec(), + ) + + return dict( + count=jnp.zeros([], jnp.int32), + mu=mu, + Qs_preconditioners=Qs, + update_counter=jnp.zeros([], jnp.int32), + ) + + def update_fn(updates: base.Updates, state: dict, params: base.Params = None): + del params + count_inc = safe_int32_increment(state["count"]) + key = jax.random.fold_in(jax.random.PRNGKey(42), state["count"]) + + have_params_sharding = params_sharding is not None + have_qs_sharding = have_params_sharding or preconditioner_sharding is not None + + # unbox if flax style partitioned + boxed_updates, grads_structure = jax.tree.flatten( + updates, + is_leaf=lambda g: isinstance( + g, (chex.Array, nn.Partitioned, jax.ShapeDtypeStruct) + ), + ) + flax_partitioned = False + if isinstance(boxed_updates[0], 
nn.Partitioned): + flax_partitioned = True + updates = [g.unbox() for g in boxed_updates] + updates = grads_structure.unflatten(updates) + + # extend partition specs + params_sharding_ = params_sharding + if have_params_sharding: + params_sharding_ = jax.tree.map( + lambda g, sh: PartitionSpec(*(sh + (None,) * (len(g.shape) - len(sh)))), + updates, + params_sharding_, + ) + preconditioner_sharding_ = preconditioner_sharding + if preconditioner_sharding is not None: + if len(preconditioner_sharding) < 2: + preconditioner_sharding_ = PartitionSpec( + preconditioner_sharding[0], None + ) + + # reshape params shaped () to (1,) to make things simpler + input_shapes = jax.tree.map(lambda g: g.shape, updates) + updates = jax.tree.map(lambda g: g[None] if len(g.shape) == 0 else g, updates) + if have_params_sharding: + params_sharding_ = jax.tree.map( + lambda sh: PartitionSpec(None) if sh == PartitionSpec() else sh, + params_sharding_, + ) + + # scanned layers + scanned_layers_ = scanned_layers + if scanned_layers is None: + scanned_layers_ = jax.tree.map(lambda _: False, updates) + + # update probability can be scheduled + update_prob_in = preconditioner_update_probability + if isinstance(preconditioner_update_probability, Callable): + update_prob_in = preconditioner_update_probability(count_inc) + + # normalize grads + def norm_grads(g): + return g / (jnp.linalg.norm(g) + 1e-16) + + if normalize_grads: + updates = jax.tree.map(norm_grads, updates) + + # momentum + mu = None + momentum_updates = updates + if state["mu"] is not None: + mu = otu.tree_update_moment(updates, state["mu"], b1, 1) + if have_params_sharding: + mu = _safe_sharding_constraint(mu, params_sharding_) + momentum_updates = otu.tree_bias_correction(mu, b1, count_inc) + + # which preconditioners will be diagonal + dim_diag = jax.tree.map( + lambda g, s: _get_preconditioner_types( + g.shape[int(s) :], + max_size_triangular, + min_ndim_triangular, + memory_save_mode, + ), + momentum_updates, + scanned_layers_, + ) + + # split sharding specs + scanned_dim_sharding = None + sharding_without_scan = None + if have_params_sharding: + scanned_dim_sharding = jax.tree.map( + lambda sh, s: PartitionSpec(sh[0]) if s else None, + params_sharding_, + scanned_layers_, + ) + sharding_without_scan = jax.tree.map( + lambda sh, s: PartitionSpec(*(sh[int(s) :])), + params_sharding_, + scanned_layers_, + ) + + # merge small dimensions + dummy_updates_tree = jax.tree.map(lambda _: jnp.zeros([]), updates) + nones = jax.tree.map(lambda _: None, momentum_updates) + merged_params_sharding = params_sharding_ + original_shapes = None + if merge_small_dims: + original_shapes = jax.tree.map( + lambda g, s: g.shape[int(s) :], momentum_updates, scanned_layers_ + ) + output = jax.tree.map( + lambda g, dd, s, sh: _merge_small_dims( + g.shape[int(s) :], target_merged_dim_size, dd, sh + ), + momentum_updates, + dim_diag, + scanned_layers_, + sharding_without_scan if have_params_sharding else nones, + ) + merged_shapes, dim_diag, sharding_without_scan = [ + jax.tree.map(lambda _, x: x[i], momentum_updates, output) + for i in range(3) + ] + # reshape + momentum_updates = jax.tree.map( + lambda g, s, ns: _map_fn( + False, 0, int(s), lambda x, shape=ns: jnp.reshape(x, shape), g + ), + momentum_updates, + scanned_layers_, + merged_shapes, + ) + if have_params_sharding: + # scanned dim sharding + new merged sharding + merged_params_sharding = jax.tree.map( + lambda sws, sds: PartitionSpec( + *(sds + sws if sds is not None else sws) + ), + sharding_without_scan, + 
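+                # (sds is the scan-dim spec, prepended back onto each layer's
+                # merged per-dim specs)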
scanned_dim_sharding, + ) + # constrain sharding + momentum_updates = _safe_sharding_constraint( + momentum_updates, merged_params_sharding + ) + + # partition grads into blocks + partitioned_sharding = merged_params_sharding + n_dims_to_map = jax.tree.map(lambda s: int(s), scanned_layers_) + partitioners = None + partitioned_shapes = None + if partition_grads_into_blocks: + partitioners = jax.tree.map( + lambda g, dd, s: BlockPartitioner(g.shape[int(s) :], block_size, dd), + momentum_updates, + dim_diag, + scanned_layers_, + ) + # layers become tuples each containing layer's partitions + momentum_updates = jax.tree.map( + lambda g, p_cls, s: _map_fn(False, 0, int(s), p_cls.partition, g), + momentum_updates, + partitioners, + scanned_layers_, + ) + partitioned_shapes = jax.tree.map( + lambda _, g, s: jax.tree.map(lambda x: x.shape[int(s) :], g), + dummy_updates_tree, + momentum_updates, + scanned_layers_, + ) + if have_params_sharding: + # constrain partitions to same sharding as entire layer + momentum_updates = jax.tree.map( + lambda _, g, mps: jax.tree.map( + lambda x: _safe_sharding_constraint(x, mps), g + ), + dummy_updates_tree, + momentum_updates, + merged_params_sharding, + ) + # pad and stack partitions, tuples become arrays with new leading dim + momentum_updates = jax.tree.map( + lambda _, g, s: _map_fn( + False, + 0, + int(s), + lambda x, bs=block_size: _pad_and_stack_matrices(x, bs), + g, + ), + dummy_updates_tree, + momentum_updates, + scanned_layers_, + ) + if have_params_sharding: + # add dim to sharding specs for new stacked dim + partitioned_sharding = jax.tree.map( + lambda mps, s: PartitionSpec(*(mps[: int(s)] + (None,) + mps[1:])), + merged_params_sharding, + scanned_layers_, + ) + # constrain sharding + momentum_updates = _safe_sharding_constraint( + momentum_updates, partitioned_sharding + ) + n_dims_to_map = jax.tree.map(lambda x: x + 1, n_dims_to_map) + + # get einsum expressions and Qs sharding + Qs = state["Qs_preconditioners"] + Qs_sharding = None + exprs_and_sharding = jax.tree.map( + lambda g, dd, sh, nm: _init_Q_exprs( + g.shape[nm:], + preconditioner_init_scale, + dd, + precond_dtype, + existing_Q=True, + precond_sharding=preconditioner_sharding_, + param_sharding=sh, + buffer_qq=buffer_qq, + ), + momentum_updates, + dim_diag, + sharding_without_scan if have_params_sharding else nones, + n_dims_to_map, + ) + exprs, Qs_sharding_no_leading_dims = [ + jax.tree.map(lambda _, x: x[i], dummy_updates_tree, exprs_and_sharding) + for i in range(2) + ] + Qs_sharding = None + if have_qs_sharding: + # add scan and stack dims to Qs sharding + def add_dims_to_spec(_, qss, sds): + if partition_grads_into_blocks: + qss = jax.tree.map(lambda qs: PartitionSpec(*((None,) + qs)), qss) + if sds is not None: + qss = jax.tree.map(lambda qs: PartitionSpec(*(sds + qs)), qss) + return qss + + Qs_sharding = jax.tree.map( + add_dims_to_spec, + dummy_updates_tree, + Qs_sharding_no_leading_dims, + scanned_dim_sharding, + ) + + # pad sizes for buffering qq + pad_sizes = jax.tree.map( + lambda g, qs, nm: [q.shape[nm] - dim for q, dim in zip(qs, g.shape[nm:])], + momentum_updates, + Qs, + n_dims_to_map, + ) + + # maybe update preconditioner + def update_preconditioner(key, Qs): + with jax.default_matmul_precision(precond_update_precision): + # separate out q if we're buffering qq + if buffer_qq: + Qs = jax.tree.map( + lambda _, qs, nm, dd, psize, sh: jax.tree.map( + lambda q, d, ps, sh: ( + _map_fn( + False, + 0, + nm, + lambda q, pad_size=ps, sharding=( + sh if have_qs_sharding else None + 
): _get_q(q, pad_size, sharding), + q, + ) + if not d + else q + ), + qs, + dd, + psize, + sh, + ), + dummy_updates_tree, + Qs, + n_dims_to_map, + dim_diag, + pad_sizes, + Qs_sharding_no_leading_dims, + ) + if have_qs_sharding: + Qs = _safe_sharding_constraint(Qs, Qs_sharding) + + # create random vectors + key, subkey = jax.random.split(key) + Vs = _tree_random_like(subkey, momentum_updates) + # apply params sharding to random vectors + if have_params_sharding: + Vs = _safe_sharding_constraint(Vs, partitioned_sharding) + + # balance preconditioners about every 100 updates + def balance_Qs(Qs_to_bal): + def _balance_Q(Q): + norms = jnp.array( + [jnp.max(jnp.abs(q)) for q in Q], dtype=jnp.float32 + ) + gmean = jnp.exp(jnp.mean(jnp.log(norms))) + to_mul = gmean / norms + return [q * x.astype(q.dtype) for q, x in zip(Q, to_mul)] + + return jax.tree.map( + lambda _, Q, nm: _map_fn(False, 0, nm, _balance_Q, Q), + dummy_updates_tree, + Qs_to_bal, + n_dims_to_map, + ) + + key, subkey = jax.random.split(key) + do_balances = jax.random.uniform(subkey) <= 0.01 + Qs = jax.lax.cond(do_balances, balance_Qs, lambda qs: qs, Qs) + if have_qs_sharding: + Qs = _safe_sharding_constraint(Qs, Qs_sharding) + + # form conjB + conjBs = jax.tree.map( + lambda g, Q, v, nm: _map_fn(lax_map, bs, nm, _conjB, Q, g, v), + momentum_updates, + Qs, + Vs, + n_dims_to_map, + ) + if have_params_sharding: + conjBs = _safe_sharding_constraint(conjBs, partitioned_sharding) + + # update Qs and constrain sharding + new_Qs = jax.tree.map( + lambda g, Q, conjb, expr, nm, qss, sh: _map_fn( + lax_map, + bs, + nm, + partial( + _update_precond, + exprs=expr, + precond_lr=preconditioner_lr, + qs_sharding=qss, + params_sharding=sh, + ), + Q, + g, + conjb, + ), + momentum_updates, + Qs, + conjBs, + exprs, + n_dims_to_map, + Qs_sharding_no_leading_dims if have_qs_sharding else nones, + sharding_without_scan if have_params_sharding else nones, + ) + if have_qs_sharding: + new_Qs = _safe_sharding_constraint(new_Qs, Qs_sharding) + + if buffer_qq: + # store half of qq in lower triangular part of Qs (Q is triu) + new_Qs = jax.tree.map( + lambda _, qs, nm, dd, psize, sh: jax.tree.map( + lambda q, d, ps, sh: ( + _map_fn( + False, + 0, + nm, + lambda q, pad_size=ps, sharding=( + sh if have_qs_sharding else None + ): _store_qq(q, pad_size, sharding), + q, + ) + if not d + else q + ), + qs, + dd, + psize, + sh, + ), + dummy_updates_tree, + new_Qs, + n_dims_to_map, + dim_diag, + pad_sizes, + Qs_sharding_no_leading_dims, + ) + new_Qs = otu.tree_cast(new_Qs, precond_dtype) + return new_Qs + + # update preconditioner deterministically + update_counter_inc = safe_int32_increment(state["update_counter"]) + do_update = update_counter_inc >= 1 / update_prob_in + update_counter_inc = jnp.where(do_update, 0, update_counter_inc) + key, subkey = jax.random.split(key) + new_Qs = jax.lax.cond( + do_update, update_preconditioner, lambda _, qs: qs, subkey, Qs + ) + if have_qs_sharding: + new_Qs = _safe_sharding_constraint(new_Qs, Qs_sharding) + + # precondition gradients + with jax.default_matmul_precision(precond_grads_precision): + # precondition with stale Qs + if buffer_qq: + # get qq out of Qs + Qs_in = jax.tree.map( + lambda _, qs, nm, dd, psize, sh: jax.tree.map( + lambda q, d, ps, sh: ( + _map_fn( + False, + 0, + nm, + lambda q, pad_size=ps, sharding=( + sh if have_qs_sharding else None + ): _get_qq(q, pad_size, sharding), + q, + ) + if not d + else q + ), + qs, + dd, + psize, + sh, + ), + dummy_updates_tree, + Qs, + n_dims_to_map, + dim_diag, + pad_sizes, + 
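+                    # (these specs carry no scan/stack dims, matching q's own dims)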
Qs_sharding_no_leading_dims, + ) + else: + Qs_in = Qs + if have_qs_sharding: + Qs_in = _safe_sharding_constraint(Qs_in, Qs_sharding) + + precond_gs = jax.tree.map( + lambda g, Q, expr, nm: _map_fn( + lax_map, + bs, + nm, + partial(_precond_grad, exprs=expr, buffer_qq=buffer_qq), + Q, + g, + ), + momentum_updates, + Qs_in, + exprs, + n_dims_to_map, + ) + if have_params_sharding: + precond_gs = _safe_sharding_constraint(precond_gs, partitioned_sharding) + + # unpartition grads + if partition_grads_into_blocks: + precond_gs = jax.tree.map( + lambda g, s, ps: _map_fn( + False, + 0, + int(s), + lambda p, shapes=ps: _unstack_and_unpad_matrices(p, shapes), + g, + ), + precond_gs, + scanned_layers_, + partitioned_shapes, + ) + if have_params_sharding: + precond_gs = _safe_sharding_constraint( + precond_gs, merged_params_sharding + ) + precond_gs = jax.tree.map( + lambda _, g, s, p_cls: _map_fn( + False, 0, int(s), p_cls.merge_partitions, g + ), + dummy_updates_tree, + precond_gs, + scanned_layers_, + partitioners, + ) + if have_params_sharding: + precond_gs = _safe_sharding_constraint( + precond_gs, merged_params_sharding + ) + + # un-merge dimensions + if merge_small_dims: + precond_gs = jax.tree.map( + lambda g, s, os: _map_fn( + False, 0, int(s), lambda p, shape=os: jnp.reshape(p, shape), g + ), + precond_gs, + scanned_layers_, + original_shapes, + ) + if have_params_sharding: + precond_gs = _safe_sharding_constraint(precond_gs, params_sharding_) + + # return scalars to original shape + precond_gs = jax.tree.map( + lambda g, s: jnp.reshape(g, s), precond_gs, input_shapes + ) + + # box preconditioned grads + if flax_partitioned: + flat_precond_gs, _ = jax.tree.flatten(precond_gs) + precond_gs = [ + bu.replace_boxed(g) for bu, g in zip(boxed_updates, flat_precond_gs) + ] + precond_gs = grads_structure.unflatten(precond_gs) + + # dtypes and new state + mu = otu.tree_cast(mu, mu_dtype) + new_Qs = otu.tree_cast(new_Qs, precond_dtype) + state = dict( + count=count_inc, + mu=mu, + Qs_preconditioners=new_Qs, + update_counter=update_counter_inc, + ) + + return precond_gs, state + + return base.GradientTransformation(init_fn, update_fn) + + +def kron( + learning_rate: Union[float, Callable[[int], float]] = 0.001, + b1: float = 0.9, + weight_decay: float = 0.0, + weight_decay_mask: Optional[Union[Any, Callable[[base.Params], Any]]] = None, + normalize_grads: bool = False, + preconditioner_update_probability: Union[ + float, Callable[[int], float] + ] = precond_update_prob_schedule(), + max_size_triangular: int = 8192, + min_ndim_triangular: int = 2, + memory_save_mode: Optional[str] = None, + mu_dtype: Optional[Union[str, jnp.dtype]] = None, + precond_dtype: Optional[Union[str, jnp.dtype]] = None, + precond_update_precision: Optional[str] = "tensorfloat32", + precond_grads_precision: Optional[str] = None, + scanned_layers: Optional[base.Params] = None, + lax_map_scanned_layers: bool = False, + lax_map_batch_size: int = 8, + merge_small_dims: bool = False, + target_merged_dim_size: int = 2048, + partition_grads_into_blocks: bool = False, + block_size: int = 256, + buffer_qq: bool = False, + params_sharding: Optional[Any] = None, + preconditioner_sharding: Optional[PartitionSpec[str, str]] = None, +) -> base.GradientTransformation: + """ + Implements PSGD Kron from https://github.com/lixilinx/psgd_torch. + + Args: + learning_rate: float or callable, learning rate schedule. + b1: float, momentum parameter. 0.9 or 0.95 are common values. + weight_decay: float, weight decay coefficient. 
+ weight_decay_mask: optional pytree same structure as params, or callable + returning a pytree, that masks weight decay. Weight decay is applied to + leaves that are True. + normalize_grads: bool, whether to normalize the incoming gradients to unit + norm layer-wise. Can help with stability. + preconditioner_update_probability: float, probability of updating the + preconditioner. Default anneals from 1.0 to 0.03 by 4000 steps. + max_size_triangular: int, max size for dim's preconditioner to be triangular. + min_ndim_triangular: int, minimum number of dimensions a layer needs to have + triangular preconditioners. + memory_save_mode: optional str, None, 'one_diag', or 'all_diag', None is default + to set all preconditioners to be triangular, 'one_diag' sets the largest + or last dim to be diagonal per layer, and 'all_diag' sets all preconditioners + to be diagonal. + mu_dtype: optional str or jnp.dtype, dtype of the momentum buffer. Defaults to + same dtype as the parameters. + precond_dtype: optional str or jnp.dtype, dtype of the preconditioners. Defaults + to 'float32'. + precond_update_precision: str, precision for matmul during preconditioner update, + 'bfloat16', 'tensorfloat32', 'float32'. + precond_grads_precision: str, precision for matmul during preconditioning grads, + 'bfloat16', 'tensorfloat32', 'float32'. + scanned_layers: optional base.Params, tree of booleans same structure as + params indicating scanned dimensions for each layer. PSGD will vmap over + leading dimension. + lax_map_scanned_layers: bool, whether to use lax.map for scanned layers + instead of vmap. Useful to save memory with large models. + lax_map_batch_size: int, batch size for lax.map, see JAX docs for more info. + merge_small_dims: bool, whether to merge small dimensions to improve + preconditioner efficiency. + target_merged_dim_size: int, target size of merged dimensions. + partition_grads_into_blocks: bool, whether to partition grads into chunks of + size `block_size` for efficiency. + block_size: int, block size to use for partitioning grads. + buffer_qq: bool, whether to buffer p=q@q.T for faster preconditioning. This may + not be beneficial if using sharded preconditioners so default is False. If + not sharding preconditioners, try setting to True to see if there is a speedup. + params_sharding: pytree same structure as params of jax.sharding.PartitionSpec. + preconditioner_sharding: `None` or `PartitionSpec(str | None, str | None)`, + PartitionSpec for preconditioner matrices. `None` infers a strategy + from params_sharding that matches first preconditioner axis to + corresponding axis in params. Best practice, though, is to shard the first + dimension across fsdp-like mesh axis, or the largest, most common axis in + params. For example, PartitionSpec('fsdp') or PartitionSpec('fsdp', 'tp'). 
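+
+    Example (a minimal sketch, not part of this API; the toy params and
+    hyperparameter values below are illustrative only):
+
+        import jax
+        import optax
+        from jax import numpy as jnp
+
+        params = {"w": jnp.zeros((256, 128)), "b": jnp.zeros((128,))}
+        opt = kron(learning_rate=3e-4, weight_decay=0.01)
+        state = opt.init(params)
+        grads = jax.tree.map(jnp.ones_like, params)  # stand-in gradients
+        updates, state = opt.update(grads, state, params)
+        params = optax.apply_updates(params, updates)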
+
+    Returns:
+        optax.GradientTransformation
+    """
+    optimizer = [
+        scale_by_kron(
+            b1=b1,
+            normalize_grads=normalize_grads,
+            preconditioner_update_probability=preconditioner_update_probability,
+            max_size_triangular=max_size_triangular,
+            min_ndim_triangular=min_ndim_triangular,
+            memory_save_mode=memory_save_mode,
+            mu_dtype=mu_dtype,
+            precond_dtype=precond_dtype,
+            precond_update_precision=precond_update_precision,
+            precond_grads_precision=precond_grads_precision,
+            scanned_layers=scanned_layers,
+            lax_map_scanned_layers=lax_map_scanned_layers,
+            lax_map_batch_size=lax_map_batch_size,
+            merge_small_dims=merge_small_dims,
+            target_merged_dim_size=target_merged_dim_size,
+            partition_grads_into_blocks=partition_grads_into_blocks,
+            block_size=block_size,
+            buffer_qq=buffer_qq,
+            params_sharding=params_sharding,
+            preconditioner_sharding=preconditioner_sharding,
+        )
+    ]
+    if weight_decay > 0.0:
+        optimizer.append(transform.add_decayed_weights(weight_decay, weight_decay_mask))
+    optimizer.append(transform.scale_by_learning_rate(learning_rate))
+    return chain(*optimizer)
+
+
+def get_opt_state_partition_specs(
+    params: base.Params, scale_by_kron_only: bool = False, **kwargs
+):
+    """Get tree of PartitionSpecs for kron optimizer state.
+
+    Params are converted to jax.ShapeDtypeStructs; no arrays are used.
+
+    Args:
+        params: pytree of Arrays, nn.Partitioned, or jax.ShapeDtypeStruct.
+        scale_by_kron_only: bool, if True, only returns partition specs for the
+            `scale_by_kron` function, otherwise the `kron` function.
+        kwargs: kwargs for kron (or scale_by_kron).
+
+    Returns:
+        tree of PartitionSpecs for optimizer state.
+    """
+    params_flat, params_struct = jax.tree.flatten(params)
+    if isinstance(params_flat[0], nn.Partitioned):
+        params_flat = [p.unbox(p) for p in params_flat]
+    if not isinstance(params_flat[0], jax.ShapeDtypeStruct):
+        params_flat = [jax.ShapeDtypeStruct(p.shape, p.dtype) for p in params_flat]
+    params = params_struct.unflatten(params_flat)
+
+    specs = scale_by_kron(**kwargs).init(params, return_partition_specs_only=True)
+
+    if not scale_by_kron_only:
+        specs = (specs,)
+        if kwargs.get("weight_decay", 0.0) > 0.0:
+            specs += (None,)
+        specs += (None,)
+
+    return specs
+
+
+def _get_preconditioner_types(
+    shape: Tuple[int, ...], max_size: int, min_ndim: int, mem_save_mode: Optional[str]
+) -> List[bool]:
+    if len(shape) == 0:
+        return [True]
+
+    if mem_save_mode is None:
+        dim_diag = [False for _ in shape]
+    elif mem_save_mode == "one_diag":
+        rev_sorted_dims = np.argsort(shape)[::-1]
+        dim_diag = [False for _ in shape]
+        dim_diag[rev_sorted_dims[0]] = True
+    elif mem_save_mode == "all_diag":
+        dim_diag = [True for _ in shape]
+    else:
+        raise ValueError(
+            f"Invalid mem_save_mode: {mem_save_mode}, must be one of "
+            "[None, 'one_diag', 'all_diag']"
+        )
+
+    for i, size in enumerate(shape):
+        if size == 1 or size > max_size or len(shape) < min_ndim:
+            dim_diag[i] = True
+
+    return dim_diag
+
+
+def _init_Q_exprs(
+    t_shape,
+    scale,
+    dim_diag,
+    dtype,
+    existing_Q=None,
+    precond_sharding=None,
+    param_sharding=None,
+    buffer_qq=False,
+    current_mesh: Optional[jax.sharding.Mesh] = None,
+):
+    have_qs_sharding = precond_sharding is not None or param_sharding is not None
+    letters = string.ascii_lowercase + string.ascii_uppercase
+    if len(t_shape) == 0:  # scalar
+        Q = (
+            [scale * jnp.ones(t_shape, dtype=dtype)]
+            if existing_Q is None
+            else existing_Q
+        )
+        exprA = ",->"
+        exprGs = [",->"]
+        exprP = ",,->"
+
+        sharding_out = [None]
+        if have_qs_sharding:
+            sharding_out = 
[PartitionSpec()]
+    else:  # tensor
+        if len(t_shape) > 13:
+            raise ValueError(
+                f"Got tensor with dim {len(t_shape)}; Einstein runs out of letters!"
+            )
+        scale = scale ** (1 / len(t_shape))
+        Q = [] if existing_Q is None else existing_Q
+        piece1A, piece2A, piece3A = ([], "", "")
+        exprGs = []
+        piece1P, piece2P, piece3P, piece4P = ([], [], "", "")
+
+        params_specs = param_sharding
+        if param_sharding is None:
+            params_specs = PartitionSpec(*((None,) * len(t_shape)))
+        sharding_out = [None] * len(t_shape)
+        if have_qs_sharding:
+            sharding_out = [PartitionSpec(None)] * len(t_shape)
+
+        for i, (size, dim_d, dim_sh) in enumerate(zip(t_shape, dim_diag, params_specs)):
+            if dim_d:
+                # use diagonal matrix as preconditioner for this dim
+                if existing_Q is None:
+                    q = scale * jnp.ones(size, dtype=dtype)
+                    Q.append(q)
+
+                piece1A.append(letters[i])
+                piece2A = piece2A + letters[i]
+                piece3A = piece3A + letters[i]
+
+                piece1 = "".join(
+                    [
+                        (letters[i + 13] if j == i else letters[j])
+                        for j in range(len(t_shape))
+                    ]
+                )
+                exprGs.append(piece1 + "," + piece1 + "->" + letters[i + 13])
+
+                piece1P.append(letters[i + 13])
+                piece2P.append(letters[i + 13])
+                piece3P = piece3P + letters[i + 13]
+                piece4P = piece4P + letters[i + 13]
+            else:
+                # use triangular matrix as preconditioner for this dim
+                q_sharding = None
+                if have_qs_sharding:
+                    # infer a so-so sharding scheme from params if nothing specified
+                    # (first dim of q will match corresponding dim in params)
+                    q_sharding = (
+                        precond_sharding
+                        if precond_sharding is not None
+                        else PartitionSpec(dim_sh, None)
+                    )
+                    sharding_out[i] = q_sharding
+                if existing_Q is None:
+                    q = scale * jnp.eye(size, dtype=dtype)
+                    if have_qs_sharding:
+                        q = _safe_sharding_constraint(q, q_sharding)
+
+                    # we can optionally store q @ q in tril for later
+                    if buffer_qq:
+                        pad_size = 1
+                        if have_qs_sharding and current_mesh is not None:
+                            # pad size will be largest mesh axis size in q sharding
+                            axis_sizes = [pad_size]
+                            for ax in q_sharding:
+                                if ax is not None:
+                                    axis_tuple = ax if isinstance(ax, tuple) else (ax,)
+                                    axis_size = np.prod(
+                                        [current_mesh.shape[a] for a in axis_tuple]
+                                    )
+                                    axis_sizes.append(axis_size)
+                            pad_size = max(axis_sizes)
+                        q = _store_qq(q, pad_size, sharding=q_sharding)
+
+                    Q.append(q)
+
+                piece1A.append(letters[i] + letters[i + 13])
+                piece2A = piece2A + letters[i + 13]
+                piece3A = piece3A + letters[i]
+
+                piece1 = "".join(
+                    [
+                        (letters[i + 13] if j == i else letters[j])
+                        for j in range(len(t_shape))
+                    ]
+                )
+                piece2 = "".join(
+                    [
+                        (letters[i + 26] if j == i else letters[j])
+                        for j in range(len(t_shape))
+                    ]
+                )
+                exprGs.append(
+                    piece1 + "," + piece2 + "->" + letters[i + 13] + letters[i + 26]
+                )
+
+                a, b, c = (letters[i], letters[i + 13], letters[i + 26])
+                piece1P.append(c + b if buffer_qq else a + b)
+                piece2P.append(a + c)
+                piece3P = piece3P + c
+                piece4P = piece4P + b
+
+        exprA = ",".join(piece1A) + "," + piece2A + "->" + piece3A
+        if buffer_qq:
+            exprP = ",".join(piece1P) + "," + piece3P + "->" + piece4P
+        else:
+            exprP = (
+                ",".join(piece1P)
+                + ","
+                + ",".join(piece2P)
+                + ","
+                + piece3P
+                + "->"
+                + piece4P
+            )
+
+    exprGs = tuple(exprGs)
+    if existing_Q is not None:
+        return (exprA, exprGs, exprP), sharding_out
+    return Q, (exprA, exprGs, exprP), sharding_out
+
+
+def _store_qq(q, pad_size=1, sharding=None):
+    # after storing qq, precond update goes from
+    # an,bo,aA,bB,AB->no to cached:[aA,an->An, bB,bo->Bo], update:An,Bo,AB->no
+    p = jnp.einsum("aA,an->An", q, q)  # keep first dim as contracting
+    if sharding is not 
None: + p = _safe_sharding_constraint(p, sharding) + q = jnp.pad(q, ((0, pad_size), (pad_size, 0))) + if sharding is not None: + q = _safe_sharding_constraint(q, sharding) + p = jnp.pad(p, ((pad_size, 0), (0, pad_size))) + if sharding is not None: + p = _safe_sharding_constraint(p, sharding) + q += jnp.tril(p, k=-pad_size) + if sharding is not None: + q = _safe_sharding_constraint(q, sharding) + return q + + +def _get_qq(q, pad_size=1, sharding=None): + p = jnp.tril(q[pad_size:, :-pad_size]) + if sharding is not None: + p = _safe_sharding_constraint(p, sharding) + p = p + p.T - jnp.diag(jnp.diag(p)) + if sharding is not None: + p = _safe_sharding_constraint(p, sharding) + return p + + +def _get_q(q, pad_size=1, sharding=None): + q = jnp.triu(q[:-pad_size, pad_size:]) + if sharding is not None: + q = _safe_sharding_constraint(q, sharding) + return q + + +def _norm_lower_bound(A: jax.Array): + """Returns a cheap lower bound for the spectral norm of A. + + Numerical results on random matrices with a wide range of distributions and + sizes suggest, norm(A) <= sqrt(2) * norm_lower_bound(A). Looks to be a very + tight lower bound. + + A is hermitian so we can always use dim 0 and not have to compare to dim 1. + """ + max_abs = jnp.max(jnp.abs(A)) + + def calc(A): + A = A / max_abs + aa = A * A + aa_sum0 = jnp.sum(aa, axis=0) + i = jnp.argmax(aa_sum0, 0) + x = jax.lax.dynamic_index_in_dim(A, i, 1, keepdims=False) + x = x @ A + return max_abs * jnp.linalg.norm((x / jnp.linalg.norm(x)) @ A.T) + + return jnp.where(max_abs > 0, calc(A), max_abs) + + +def _solve_triangular_right(X, A): + """Compute X @ inv(A). + + A triangular solve has roughly the same complexity as a matmul. + """ + X_ndim = X.ndim + if X_ndim < 2: + X = X[None, :] + + dtype_in = jnp.promote_types(A.dtype, X.dtype) + A, X = A.astype(dtype_in), X.astype(dtype_in) + leading_dims = 0 + if X.ndim > 2: + leading_dims = X.ndim - 2 + solve_fn = partial(jax.lax.linalg.triangular_solve, left_side=False, lower=False) + for _ in range(leading_dims): + solve_fn = vmap(solve_fn, in_axes=(None, 0)) + solution = solve_fn(A, X) + + if X_ndim < 2: + return solution[0] + return solution + + +def _conjB(Q, G, V): + """Compute conjB.""" + order = G.ndim + p = list(range(order)) + conjB = jnp.transpose(V, p[1:] + p[:1]) + for i, q in enumerate(Q): + conjB = conjB / q if q.ndim < 2 else _solve_triangular_right(conjB, q) + if i < order - 1: + conjB = jnp.swapaxes(conjB, i, order - 1) + return conjB + + +def _update_precond(Q, G, conjB, exprs, precond_lr, qs_sharding, params_sharding): + """Compute A and update Q.""" + exprA, exprGs, _ = exprs + + A = jnp.einsum(exprA, *Q, G) + + def _update_single_q(i, q): + term1 = jnp.einsum(exprGs[i], A, A) + term2 = jnp.einsum(exprGs[i], conjB, conjB) + + if q.ndim < 2: + q -= ( + precond_lr + / _add_tiny(jnp.max(jnp.abs(term1 + term2))) + * (term1 - term2) + * q + ) + else: + if qs_sharding is not None: + sharding = qs_sharding[i] + # transpose q sharding for terms + if len(sharding) < 2: + sharding = PartitionSpec(*((None,) + sharding)) + else: + assert len(sharding) == 2 + sharding = PartitionSpec(*(sharding[1:] + sharding[:1])) + term1 = _safe_sharding_constraint(term1, sharding) + term2 = _safe_sharding_constraint(term2, sharding) + q -= ( + precond_lr + / _add_tiny(_norm_lower_bound(term1 + term2)) + * jnp.triu(term1 - term2) + @ q + ) + return q + + return [_update_single_q(i, q) for i, q in enumerate(Q)] + + +def _precond_grad(Q, G, exprs, buffer_qq=False): + """Precondition gradient G with preconditioner 
Q.""" + exprP = exprs[-1] + if buffer_qq: + return jnp.einsum(exprP, *Q, G) + else: + return jnp.einsum(exprP, *Q, *Q, G) + + +def _safe_sharding_constraint(x, sharding): + if sharding is None: + return x + else: + return with_sharding_constraint(x, sharding) + + +def _add_tiny(x): + return x + jnp.finfo(x.dtype).tiny + + +def _map_fn(lax_map, bs, n_maps, fn, *args): + """Maybe map a fn along multiple leading axes.""" + if n_maps <= 0: + return fn(*args) + + if lax_map: + mapped_fn = lambda xs: _map_fn(lax_map, bs, n_maps - 1, fn, *xs) + return jax.lax.map(mapped_fn, xs=args, batch_size=bs if bs > 1 else None) + else: + mapped_fn = lambda *xs: _map_fn(lax_map, bs, n_maps - 1, fn, *xs) + return vmap(mapped_fn)(*args) + + +def _tree_random_like( + rng_key: chex.PRNGKey, target_tree: chex.ArrayTree, dtype=None +) -> chex.ArrayTree: + # adopted from optax + tree_def = jax.tree.structure(target_tree) + keys = jax.random.split(rng_key, tree_def.num_leaves) + keys_tree = jax.tree.unflatten(tree_def, keys) + return jax.tree.map( + lambda l, k: jax.random.normal( + k, l.shape, dtype if dtype is not None else l.dtype + ), + target_tree, + keys_tree, + ) + + +class BlockPartitioner: + """Partitions a tensor into smaller tensors. + + Modified from distributed_shampoo. + https://github.com/google-research/google-research/blob/master/scalable_shampoo/optax/distributed_shampoo.py + Scalable Second Order Optimization for Deep Learning, + Rohan Anil, Vineet Gupta, Tomer Koren, Kevin Regan, Yoram Singer + https://arxiv.org/abs/2002.09018 + """ + + def __init__(self, param_shape, block_size, dim_diag): + assert len(dim_diag) == len( + param_shape + ), "dim_diag must have same length as param_shape" + self._shape = param_shape + self._splits = [] + split_sizes = [] + # We split params into smaller blocks. Here we store the metadata to make + # that split. + for i, d in enumerate(param_shape): + if 0 < block_size < d and not dim_diag[i]: + # d-1, otherwise split appends a 0-size array. 
+ nsplit = (d - 1) // block_size + indices = (np.arange(nsplit, dtype=np.int32) + 1) * block_size + sizes = np.ones(nsplit + 1, dtype=np.int32) * block_size + sizes[-1] = d - indices[-1] + self._splits.append((i, indices)) + split_sizes.append(sizes) + else: + split_sizes.append(np.array([d], dtype=np.int32)) + self._split_sizes = split_sizes + + # TODO (evanatyourservice) + # this might fail with scalar params but for now we're reshaping those + single_shape = [a[0] for a in split_sizes] + padded_single_shape = [ + -(-dim // block_size) * block_size for dim in single_shape + ] + stack_size = max(1, np.prod([max(1, len(s)) for s in split_sizes])) + self._padded_stacked_shape = tuple([stack_size] + padded_single_shape) + + def split_sizes(self): + return self._split_sizes + + def partition(self, tensor): + """Partition tensor into blocks.""" + + assert tensor.shape == self._shape + tensors = [tensor] + for i, indices in self._splits: + tensors_local = [] + for t in tensors: + tensors_local.extend(jnp.split(t, indices_or_sections=indices, axis=i)) + tensors = tensors_local + return tuple(tensors) + + def merge_partitions(self, partitions): + """Merge partitions back to original shape.""" + + for i, indices in reversed(self._splits): + n = len(indices) + 1 + partial_merged_tensors = [] + ind = 0 + while ind < len(partitions): + partial_merged_tensors.append( + jnp.concatenate(partitions[ind : ind + n], axis=i) + ) + ind += n + partitions = partial_merged_tensors + assert len(partitions) == 1 + return partitions[0] + + +def _partitions(lst): + """Generate all partitions of a list.""" + if not lst: + yield [[]] + else: + for i in range(len(lst)): + for part in _partitions(lst[i + 1 :]): + yield [lst[: i + 1]] + part + + +def _merge_small_dims( + shape_to_merge, max_dim, dim_diag, sharding_to_merge=None +) -> Tuple[List[int], List[bool], Optional[Tuple]]: + if not shape_to_merge: # handles scalar shape () + return [], [True], PartitionSpec() if sharding_to_merge is not None else None + if np.all(np.array(shape_to_merge) == 1): # handles shape (1,) + return ( + [1], + [True], + PartitionSpec(None) if sharding_to_merge is not None else None, + ) + + def dim2loss(d, dim0=max_dim): + """A heuristic map from dim to loss with the least loss occurs at dim0.""" + loss = 0 + if d < dim0: + loss += np.log2(dim0 / d) + too_small = dim0 / 8 + if d < too_small: + loss += 100 * np.log2(too_small / d) + else: + loss += 10 * np.log2(d / dim0) + too_large = 8 * dim0 + if d > too_large: + loss += 1000 * np.log2(d / too_large) + return loss + + best_loss = float("inf") + best_partition = None + + for p in _partitions(list(range(len(shape_to_merge)))): + loss = 0 + merged = [] + for group in p: + if not group: + continue + d = np.prod([shape_to_merge[i] for i in group]) + loss += dim2loss(d) + merged.append(group) + + if loss < best_loss: + best_loss = loss + best_partition = merged + + merged_shape = [] + merged_diag = [] + merged_sharding = [] + + for group in best_partition: + merged_shape.append(np.prod([shape_to_merge[i] for i in group])) + merged_diag.append(all(dim_diag[i] for i in group)) + if sharding_to_merge: + group_shardings = [sharding_to_merge[i] for i in group] + valid_shardings = [s for s in group_shardings if s is not None] + + if len(valid_shardings) > 1: + merged_sharding.append(tuple(valid_shardings)) + elif len(valid_shardings) == 1: + merged_sharding.append(valid_shardings[0]) + else: + merged_sharding.append(None) + + return ( + merged_shape, + merged_diag, + PartitionSpec(*merged_sharding) 
if sharding_to_merge else None, + ) + + +def _pad_and_stack_matrices(array_list, block_size): + # Handle scalar arrays by adding a dummy dimension + is_scalar = len(array_list[0].shape) == 0 + if is_scalar: + array_list = [arr[None] for arr in array_list] + + shapes = [arr.shape for arr in array_list] + max_dims = [max(shape[i] for shape in shapes) for i in range(len(shapes[0]))] + padded_shape = [-(-dim // block_size) * block_size for dim in max_dims] + padded_arrays = [] + for arr in array_list: + pad_width = [(0, padded_shape[i] - arr.shape[i]) for i in range(arr.ndim)] + padded = jnp.pad(arr, pad_width) + padded_arrays.append(padded) + + stacked = jnp.stack(padded_arrays) + return stacked + + + +def _unstack_and_unpad_matrices(stacked_array, original_shapes): + # Handle scalar arrays + is_scalar = len(original_shapes[0]) == 0 + + unstacked = jnp.split(stacked_array, stacked_array.shape[0], axis=0) + unpadded = [] + for arr, orig_shape in zip(unstacked, original_shapes): + arr = jnp.squeeze(arr, axis=0) + if is_scalar: + # For scalars, just take the first element + arr = arr[0] + else: + # For non-scalars, slice to original shape + slices = tuple(slice(0, dim) for dim in orig_shape) + arr = arr[slices] + unpadded.append(arr) + return tuple(unpadded) + + +# unused fns (can be used for stacking partitions without padding): +def _sort_and_group_matrices(matrix_shapes: List[Tuple[int, ...]]): + indexed_list = list(enumerate(matrix_shapes)) + sorted_indexed = sorted(indexed_list, key=lambda x: x[1]) + sorted_shapes = [shape for _, shape in sorted_indexed] + change_indices = [original_index for original_index, _ in sorted_indexed] + revert_indices = [0] * len(matrix_shapes) + for new_pos, (original_index, _) in enumerate(sorted_indexed): + revert_indices[original_index] = new_pos + shape_groups = defaultdict(list) + for i, shape in enumerate(sorted_shapes): + shape_groups[shape].append(i) + unique_sorted_shapes = list(shape_groups.keys()) + return unique_sorted_shapes, dict(shape_groups), change_indices, revert_indices + + +def _stack_matrices(array_list): + in_tuple = isinstance(array_list, tuple) + shapes = [arr.shape for arr in array_list] + unique_shapes, shape_groups, change_indices, _ = _sort_and_group_matrices(shapes) + sorted_arrays = [array_list[i] for i in change_indices] + stacked_arrays = [] + for shape in unique_shapes: + indices = shape_groups[shape] + stacked = jnp.stack([sorted_arrays[i] for i in indices]) + stacked_arrays.append(stacked) + if in_tuple: + return tuple(stacked_arrays) + return stacked_arrays + + +def _unstack_matrices(stacked_arrays, revert_indices): + in_tuple = isinstance(stacked_arrays, tuple) + unstacked = [] + for arr in stacked_arrays: + unstacked.extend(jnp.split(arr, arr.shape[0])) + array_list = [jnp.squeeze(unstacked[i], axis=0) for i in revert_indices] + if in_tuple: + return tuple(array_list) + return array_list From fe3ecc944defffd9f6447252ca46594d11ea82aa Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Thu, 5 Dec 2024 14:12:29 -0700 Subject: [PATCH 17/56] testing 123 --- config/llama2_100M_kron_test.yaml | 34 ++ src/levanter/optim/kron.py | 538 ++++++++++++------------------ 2 files changed, 241 insertions(+), 331 deletions(-) create mode 100644 config/llama2_100M_kron_test.yaml diff --git a/config/llama2_100M_kron_test.yaml b/config/llama2_100M_kron_test.yaml new file mode 100644 index 000000000..84233bb3a --- /dev/null +++ b/config/llama2_100M_kron_test.yaml @@ -0,0 +1,34 @@ +data: !include data/dclm_gpt_neo.yaml +model: + type: llama + 
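+  # ~100M-parameter scale at these dims (per the config name)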
seq_len: 4096 + hidden_dim: 768 + intermediate_dim: 3072 + num_layers: 12 + num_heads: 12 + num_kv_heads: 12 +trainer: + tracker: + project: "levanter" + tags: ["pile", "llama"] + mp: p=f32,c=bfloat16 + model_axis_size: 1 + checkpointer: + keep: + - every: 1000 + save_interval: 30m + + + train_batch_size: 1024 + per_device_parallelism: 64 # set for v3 TPU + per_device_eval_parallelism: 64 # set a larger batch size for eval + num_train_steps: 50001 +optimizer: + learning_rate: 1E-3 + weight_decay: 0.1 + warmup: 1000 + cooldown: 0.1 + lr_schedule: constant + min_lr_ratio: 0.0 + max_grad_norm: 0.0 # don't need with kron's normalize_grads + type: kron diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 6959ce81d..1c4f43e37 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -1,50 +1,8 @@ -from typing import Any, List, Optional, Union, Callable, Tuple -from collections import defaultdict -from functools import partial -import string -import numpy as np - -import chex -import jax -from jax import numpy as jnp, vmap -from jax.sharding import PartitionSpec -from jax.lax import with_sharding_constraint -from jax._src import mesh as mesh_lib -import flax.linen as nn -from optax import tree_utils as otu -from optax._src import base, transform -from optax._src.numerics import safe_int32_increment -from optax._src.utils import canonicalize_dtype -from optax._src.combine import chain - from dataclasses import dataclass import optax from levanter.optim.config import OptimizerConfig -def precond_update_prob_schedule( - max_prob=1.0, min_prob=0.03, decay=0.001, flat_start=500 -): - """Anneal preconditioner update probability during beginning of training. - - PSGD benefits from more preconditioner updates at the beginning of training, - but once the preconditioner is learned the update probability can drop low. - - This schedule is an exponential anneal with a flat start. Default settings keep - update probability at 1.0 for 500 steps then exponentially anneal down to - `min_prob` by 4000 steps. Default settings work well for most models and - training regimes. - """ - - def _schedule(n): - """Exponential anneal with flat start.""" - return jnp.clip( - max_prob * jnp.exp(-decay * (n - flat_start)), min_prob, max_prob - ) - - return _schedule - - @OptimizerConfig.register_subclass("kron") @dataclass class KronConfig(OptimizerConfig): @@ -54,7 +12,7 @@ class KronConfig(OptimizerConfig): beta1: Momentum parameter. 0.9 or 0.95 are common values. weight_decay: Weight decay coefficient. max_grad_norm: Optional gradient norm clipping value. - normalize_grads: Whether to normalize the incoming gradients to unit norm layer-wise. + normalize_grads: Whether to normalize the incoming gradients to unit norm layer-wise. Can help with stability. preconditioner_update_probability: Final probability of updating the preconditioner. Default is 0.05 (update every 20 steps). The `precond_update_prob_schedule` holds probability at @@ -88,21 +46,19 @@ class KronConfig(OptimizerConfig): partition_grads_into_blocks: Whether to partition grads into chunks of size block_size for efficiency. block_size: Block size to use for partitioning grads. - buffer_qq: Whether to buffer p=q@q.T for faster preconditioning. May not be beneficial - with sharded preconditioners (default False). Try True to check for speedup if not - sharding preconditioners. params_sharding: Pytree same structure as params of jax.sharding.PartitionSpec. preconditioner_sharding: PartitionSpec for preconditioner matrices. 
Best practice is to shard first dimension across fsdp-like mesh axis, or largest/most common axis in params. Example: PartitionSpec('fsdp') or PartitionSpec('fsdp', 'tp'). """ + # some of these are changed from kron defaults to better suit levanter beta1: float = 0.9 weight_decay: float = 0.1 - max_grad_norm: Optional[float] = 1.0 - normalize_grads: bool = False + max_grad_norm: Optional[float] = None + normalize_grads: bool = True preconditioner_update_probability: float = 0.05 - update_prob_flat_start: int = 500 - max_size_triangular: int = 8192 + update_prob_flat_start: int = 1000 + max_size_triangular: int = 10000 min_ndim_triangular: int = 2 memory_save_mode: Optional[str] = None mu_dtype: Optional[Union[str, jnp.dtype]] = None @@ -113,24 +69,31 @@ class KronConfig(OptimizerConfig): lax_map_scanned_layers: bool = False lax_map_batch_size: int = 8 merge_small_dims: bool = True - target_merged_dim_size: int = 4096 + target_merged_dim_size: int = 8192 partition_grads_into_blocks: bool = True block_size: int = 512 - buffer_qq: bool = False params_sharding: Optional[Any] = None - preconditioner_sharding: Optional[PartitionSpec[str, str]] = None + preconditioner_sharding: Optional[tuple[str, str]] = None def build(self, num_train_steps): """Creates the optimizer.""" + def _optimizer(learning_rate) -> optax.GradientTransformation: + precond_partition_spec = ( + PartitionSpec(*self.preconditioner_sharding) + if self.preconditioner_sharding is not None + else None + ) components = [] + if self.max_grad_norm and not self.normalize_grads: + components.append(optax.clip_by_global_norm(self.max_grad_norm)) components.append( scale_by_kron( b1=self.beta1, normalize_grads=self.normalize_grads, preconditioner_update_probability=precond_update_prob_schedule( min_prob=self.preconditioner_update_probability, - flat_start=self.update_prob_flat_start + flat_start=self.update_prob_flat_start, ), max_size_triangular=self.max_size_triangular, min_ndim_triangular=self.min_ndim_triangular, @@ -146,18 +109,75 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: target_merged_dim_size=self.target_merged_dim_size, partition_grads_into_blocks=self.partition_grads_into_blocks, block_size=self.block_size, - buffer_qq=self.buffer_qq, params_sharding=self.params_sharding, - preconditioner_sharding=self.preconditioner_sharding, + preconditioner_sharding=precond_partition_spec, ) ) - if self.weight_decay > 0 and not self.normalize_grads: - components.append(transform.add_decayed_weights(self.weight_decay, self.build_weight_decay_mask())) - components.append(transform.scale_by_learning_rate(learning_rate)) + if self.weight_decay > 0: + components.append( + optax.add_decayed_weights( + self.weight_decay, self.build_weight_decay_mask() + ) + ) + components.append(optax.scale_by_learning_rate(learning_rate)) return optax.chain(*components) - return optax.inject_hyperparams(_optimizer)(learning_rate=self.lr_scheduler(num_train_steps)) + return optax.inject_hyperparams(_optimizer)( + learning_rate=self.lr_scheduler(num_train_steps) + ) + + +"""PSGD Kron""" +from typing import Any, List, Optional, Union, Callable, Tuple +from collections import defaultdict +from functools import partial +import string +import numpy as np + +import chex +import jax +from jax import numpy as jnp, vmap +from jax.sharding import PartitionSpec +from jax.lax import with_sharding_constraint +from optax import tree_utils as otu +from optax._src import base, transform +from optax._src.numerics import safe_int32_increment +from 
optax._src.utils import canonicalize_dtype +from optax._src.combine import chain + +try: + import flax.linen as nn + + have_flax = True +except ImportError: + have_flax = False +try: + import haliax as hax + have_hax = True +except ImportError: + have_hax = False + + +def precond_update_prob_schedule( + max_prob=1.0, min_prob=0.03, decay=0.001, flat_start=500 +): + """Anneal preconditioner update probability during beginning of training. + + PSGD benefits from more preconditioner updates at the beginning of training, + but once the preconditioner is learned the update probability can drop low. + + This schedule is an exponential anneal with a flat start. Default settings keep + update probability at 1.0 for 500 steps then exponentially anneal down to + `min_prob` by 4000 steps. Default settings work well for most models and + training regimes. + """ + + def _schedule(n): + """Exponential anneal with flat start.""" + return jnp.clip(max_prob * jnp.exp(-decay * (n - flat_start)), min_prob, max_prob) + + return _schedule def scale_by_kron( @@ -180,7 +200,6 @@ def scale_by_kron( target_merged_dim_size: int = 2048, partition_grads_into_blocks: bool = False, block_size: int = 256, - buffer_qq: bool = False, params_sharding: Optional[Any] = None, preconditioner_sharding: Optional[PartitionSpec[str, str]] = None, **kwargs, @@ -221,9 +240,6 @@ def scale_by_kron( partition_grads_into_blocks: bool, whether to partition grads into chunks of size `block_size` for efficiency. block_size: int, block size to use for partitioning grads. - buffer_qq: bool, whether to buffer p=q@q.T for faster preconditioning. This may - not be beneficial if using sharded preconditioners so default is False. If - not sharding preconditioners, try setting to True to see if there is a speedup. params_sharding: pytree same structure as params of jax.sharding.PartitionSpec. preconditioner_sharding: `None` or `PartitionSpec(str | None, str | None)`, PartitionSpec for preconditioner matrices. `None` infers a strategy @@ -236,40 +252,58 @@ def scale_by_kron( optax.GradientTransformation """ mu_dtype = canonicalize_dtype(mu_dtype) - precond_dtype = canonicalize_dtype(precond_dtype) + precond_dtype = canonicalize_dtype(precond_dtype or jnp.float32) preconditioner_lr = 0.1 preconditioner_init_scale = 1.0 lax_map = lax_map_scanned_layers bs = lax_map_batch_size def init_fn(params, return_partition_specs_only=False): - current_mesh = mesh_lib.thread_resources.env.physical_mesh - if ( - current_mesh.empty - and buffer_qq - and any([params_sharding is not None, preconditioner_sharding is not None]) - and jax.process_index() == 0 - ): - print( - "PSGD Kron WARNING: buffering Q@Q.T with sharding but Mesh is empty. " - "Consider running Kron within a mesh context manager `with mesh:` or " - "setting buffer_qq=False to prevent potential sharding inefficiencies. " - "If only using replicated sharding, you can ignore this warning." 
- ) - have_params_sharding = params_sharding is not None have_qs_sharding = have_params_sharding or preconditioner_sharding is not None + # unbox if haliax style partitioned + scanned_layers_ = scanned_layers + params_sharding_ = params_sharding + if have_hax: + if any( + isinstance(x, hax.NamedArray) + for x in jax.tree.leaves( + params, is_leaf=lambda x: isinstance(x, hax.NamedArray) + ) + ): + # if in haliax, we can grab scanned_layers and params_sharding from params + # this does not support nested stacks + if scanned_layers_ is None: + scanned_layers_ = jax.tree.map( + lambda x: ( + jax.tree.map(lambda _: True, x) + if isinstance(x, hax.nn.Stacked) + else False + ), + params, + is_leaf=lambda x: isinstance(x, hax.nn.Stacked), + ) + if params_sharding_ is None: + params_sharding_ = hax.partitioning.infer_resource_partitions(params) + params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) + params, params_struct = jax.tree.flatten(params) + scanned_layers_ = jax.tree.leaves(scanned_layers_) + print(f"scanned_layers_: {scanned_layers_}") + params_sharding_ = jax.tree.leaves(params_sharding_) + print(f"params_sharding_: {params_sharding_}") + # unbox if flax style partitioned - params = jax.tree.map( - lambda x: x.unbox() if isinstance(x, nn.Partitioned) else x, - params, - is_leaf=lambda x: isinstance(x, nn.Partitioned), - ) + if have_flax: + params = jax.tree.map( + lambda x: x.unbox() if isinstance(x, nn.Partitioned) else x, + params, + is_leaf=lambda x: isinstance(x, nn.Partitioned), + ) # check that there is a PartitionSpec for every param - if params_sharding is not None: - assert len(jax.tree.leaves(params_sharding)) == len( + if params_sharding_ is not None: + assert len(jax.tree.leaves(params_sharding_)) == len( jax.tree.leaves(params) ), "There must be a PartitionSpec for every parameter in PSGD Kron." 
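+            # (one PartitionSpec per param leaf, e.g. two leaves -> two specs)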
# check that preconditioner sharding length is at least 1 @@ -280,7 +314,6 @@ def init_fn(params, return_partition_specs_only=False): ) # extend partition specs - params_sharding_ = params_sharding if have_params_sharding: params_sharding_ = jax.tree.map( lambda p, sh: PartitionSpec(*(sh + (None,) * (len(p.shape) - len(sh)))), @@ -290,9 +323,7 @@ def init_fn(params, return_partition_specs_only=False): preconditioner_sharding_ = preconditioner_sharding if preconditioner_sharding is not None: if len(preconditioner_sharding) < 2: - preconditioner_sharding_ = PartitionSpec( - preconditioner_sharding[0], None - ) + preconditioner_sharding_ = PartitionSpec(preconditioner_sharding[0], None) # reshape params shaped () to (1,) to make things simpler params = jax.tree.map(lambda p: p[None] if len(p.shape) == 0 else p, params) @@ -303,8 +334,7 @@ def init_fn(params, return_partition_specs_only=False): ) # scanned layers - scanned_layers_ = scanned_layers - if scanned_layers is None: + if scanned_layers_ is None: scanned_layers_ = jax.tree.map(lambda _: False, params) scanned_sizes = jax.tree.map( lambda p, s: p.shape[0] if s else 0, params, scanned_layers_ @@ -390,8 +420,6 @@ def init_fn(params, return_partition_specs_only=False): existing_Q=True if return_partition_specs_only else None, precond_sharding=preconditioner_sharding_, param_sharding=sh, - buffer_qq=buffer_qq, - current_mesh=current_mesh, ) ), params, @@ -440,9 +468,7 @@ def broadcast_qs(_, ps, q, s): ) return q - Qs = jax.tree.map( - broadcast_qs, params, partitioned_shapes, Qs, scanned_sizes - ) + Qs = jax.tree.map(broadcast_qs, params, partitioned_shapes, Qs, scanned_sizes) if have_qs_sharding: Qs = _safe_sharding_constraint(Qs, Qs_sharding) @@ -486,24 +512,57 @@ def update_fn(updates: base.Updates, state: dict, params: base.Params = None): count_inc = safe_int32_increment(state["count"]) key = jax.random.fold_in(jax.random.PRNGKey(42), state["count"]) - have_params_sharding = params_sharding is not None + # unbox if haliax style partitioned + scanned_layers_ = scanned_layers + params_sharding_ = params_sharding + hax_partitioned = False + if have_hax: + if any( + isinstance(x, hax.NamedArray) + for x in jax.tree.leaves( + updates, is_leaf=lambda x: isinstance(x, hax.NamedArray) + ) + ): + hax_partitioned = True + # if in haliax, we can grab scanned_layers and params_sharding from params + # this does not support nested stacks + if scanned_layers_ is None: + scanned_layers_ = jax.tree.map( + lambda x: ( + jax.tree.map(lambda _: True, x) + if isinstance(x, hax.nn.Stacked) + else False + ), + updates, + is_leaf=lambda x: isinstance(x, hax.nn.Stacked), + ) + if params_sharding_ is None: + params_sharding_ = hax.partitioning.infer_resource_partitions(updates) + params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) + updates, updates_struct = jax.tree.flatten(updates) + scanned_layers_ = jax.tree.leaves(scanned_layers_) + print(f"scanned_layers_: {scanned_layers_}") + params_sharding_ = jax.tree.leaves(params_sharding_) + print(f"params_sharding_: {params_sharding_}") + + have_params_sharding = params_sharding_ is not None have_qs_sharding = have_params_sharding or preconditioner_sharding is not None # unbox if flax style partitioned - boxed_updates, grads_structure = jax.tree.flatten( - updates, - is_leaf=lambda g: isinstance( - g, (chex.Array, nn.Partitioned, jax.ShapeDtypeStruct) - ), - ) flax_partitioned = False - if isinstance(boxed_updates[0], nn.Partitioned): - flax_partitioned = True - updates = [g.unbox() for g in 
boxed_updates] - updates = grads_structure.unflatten(updates) + if have_flax: + boxed_updates, grads_structure = jax.tree.flatten( + updates, + is_leaf=lambda g: isinstance( + g, (chex.Array, nn.Partitioned, jax.ShapeDtypeStruct) + ), + ) + if any(isinstance(g, nn.Partitioned) for g in boxed_updates): + flax_partitioned = True + updates = [g.unbox() for g in boxed_updates] + updates = grads_structure.unflatten(updates) # extend partition specs - params_sharding_ = params_sharding if have_params_sharding: params_sharding_ = jax.tree.map( lambda g, sh: PartitionSpec(*(sh + (None,) * (len(g.shape) - len(sh)))), @@ -513,9 +572,7 @@ def update_fn(updates: base.Updates, state: dict, params: base.Params = None): preconditioner_sharding_ = preconditioner_sharding if preconditioner_sharding is not None: if len(preconditioner_sharding) < 2: - preconditioner_sharding_ = PartitionSpec( - preconditioner_sharding[0], None - ) + preconditioner_sharding_ = PartitionSpec(preconditioner_sharding[0], None) # reshape params shaped () to (1,) to make things simpler input_shapes = jax.tree.map(lambda g: g.shape, updates) @@ -527,8 +584,7 @@ def update_fn(updates: base.Updates, state: dict, params: base.Params = None): ) # scanned layers - scanned_layers_ = scanned_layers - if scanned_layers is None: + if scanned_layers_ is None: scanned_layers_ = jax.tree.map(lambda _: False, updates) # update probability can be scheduled @@ -697,7 +753,6 @@ def norm_grads(g): existing_Q=True, precond_sharding=preconditioner_sharding_, param_sharding=sh, - buffer_qq=buffer_qq, ), momentum_updates, dim_diag, @@ -725,56 +780,9 @@ def add_dims_to_spec(_, qss, sds): scanned_dim_sharding, ) - # pad sizes for buffering qq - pad_sizes = jax.tree.map( - lambda g, qs, nm: [q.shape[nm] - dim for q, dim in zip(qs, g.shape[nm:])], - momentum_updates, - Qs, - n_dims_to_map, - ) - # maybe update preconditioner def update_preconditioner(key, Qs): with jax.default_matmul_precision(precond_update_precision): - # separate out q if we're buffering qq - if buffer_qq: - Qs = jax.tree.map( - lambda _, qs, nm, dd, psize, sh: jax.tree.map( - lambda q, d, ps, sh: ( - _map_fn( - False, - 0, - nm, - lambda q, pad_size=ps, sharding=( - sh if have_qs_sharding else None - ): _get_q(q, pad_size, sharding), - q, - ) - if not d - else q - ), - qs, - dd, - psize, - sh, - ), - dummy_updates_tree, - Qs, - n_dims_to_map, - dim_diag, - pad_sizes, - Qs_sharding_no_leading_dims, - ) - if have_qs_sharding: - Qs = _safe_sharding_constraint(Qs, Qs_sharding) - - # create random vectors - key, subkey = jax.random.split(key) - Vs = _tree_random_like(subkey, momentum_updates) - # apply params sharding to random vectors - if have_params_sharding: - Vs = _safe_sharding_constraint(Vs, partitioned_sharding) - # balance preconditioners about every 100 updates def balance_Qs(Qs_to_bal): def _balance_Q(Q): @@ -798,6 +806,22 @@ def _balance_Q(Q): if have_qs_sharding: Qs = _safe_sharding_constraint(Qs, Qs_sharding) + # create random vectors + key, subkey = jax.random.split(key) + Vs = _tree_random_like(subkey, momentum_updates, dtype=precond_dtype) + # apply params sharding to random vectors + if have_params_sharding: + Vs = _safe_sharding_constraint(Vs, partitioned_sharding) + + # damp based on machine precision + grads_in = otu.tree_cast(momentum_updates, precond_dtype) + damp_eps = jnp.sqrt(jnp.finfo(jnp.float32).eps) # bf16 eps too large + grads_in = jax.tree.map( + lambda g, v: g + damp_eps.astype(g.dtype) * jnp.mean(jnp.abs(g)) * v, + grads_in, + Vs, + ) + # form conjB conjBs 
= jax.tree.map( lambda g, Q, v, nm: _map_fn(lax_map, bs, nm, _conjB, Q, g, v), @@ -837,35 +861,6 @@ def _balance_Q(Q): if have_qs_sharding: new_Qs = _safe_sharding_constraint(new_Qs, Qs_sharding) - if buffer_qq: - # store half of qq in lower triangular part of Qs (Q is triu) - new_Qs = jax.tree.map( - lambda _, qs, nm, dd, psize, sh: jax.tree.map( - lambda q, d, ps, sh: ( - _map_fn( - False, - 0, - nm, - lambda q, pad_size=ps, sharding=( - sh if have_qs_sharding else None - ): _store_qq(q, pad_size, sharding), - q, - ) - if not d - else q - ), - qs, - dd, - psize, - sh, - ), - dummy_updates_tree, - new_Qs, - n_dims_to_map, - dim_diag, - pad_sizes, - Qs_sharding_no_leading_dims, - ) new_Qs = otu.tree_cast(new_Qs, precond_dtype) return new_Qs @@ -874,60 +869,20 @@ def _balance_Q(Q): do_update = update_counter_inc >= 1 / update_prob_in update_counter_inc = jnp.where(do_update, 0, update_counter_inc) key, subkey = jax.random.split(key) - new_Qs = jax.lax.cond( + Qs = jax.lax.cond( do_update, update_preconditioner, lambda _, qs: qs, subkey, Qs ) if have_qs_sharding: - new_Qs = _safe_sharding_constraint(new_Qs, Qs_sharding) + Qs = _safe_sharding_constraint(Qs, Qs_sharding) # precondition gradients with jax.default_matmul_precision(precond_grads_precision): - # precondition with stale Qs - if buffer_qq: - # get qq out of Qs - Qs_in = jax.tree.map( - lambda _, qs, nm, dd, psize, sh: jax.tree.map( - lambda q, d, ps, sh: ( - _map_fn( - False, - 0, - nm, - lambda q, pad_size=ps, sharding=( - sh if have_qs_sharding else None - ): _get_qq(q, pad_size, sharding), - q, - ) - if not d - else q - ), - qs, - dd, - psize, - sh, - ), - dummy_updates_tree, - Qs, - n_dims_to_map, - dim_diag, - pad_sizes, - Qs_sharding_no_leading_dims, - ) - else: - Qs_in = Qs - if have_qs_sharding: - Qs_in = _safe_sharding_constraint(Qs_in, Qs_sharding) - precond_gs = jax.tree.map( lambda g, Q, expr, nm: _map_fn( - lax_map, - bs, - nm, - partial(_precond_grad, exprs=expr, buffer_qq=buffer_qq), - Q, - g, + lax_map, bs, nm, partial(_precond_grad, exprs=expr), Q, g ), momentum_updates, - Qs_in, + Qs, exprs, n_dims_to_map, ) @@ -949,9 +904,7 @@ def _balance_Q(Q): partitioned_shapes, ) if have_params_sharding: - precond_gs = _safe_sharding_constraint( - precond_gs, merged_params_sharding - ) + precond_gs = _safe_sharding_constraint(precond_gs, merged_params_sharding) precond_gs = jax.tree.map( lambda _, g, s, p_cls: _map_fn( False, 0, int(s), p_cls.merge_partitions, g @@ -962,9 +915,7 @@ def _balance_Q(Q): partitioners, ) if have_params_sharding: - precond_gs = _safe_sharding_constraint( - precond_gs, merged_params_sharding - ) + precond_gs = _safe_sharding_constraint(precond_gs, merged_params_sharding) # un-merge dimensions if merge_small_dims: @@ -991,14 +942,16 @@ def _balance_Q(Q): bu.replace_boxed(g) for bu, g in zip(boxed_updates, flat_precond_gs) ] precond_gs = grads_structure.unflatten(precond_gs) + if hax_partitioned: + precond_gs = updates_struct.unflatten(precond_gs) # dtypes and new state mu = otu.tree_cast(mu, mu_dtype) - new_Qs = otu.tree_cast(new_Qs, precond_dtype) + Qs = otu.tree_cast(Qs, precond_dtype) state = dict( count=count_inc, mu=mu, - Qs_preconditioners=new_Qs, + Qs_preconditioners=Qs, update_counter=update_counter_inc, ) @@ -1030,7 +983,6 @@ def kron( target_merged_dim_size: int = 2048, partition_grads_into_blocks: bool = False, block_size: int = 256, - buffer_qq: bool = False, params_sharding: Optional[Any] = None, preconditioner_sharding: Optional[PartitionSpec[str, str]] = None, ) -> 
base.GradientTransformation: @@ -1075,9 +1027,6 @@ def kron( partition_grads_into_blocks: bool, whether to partition grads into chunks of size `block_size` for efficiency. block_size: int, block size to use for partitioning grads. - buffer_qq: bool, whether to buffer p=q@q.T for faster preconditioning. This may - not be beneficial if using sharded preconditioners so default is False. If - not sharding preconditioners, try setting to True to see if there is a speedup. params_sharding: pytree same structure as params of jax.sharding.PartitionSpec. preconditioner_sharding: `None` or `PartitionSpec(str | None, str | None)`, PartitionSpec for preconditioner matrices. `None` infers a strategy @@ -1108,7 +1057,6 @@ def kron( target_merged_dim_size=target_merged_dim_size, partition_grads_into_blocks=partition_grads_into_blocks, block_size=block_size, - buffer_qq=buffer_qq, params_sharding=params_sharding, preconditioner_sharding=preconditioner_sharding, ) @@ -1136,8 +1084,9 @@ def get_opt_state_partition_specs( tree of PartitionSpecs for optimizer state. """ params_flat, params_struct = jax.tree.flatten(params) - if isinstance(params_flat[0], nn.Partitioned): - params_flat = [p.unbox(p) for p in params_flat] + if have_flax: + if isinstance(params_flat[0], nn.Partitioned): + params_flat = [p.unbox(p) for p in params_flat] if not isinstance(params_flat[0], jax.ShapeDtypeStruct): params_flat = [jax.ShapeDtypeStruct(p.shape, p.dtype) for p in params_flat] params = params_struct.unflatten(params_flat) @@ -1188,17 +1137,11 @@ def _init_Q_exprs( existing_Q=None, precond_sharding=None, param_sharding=None, - buffer_qq=False, - current_mesh: Optional[jax.sharding.Mesh] = None, ): have_qs_sharding = precond_sharding is not None or param_sharding is not None letters = string.ascii_lowercase + string.ascii_uppercase if len(t_shape) == 0: # scalar - Q = ( - [scale * jnp.ones(t_shape, dtype=dtype)] - if existing_Q is None - else existing_Q - ) + Q = [scale * jnp.ones(t_shape, dtype=dtype)] if existing_Q is None else existing_Q exprA = ",->" exprGs = [",->"] exprP = ",,->" @@ -1263,23 +1206,6 @@ def _init_Q_exprs( q = scale * jnp.eye(size, dtype=dtype) if have_qs_sharding: q = _safe_sharding_constraint(q, q_sharding) - - # we can optionally store q @ q in tril for later - if buffer_qq: - pad_size = 1 - if have_qs_sharding and current_mesh is not None: - # pad size will be largest mesh axis size in q sharding - axis_sizes = [pad_size] - for ax in q_sharding: - if ax is not None: - axis_tuple = ax if isinstance(ax, tuple) else (ax,) - axis_size = np.prod( - [current_mesh.shape[a] for a in axis_tuple] - ) - axis_sizes.append(axis_size) - pad_size = max(axis_sizes) - q = _store_qq(q, pad_size, sharding=q_sharding) - Q.append(q) piece1A.append(letters[i] + letters[i + 13]) @@ -1303,24 +1229,15 @@ def _init_Q_exprs( ) a, b, c = (letters[i], letters[i + 13], letters[i + 26]) - piece1P.append(c + b if buffer_qq else a + b) + piece1P.append(a + b) piece2P.append(a + c) piece3P = piece3P + c piece4P = piece4P + b exprA = ",".join(piece1A) + "," + piece2A + "->" + piece3A - if buffer_qq: - exprP = ",".join(piece1P) + "," + piece3P + "->" + piece4P - else: - exprP = ( - ",".join(piece1P) - + "," - + ",".join(piece2P) - + "," - + piece3P - + "->" - + piece4P - ) + exprP = ( + ",".join(piece1P) + "," + ",".join(piece2P) + "," + piece3P + "->" + piece4P + ) exprGs = tuple(exprGs) if existing_Q is not None: @@ -1328,41 +1245,6 @@ def _init_Q_exprs( return Q, (exprA, exprGs, exprP), sharding_out -def _store_qq(q, pad_size=1, 
sharding=None): - # after storing qq, precond update goes from - # an,bo,aA,bB,AB->no to cached:[aA,an->An, bB,bo->Bo], update:An,Bo,AB->no - p = jnp.einsum("aA,an->An", q, q) # keep first dim as contracting - if sharding is not None: - p = _safe_sharding_constraint(p, sharding) - q = jnp.pad(q, ((0, pad_size), (pad_size, 0))) - if sharding is not None: - q = _safe_sharding_constraint(q, sharding) - p = jnp.pad(p, ((pad_size, 0), (0, pad_size))) - if sharding is not None: - p = _safe_sharding_constraint(p, sharding) - q += jnp.tril(p, k=-pad_size) - if sharding is not None: - q = _safe_sharding_constraint(q, sharding) - return q - - -def _get_qq(q, pad_size=1, sharding=None): - p = jnp.tril(q[pad_size:, :-pad_size]) - if sharding is not None: - p = _safe_sharding_constraint(p, sharding) - p = p + p.T - jnp.diag(jnp.diag(p)) - if sharding is not None: - p = _safe_sharding_constraint(p, sharding) - return p - - -def _get_q(q, pad_size=1, sharding=None): - q = jnp.triu(q[:-pad_size, pad_size:]) - if sharding is not None: - q = _safe_sharding_constraint(q, sharding) - return q - - def _norm_lower_bound(A: jax.Array): """Returns a cheap lower bound for the spectral norm of A. @@ -1461,13 +1343,10 @@ def _update_single_q(i, q): return [_update_single_q(i, q) for i, q in enumerate(Q)] -def _precond_grad(Q, G, exprs, buffer_qq=False): +def _precond_grad(Q, G, exprs): """Precondition gradient G with preconditioner Q.""" exprP = exprs[-1] - if buffer_qq: - return jnp.einsum(exprP, *Q, G) - else: - return jnp.einsum(exprP, *Q, *Q, G) + return jnp.einsum(exprP, *Q, *Q, G) def _safe_sharding_constraint(x, sharding): @@ -1545,9 +1424,7 @@ def __init__(self, param_shape, block_size, dim_diag): # TODO (evanatyourservice) # this might fail with scalar params but for now we're reshaping those single_shape = [a[0] for a in split_sizes] - padded_single_shape = [ - -(-dim // block_size) * block_size for dim in single_shape - ] + padded_single_shape = [-(-dim // block_size) * block_size for dim in single_shape] stack_size = max(1, np.prod([max(1, len(s)) for s in split_sizes])) self._padded_stacked_shape = tuple([stack_size] + padded_single_shape) @@ -1681,7 +1558,6 @@ def _pad_and_stack_matrices(array_list, block_size): return stacked - def _unstack_and_unpad_matrices(stacked_array, original_shapes): # Handle scalar arrays is_scalar = len(original_shapes[0]) == 0 From 37452c7d7beeac85d5fe8af31e8450ed1a429b77 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Thu, 5 Dec 2024 14:24:31 -0700 Subject: [PATCH 18/56] Update kron.py --- src/levanter/optim/kron.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 1c4f43e37..c0047de22 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -1,5 +1,10 @@ from dataclasses import dataclass +from typing import Any, Optional, Union + +import jax.numpy as jnp import optax +from jax.sharding import PartitionSpec + from levanter.optim.config import OptimizerConfig @@ -65,7 +70,7 @@ class KronConfig(OptimizerConfig): precond_dtype: Optional[Union[str, jnp.dtype]] = None precond_update_precision: Optional[str] = "tensorfloat32" precond_grads_precision: Optional[str] = None - scanned_layers: Optional[base.Params] = None + scanned_layers: Optional[optax.Params] = None lax_map_scanned_layers: bool = False lax_map_batch_size: int = 8 merge_small_dims: bool = True @@ -73,7 +78,7 @@ class KronConfig(OptimizerConfig): partition_grads_into_blocks: bool = True block_size: int = 512 
params_sharding: Optional[Any] = None - preconditioner_sharding: Optional[tuple[str, str]] = None + preconditioner_sharding: Optional[tuple[str | None, str | None]] = None def build(self, num_train_steps): """Creates the optimizer.""" From 701956db0228bde42f67b4702c017cadb380e945 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Thu, 5 Dec 2024 14:29:16 -0700 Subject: [PATCH 19/56] Update llama2_100M_kron_test.yaml --- config/llama2_100M_kron_test.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/llama2_100M_kron_test.yaml b/config/llama2_100M_kron_test.yaml index 84233bb3a..da90576a3 100644 --- a/config/llama2_100M_kron_test.yaml +++ b/config/llama2_100M_kron_test.yaml @@ -1,4 +1,5 @@ -data: !include data/dclm_gpt_neo.yaml +data: + id: openwebtext model: type: llama seq_len: 4096 From aac1ceec1eef5a5bcf800a33b02fe4a58ef6506a Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Thu, 5 Dec 2024 14:37:39 -0700 Subject: [PATCH 20/56] Update llama2_100M_kron_test.yaml --- config/llama2_100M_kron_test.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config/llama2_100M_kron_test.yaml b/config/llama2_100M_kron_test.yaml index da90576a3..c4809508e 100644 --- a/config/llama2_100M_kron_test.yaml +++ b/config/llama2_100M_kron_test.yaml @@ -20,9 +20,9 @@ trainer: save_interval: 30m - train_batch_size: 1024 - per_device_parallelism: 64 # set for v3 TPU - per_device_eval_parallelism: 64 # set a larger batch size for eval + train_batch_size: 512 + per_device_parallelism: 32 # set for v3 TPU + per_device_eval_parallelism: 32 # set a larger batch size for eval num_train_steps: 50001 optimizer: learning_rate: 1E-3 @@ -32,4 +32,4 @@ optimizer: lr_schedule: constant min_lr_ratio: 0.0 max_grad_norm: 0.0 # don't need with kron's normalize_grads - type: kron + type: kron From e44e7fab6911c418ad582508e6983ae5f3a6e86c Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Thu, 5 Dec 2024 14:55:18 -0700 Subject: [PATCH 21/56] Update llama2_100M_kron_test.yaml --- config/llama2_100M_kron_test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/llama2_100M_kron_test.yaml b/config/llama2_100M_kron_test.yaml index c4809508e..1038aa01b 100644 --- a/config/llama2_100M_kron_test.yaml +++ b/config/llama2_100M_kron_test.yaml @@ -20,7 +20,7 @@ trainer: save_interval: 30m - train_batch_size: 512 + train_batch_size: 1024 per_device_parallelism: 32 # set for v3 TPU per_device_eval_parallelism: 32 # set a larger batch size for eval num_train_steps: 50001 @@ -31,5 +31,5 @@ optimizer: cooldown: 0.1 lr_schedule: constant min_lr_ratio: 0.0 - max_grad_norm: 0.0 # don't need with kron's normalize_grads + max_grad_norm: 0.0 # don't need with kron's normalize_grads on (default) type: kron From 476ba36c4fb4284246d2a308a6e3911c6b3a3a38 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Thu, 5 Dec 2024 23:29:19 -0700 Subject: [PATCH 22/56] Update kron.py --- src/levanter/optim/kron.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index c0047de22..b5ba3e18a 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -813,24 +813,23 @@ def _balance_Q(Q): # create random vectors key, subkey = jax.random.split(key) - Vs = _tree_random_like(subkey, momentum_updates, dtype=precond_dtype) + Vs = _tree_random_like(subkey, momentum_updates) # apply params sharding to random vectors if have_params_sharding: Vs = _safe_sharding_constraint(Vs, partitioned_sharding) # damp 
based on machine precision - grads_in = otu.tree_cast(momentum_updates, precond_dtype) damp_eps = jnp.sqrt(jnp.finfo(jnp.float32).eps) # bf16 eps too large grads_in = jax.tree.map( lambda g, v: g + damp_eps.astype(g.dtype) * jnp.mean(jnp.abs(g)) * v, - grads_in, + momentum_updates, Vs, ) # form conjB conjBs = jax.tree.map( lambda g, Q, v, nm: _map_fn(lax_map, bs, nm, _conjB, Q, g, v), - momentum_updates, + grads_in, Qs, Vs, n_dims_to_map, @@ -855,7 +854,7 @@ def _balance_Q(Q): g, conjb, ), - momentum_updates, + grads_in, Qs, conjBs, exprs, From 966b80e332509674b6cd8720ab89946572008f88 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Fri, 6 Dec 2024 17:56:25 -0700 Subject: [PATCH 23/56] expose precond lr and init --- src/levanter/optim/kron.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index b5ba3e18a..1a9b3d168 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -35,6 +35,8 @@ class KronConfig(OptimizerConfig): - None: All preconditioners are triangular (default) - 'one_diag': Largest/last dim per layer uses diagonal preconditioner - 'all_diag': All preconditioners are diagonal + preconditioner_lr: Learning rate for preconditioner. + preconditioner_init_scale: Scale for preconditioner initialization. mu_dtype: Dtype of the momentum buffer. Defaults to same dtype as parameters. precond_dtype: Dtype of the preconditioners. Defaults to 'float32'. precond_update_precision: Precision for matmul during preconditioner update. @@ -66,6 +68,8 @@ class KronConfig(OptimizerConfig): max_size_triangular: int = 10000 min_ndim_triangular: int = 2 memory_save_mode: Optional[str] = None + preconditioner_lr: float = 0.1 + preconditioner_init_scale: float = 1.0 mu_dtype: Optional[Union[str, jnp.dtype]] = None precond_dtype: Optional[Union[str, jnp.dtype]] = None precond_update_precision: Optional[str] = "tensorfloat32" @@ -103,6 +107,8 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: max_size_triangular=self.max_size_triangular, min_ndim_triangular=self.min_ndim_triangular, memory_save_mode=self.memory_save_mode, + preconditioner_lr=self.preconditioner_lr, + preconditioner_init_scale=self.preconditioner_init_scale, mu_dtype=self.mu_dtype, precond_dtype=self.precond_dtype, precond_update_precision=self.precond_update_precision, @@ -194,6 +200,8 @@ def scale_by_kron( max_size_triangular: int = 8192, min_ndim_triangular: int = 2, memory_save_mode: Optional[str] = None, + preconditioner_lr: float = 0.1, + preconditioner_init_scale: float = 1.0, mu_dtype: Optional[Union[str, jnp.dtype]] = None, precond_dtype: Optional[Union[str, jnp.dtype]] = None, precond_update_precision: Optional[str] = "tensorfloat32", @@ -225,6 +233,8 @@ def scale_by_kron( to set all preconditioners to be triangular, 'one_diag' sets the largest or last dim to be diagonal per layer, and 'all_diag' sets all preconditioners to be diagonal. + preconditioner_lr: float, learning rate for preconditioner. + preconditioner_init_scale: float, scale for preconditioner initialization. mu_dtype: optional str or jnp.dtype, dtype of the momentum buffer. Defaults to same dtype as the parameters. precond_dtype: optional str or jnp.dtype, dtype of the preconditioners. 
Defaults @@ -258,8 +268,6 @@ def scale_by_kron( """ mu_dtype = canonicalize_dtype(mu_dtype) precond_dtype = canonicalize_dtype(precond_dtype or jnp.float32) - preconditioner_lr = 0.1 - preconditioner_init_scale = 1.0 lax_map = lax_map_scanned_layers bs = lax_map_batch_size @@ -976,6 +984,8 @@ def kron( max_size_triangular: int = 8192, min_ndim_triangular: int = 2, memory_save_mode: Optional[str] = None, + preconditioner_lr: float = 0.1, + preconditioner_init_scale: float = 1.0, mu_dtype: Optional[Union[str, jnp.dtype]] = None, precond_dtype: Optional[Union[str, jnp.dtype]] = None, precond_update_precision: Optional[str] = "tensorfloat32", @@ -1011,6 +1021,8 @@ def kron( to set all preconditioners to be triangular, 'one_diag' sets the largest or last dim to be diagonal per layer, and 'all_diag' sets all preconditioners to be diagonal. + preconditioner_lr: float, learning rate for preconditioner. + preconditioner_init_scale: float, scale for preconditioner initialization. mu_dtype: optional str or jnp.dtype, dtype of the momentum buffer. Defaults to same dtype as the parameters. precond_dtype: optional str or jnp.dtype, dtype of the preconditioners. Defaults @@ -1050,6 +1062,8 @@ def kron( max_size_triangular=max_size_triangular, min_ndim_triangular=min_ndim_triangular, memory_save_mode=memory_save_mode, + preconditioner_lr=preconditioner_lr, + preconditioner_init_scale=preconditioner_init_scale, mu_dtype=mu_dtype, precond_dtype=precond_dtype, precond_update_precision=precond_update_precision, From 91e29e72f6a792dcc8eb7c0de225ca63ecaf17e9 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Tue, 10 Dec 2024 12:47:06 -0700 Subject: [PATCH 24/56] Update kron.py --- src/levanter/optim/kron.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 1a9b3d168..222d157ce 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -61,8 +61,8 @@ class KronConfig(OptimizerConfig): # some of these are changed from kron defaults to better suit levanter beta1: float = 0.9 weight_decay: float = 0.1 - max_grad_norm: Optional[float] = None - normalize_grads: bool = True + max_grad_norm: Optional[float] = 1.0 + normalize_grads: bool = False preconditioner_update_probability: float = 0.05 update_prob_flat_start: int = 1000 max_size_triangular: int = 10000 From 53efaa37b8835e7635938f894c8aae49f2961788 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Fri, 13 Dec 2024 19:21:55 -0700 Subject: [PATCH 25/56] Update llama2_100M_kron_test.yaml --- config/llama2_100M_kron_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/llama2_100M_kron_test.yaml b/config/llama2_100M_kron_test.yaml index 1038aa01b..087c1ce3d 100644 --- a/config/llama2_100M_kron_test.yaml +++ b/config/llama2_100M_kron_test.yaml @@ -31,5 +31,5 @@ optimizer: cooldown: 0.1 lr_schedule: constant min_lr_ratio: 0.0 - max_grad_norm: 0.0 # don't need with kron's normalize_grads on (default) + max_grad_norm: 1.0 type: kron From 311da92bc527a268869626e9ee5245db67e247ec Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Fri, 13 Dec 2024 19:27:26 -0700 Subject: [PATCH 26/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7e4238e32..caa67a74c 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ If you're using a TPU, more complete documentation for setting that up is availa As a kind of hello world, here's how you can train a GPT-2 "nano"-sized model on a 
small dataset. ```bash -python -m levanter.main.train_lm --config_path config/gpt2_nano.yaml +python -m levanter.main.train_lm --config_path config/llama2_100M_kron_test.yaml # alternatively, if you didn't use -e and are in a different directory python -m levanter.main.train_lm --config_path gpt2_nano From 33c17f48a1d1f5707152a2987d96603152c752b6 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Fri, 13 Dec 2024 19:44:30 -0700 Subject: [PATCH 27/56] Update kron.py --- src/levanter/optim/kron.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 222d157ce..2ef2d36e2 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -65,7 +65,7 @@ class KronConfig(OptimizerConfig): normalize_grads: bool = False preconditioner_update_probability: float = 0.05 update_prob_flat_start: int = 1000 - max_size_triangular: int = 10000 + max_size_triangular: int = 25000 min_ndim_triangular: int = 2 memory_save_mode: Optional[str] = None preconditioner_lr: float = 0.1 From 8da6e34d72484c0da966adcb15a031712a311991 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sat, 14 Dec 2024 12:10:04 -0700 Subject: [PATCH 28/56] Update kron.py --- src/levanter/optim/kron.py | 49 ++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 2ef2d36e2..85fc86a73 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -302,9 +302,9 @@ def init_fn(params, return_partition_specs_only=False): params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) params, params_struct = jax.tree.flatten(params) scanned_layers_ = jax.tree.leaves(scanned_layers_) - print(f"scanned_layers_: {scanned_layers_}") + print(f"kron scanned_layers_: {scanned_layers_}") params_sharding_ = jax.tree.leaves(params_sharding_) - print(f"params_sharding_: {params_sharding_}") + print(f"kron params_sharding_: {params_sharding_}") # unbox if flax style partitioned if have_flax: @@ -554,12 +554,14 @@ def update_fn(updates: base.Updates, state: dict, params: base.Params = None): params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) updates, updates_struct = jax.tree.flatten(updates) scanned_layers_ = jax.tree.leaves(scanned_layers_) - print(f"scanned_layers_: {scanned_layers_}") + print(f"kron scanned_layers_: {scanned_layers_}") params_sharding_ = jax.tree.leaves(params_sharding_) - print(f"params_sharding_: {params_sharding_}") + print(f"kron params_sharding_: {params_sharding_}") have_params_sharding = params_sharding_ is not None - have_qs_sharding = have_params_sharding or preconditioner_sharding is not None + if have_params_sharding: + original_params_sharding_ = params_sharding_ + have_qs_sharding = have_params_sharding or preconditioner_sharding is not None or have_hax # unbox if flax style partitioned flax_partitioned = False @@ -947,6 +949,10 @@ def _balance_Q(Q): lambda g, s: jnp.reshape(g, s), precond_gs, input_shapes ) + # final constraint for good measure + if have_params_sharding: + precond_gs = _safe_sharding_constraint(precond_gs, original_params_sharding_) + # box preconditioned grads if flax_partitioned: flat_precond_gs, _ = jax.tree.flatten(precond_gs) @@ -1212,14 +1218,33 @@ def _init_Q_exprs( # use triangular matrix as preconditioner for this dim q_sharding = None if have_qs_sharding: - # infer a so-so sharding scheme from params if nothing specified - # (first dim of q will match corresponding dim in 
params) - q_sharding = ( - precond_sharding - if precond_sharding is not None - else PartitionSpec(dim_sh, None) - ) + if have_hax: + # if we're in haliax we can grab fsdp axis and shard accordingly + # get current mesh + mesh = hax.partitioning._get_mesh() + if mesh.devices.shape == (): + mesh = None + # get fsdp mesh axis + if mesh is not None: + fsdp_axis = mesh.axis_names.index(hax.partitioning.ResourceAxis.DATA) + fsdp_size = mesh.devices.shape[fsdp_axis] + if size % fsdp_size == 0: + q_sharding = PartitionSpec(fsdp_axis, None) + else: + q_sharding = PartitionSpec(None, None) + else: + q_sharding = PartitionSpec(None, None) + else: + # infer a so-so sharding scheme from params if nothing specified + # (first dim of q will match corresponding dim in params) + q_sharding = ( + precond_sharding + if precond_sharding is not None + else PartitionSpec(dim_sh, None) + ) + # TODO ensure array axis is divisible by mesh axis sharding_out[i] = q_sharding + if existing_Q is None: q = scale * jnp.eye(size, dtype=dtype) if have_qs_sharding: From 5607cecac8b1a04d8edf877661e161b67b482be7 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sat, 14 Dec 2024 12:30:27 -0700 Subject: [PATCH 29/56] Update kron.py --- src/levanter/optim/kron.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 85fc86a73..4ca8ea818 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -1226,10 +1226,11 @@ def _init_Q_exprs( mesh = None # get fsdp mesh axis if mesh is not None: - fsdp_axis = mesh.axis_names.index(hax.partitioning.ResourceAxis.DATA) + fsdp_axis_name = hax.partitioning.ResourceAxis.DATA + fsdp_axis = mesh.axis_names.index(fsdp_axis_name) fsdp_size = mesh.devices.shape[fsdp_axis] if size % fsdp_size == 0: - q_sharding = PartitionSpec(fsdp_axis, None) + q_sharding = PartitionSpec(fsdp_axis_name, None) else: q_sharding = PartitionSpec(None, None) else: From 2fb6c344787d8bbf8f73873e194f15558b1c7b94 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sun, 15 Dec 2024 15:12:28 -0700 Subject: [PATCH 30/56] trust remote code --- src/levanter/data/audio.py | 4 ++-- src/levanter/data/sharded_datasource.py | 2 +- src/levanter/data/text.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/levanter/data/audio.py b/src/levanter/data/audio.py index b2235e863..46e72b210 100644 --- a/src/levanter/data/audio.py +++ b/src/levanter/data/audio.py @@ -193,7 +193,7 @@ def decode(x): def doc_iterator(self, split: str) -> Iterator[Tuple[np.ndarray, int, str]]: if self.id is not None: - data = datasets.load_dataset(self.id, split=split, name=self.name, streaming=self.stream) + data = datasets.load_dataset(self.id, split=split, name=self.name, streaming=self.stream, trust_remote_code=True) for doc in data: yield (doc[self.audio_key]["array"], doc[self.audio_key]["sampling_rate"], doc[self.text_key]) else: @@ -385,7 +385,7 @@ def _has_validation_set(self): if self.id is not None: dataset = datasets.load_dataset( - self.id, name=self.name, streaming=self.stream, split=self.validation_split + self.id, name=self.name, streaming=self.stream, split=self.validation_split, trust_remote_code=True ) try: next(iter(dataset)) diff --git a/src/levanter/data/sharded_datasource.py b/src/levanter/data/sharded_datasource.py index 9dca9b618..30a7727d6 100644 --- a/src/levanter/data/sharded_datasource.py +++ b/src/levanter/data/sharded_datasource.py @@ -253,7 +253,7 @@ def open_shard_at_row(self, shard_name: str, row: int) -> 
Iterator[dict]: def _load_dataset(self): # obnoxiously, the dataset loading stuff doesn't work with ray because of multiprocessing # so we have to do this hacky thing where we load the dataset in the worker - return datasets.load_dataset(self.id, split=self.split, streaming=self.streaming, **self.kwargs) + return datasets.load_dataset(self.id, split=self.split, streaming=self.streaming, trust_remote_code=True, **self.kwargs) class TextUrlDataSource(ShardedDataSource[str]): diff --git a/src/levanter/data/text.py b/src/levanter/data/text.py index 13c7ea44b..3c71e94df 100644 --- a/src/levanter/data/text.py +++ b/src/levanter/data/text.py @@ -594,7 +594,7 @@ def get_shard_source(self, split) -> Optional[ShardedDataSource[str]]: def doc_iterator(self, split: str): if self.id is not None: - dataset = datasets.load_dataset(self.id, name=self.name, streaming=self.stream) + dataset = datasets.load_dataset(self.id, name=self.name, streaming=self.stream, trust_remote_code=True) data = dataset[split] for doc in data: yield doc[self.text_key] @@ -1065,7 +1065,7 @@ def _has_validation_set(self): return True if self.id is not None: - dataset = datasets.load_dataset(self.id, name=self.name, streaming=self.stream, split="validation") + dataset = datasets.load_dataset(self.id, name=self.name, streaming=self.stream, split="validation", trust_remote_code=True) try: next(iter(dataset)) return True From 336e1e1e163b3166323bb57fcb03f08a1e3d87f0 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sun, 15 Dec 2024 15:16:12 -0700 Subject: [PATCH 31/56] settings defaults --- config/llama2_100M_kron_test.yaml | 2 +- src/levanter/optim/kron.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/llama2_100M_kron_test.yaml b/config/llama2_100M_kron_test.yaml index 087c1ce3d..1cc689629 100644 --- a/config/llama2_100M_kron_test.yaml +++ b/config/llama2_100M_kron_test.yaml @@ -25,7 +25,7 @@ trainer: per_device_eval_parallelism: 32 # set a larger batch size for eval num_train_steps: 50001 optimizer: - learning_rate: 1E-3 + learning_rate: 3E-4 weight_decay: 0.1 warmup: 1000 cooldown: 0.1 diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 4ca8ea818..ee640618b 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -64,7 +64,7 @@ class KronConfig(OptimizerConfig): max_grad_norm: Optional[float] = 1.0 normalize_grads: bool = False preconditioner_update_probability: float = 0.05 - update_prob_flat_start: int = 1000 + update_prob_flat_start: int = 500 max_size_triangular: int = 25000 min_ndim_triangular: int = 2 memory_save_mode: Optional[str] = None @@ -80,7 +80,7 @@ class KronConfig(OptimizerConfig): merge_small_dims: bool = True target_merged_dim_size: int = 8192 partition_grads_into_blocks: bool = True - block_size: int = 512 + block_size: int = 256 params_sharding: Optional[Any] = None preconditioner_sharding: Optional[tuple[str | None, str | None]] = None From a5ff351ff37bf33591d08b44433b5be18874eef6 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sun, 15 Dec 2024 16:21:35 -0700 Subject: [PATCH 32/56] no key, deterministic, pass all into cond, more sharding --- src/levanter/optim/kron.py | 54 ++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index ee640618b..989e803d3 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -511,6 +511,7 @@ def broadcast_qs(_, ps, q, s): mu=mu_sharding, Qs_preconditioners=Qs_sharding, 
update_counter=PartitionSpec(), + balance_counter=PartitionSpec(), ) return dict( @@ -518,12 +519,12 @@ def broadcast_qs(_, ps, q, s): mu=mu, Qs_preconditioners=Qs, update_counter=jnp.zeros([], jnp.int32), + balance_counter=jnp.zeros([], jnp.int32), ) def update_fn(updates: base.Updates, state: dict, params: base.Params = None): del params count_inc = safe_int32_increment(state["count"]) - key = jax.random.fold_in(jax.random.PRNGKey(42), state["count"]) # unbox if haliax style partitioned scanned_layers_ = scanned_layers @@ -651,7 +652,6 @@ def norm_grads(g): ) # merge small dimensions - dummy_updates_tree = jax.tree.map(lambda _: jnp.zeros([]), updates) nones = jax.tree.map(lambda _: None, momentum_updates) merged_params_sharding = params_sharding_ original_shapes = None @@ -690,14 +690,16 @@ def norm_grads(g): sharding_without_scan, scanned_dim_sharding, ) - # constrain sharding - momentum_updates = _safe_sharding_constraint( - momentum_updates, merged_params_sharding - ) + # constrain sharding + if have_params_sharding: + momentum_updates = _safe_sharding_constraint( + momentum_updates, merged_params_sharding + ) # partition grads into blocks - partitioned_sharding = merged_params_sharding + dummy_updates_tree = jax.tree.map(lambda _: jnp.zeros([]), updates) n_dims_to_map = jax.tree.map(lambda s: int(s), scanned_layers_) + partitioned_sharding = merged_params_sharding partitioners = None partitioned_shapes = None if partition_grads_into_blocks: @@ -750,11 +752,12 @@ def norm_grads(g): merged_params_sharding, scanned_layers_, ) - # constrain sharding - momentum_updates = _safe_sharding_constraint( - momentum_updates, partitioned_sharding - ) n_dims_to_map = jax.tree.map(lambda x: x + 1, n_dims_to_map) + # constrain sharding + if have_params_sharding: + momentum_updates = _safe_sharding_constraint( + momentum_updates, partitioned_sharding + ) # get einsum expressions and Qs sharding Qs = state["Qs_preconditioners"] @@ -796,7 +799,7 @@ def add_dims_to_spec(_, qss, sds): ) # maybe update preconditioner - def update_preconditioner(key, Qs): + def update_preconditioner_fn(Qs, grads_in, bal_counter): with jax.default_matmul_precision(precond_update_precision): # balance preconditioners about every 100 updates def balance_Qs(Qs_to_bal): @@ -815,15 +818,16 @@ def _balance_Q(Q): n_dims_to_map, ) - key, subkey = jax.random.split(key) - do_balances = jax.random.uniform(subkey) <= 0.01 + balance_counter_inc = safe_int32_increment(bal_counter) + do_balances = balance_counter_inc >= 100 + balance_counter_inc = jnp.where(do_balances, 0, balance_counter_inc) Qs = jax.lax.cond(do_balances, balance_Qs, lambda qs: qs, Qs) if have_qs_sharding: Qs = _safe_sharding_constraint(Qs, Qs_sharding) # create random vectors key, subkey = jax.random.split(key) - Vs = _tree_random_like(subkey, momentum_updates) + Vs = _tree_random_like(subkey, grads_in) # apply params sharding to random vectors if have_params_sharding: Vs = _safe_sharding_constraint(Vs, partitioned_sharding) @@ -832,7 +836,7 @@ def _balance_Q(Q): damp_eps = jnp.sqrt(jnp.finfo(jnp.float32).eps) # bf16 eps too large grads_in = jax.tree.map( lambda g, v: g + damp_eps.astype(g.dtype) * jnp.mean(jnp.abs(g)) * v, - momentum_updates, + grads_in, Vs, ) @@ -876,15 +880,24 @@ def _balance_Q(Q): new_Qs = _safe_sharding_constraint(new_Qs, Qs_sharding) new_Qs = otu.tree_cast(new_Qs, precond_dtype) - return new_Qs + return new_Qs, balance_counter_inc + + def pass_through_fn(qs, grads_in, bal_counter): + if have_qs_sharding: + qs = _safe_sharding_constraint(qs, 
Qs_sharding) + return qs, bal_counter # update preconditioner deterministically update_counter_inc = safe_int32_increment(state["update_counter"]) do_update = update_counter_inc >= 1 / update_prob_in update_counter_inc = jnp.where(do_update, 0, update_counter_inc) - key, subkey = jax.random.split(key) - Qs = jax.lax.cond( - do_update, update_preconditioner, lambda _, qs: qs, subkey, Qs + Qs, balance_counter_inc = jax.lax.cond( + do_update, + update_preconditioner_fn, + pass_through_fn, + Qs, + momentum_updates, + state["balance_counter"], ) if have_qs_sharding: Qs = _safe_sharding_constraint(Qs, Qs_sharding) @@ -971,6 +984,7 @@ def _balance_Q(Q): mu=mu, Qs_preconditioners=Qs, update_counter=update_counter_inc, + balance_counter=balance_counter_inc, ) return precond_gs, state From 3a06e1caeea40663071e756d1f6e2432f85a6afd Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sun, 15 Dec 2024 16:27:05 -0700 Subject: [PATCH 33/56] set key in state --- src/levanter/optim/kron.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 989e803d3..c7de998ff 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -507,6 +507,7 @@ def broadcast_qs(_, ps, q, s): if return_partition_specs_only: return dict( + key=jax.random.PRNGKey(jax.process_index()), count=PartitionSpec(), mu=mu_sharding, Qs_preconditioners=Qs_sharding, @@ -799,7 +800,7 @@ def add_dims_to_spec(_, qss, sds): ) # maybe update preconditioner - def update_preconditioner_fn(Qs, grads_in, bal_counter): + def update_preconditioner_fn(key, Qs, grads_in, bal_counter): with jax.default_matmul_precision(precond_update_precision): # balance preconditioners about every 100 updates def balance_Qs(Qs_to_bal): @@ -880,21 +881,22 @@ def _balance_Q(Q): new_Qs = _safe_sharding_constraint(new_Qs, Qs_sharding) new_Qs = otu.tree_cast(new_Qs, precond_dtype) - return new_Qs, balance_counter_inc + return key, new_Qs, balance_counter_inc - def pass_through_fn(qs, grads_in, bal_counter): + def pass_through_fn(key, qs, grads_in, bal_counter): if have_qs_sharding: qs = _safe_sharding_constraint(qs, Qs_sharding) - return qs, bal_counter + return key, qs, bal_counter # update preconditioner deterministically update_counter_inc = safe_int32_increment(state["update_counter"]) do_update = update_counter_inc >= 1 / update_prob_in update_counter_inc = jnp.where(do_update, 0, update_counter_inc) - Qs, balance_counter_inc = jax.lax.cond( + key, Qs, balance_counter_inc = jax.lax.cond( do_update, update_preconditioner_fn, pass_through_fn, + key, Qs, momentum_updates, state["balance_counter"], @@ -980,6 +982,7 @@ def pass_through_fn(qs, grads_in, bal_counter): mu = otu.tree_cast(mu, mu_dtype) Qs = otu.tree_cast(Qs, precond_dtype) state = dict( + key=key, count=count_inc, mu=mu, Qs_preconditioners=Qs, From f7f2382019b4c37418b920a6c4d83030285c9680 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sun, 15 Dec 2024 16:29:19 -0700 Subject: [PATCH 34/56] whoops --- src/levanter/optim/kron.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index c7de998ff..bf5662d97 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -896,7 +896,7 @@ def pass_through_fn(key, qs, grads_in, bal_counter): do_update, update_preconditioner_fn, pass_through_fn, - key, + state["key"], Qs, momentum_updates, state["balance_counter"], From 07781e6efe0816bd53314bd3f545cdaa2a82eda7 Mon Sep 17 00:00:00 2001 
From: Evan Walters Date: Sun, 15 Dec 2024 16:32:54 -0700 Subject: [PATCH 35/56] small fix --- src/levanter/optim/kron.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index bf5662d97..a2b094819 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -507,7 +507,7 @@ def broadcast_qs(_, ps, q, s): if return_partition_specs_only: return dict( - key=jax.random.PRNGKey(jax.process_index()), + key=PartitionSpec(), count=PartitionSpec(), mu=mu_sharding, Qs_preconditioners=Qs_sharding, @@ -516,6 +516,7 @@ def broadcast_qs(_, ps, q, s): ) return dict( + key=jax.random.PRNGKey(jax.process_index()), count=jnp.zeros([], jnp.int32), mu=mu, Qs_preconditioners=Qs, From 9ef086914d0f4dfb29a09fcbb59e5497bc232332 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sun, 15 Dec 2024 16:42:23 -0700 Subject: [PATCH 36/56] Update kron.py --- src/levanter/optim/kron.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index a2b094819..6cbf583bf 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -516,7 +516,7 @@ def broadcast_qs(_, ps, q, s): ) return dict( - key=jax.random.PRNGKey(jax.process_index()), + key=jax.random.PRNGKey(0), count=jnp.zeros([], jnp.int32), mu=mu, Qs_preconditioners=Qs, From ed50cce7030b2c50c97e0d17b29300e93d9667ea Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sun, 15 Dec 2024 16:50:33 -0700 Subject: [PATCH 37/56] Update kron.py --- src/levanter/optim/kron.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 6cbf583bf..2de182052 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -527,6 +527,7 @@ def broadcast_qs(_, ps, q, s): def update_fn(updates: base.Updates, state: dict, params: base.Params = None): del params count_inc = safe_int32_increment(state["count"]) + key, subkey = jax.random.split(state["key"]) # unbox if haliax style partitioned scanned_layers_ = scanned_layers @@ -801,7 +802,7 @@ def add_dims_to_spec(_, qss, sds): ) # maybe update preconditioner - def update_preconditioner_fn(key, Qs, grads_in, bal_counter): + def update_preconditioner_fn(rngkey, Qs, grads_in, bal_counter): with jax.default_matmul_precision(precond_update_precision): # balance preconditioners about every 100 updates def balance_Qs(Qs_to_bal): @@ -828,8 +829,7 @@ def _balance_Q(Q): Qs = _safe_sharding_constraint(Qs, Qs_sharding) # create random vectors - key, subkey = jax.random.split(key) - Vs = _tree_random_like(subkey, grads_in) + Vs = _tree_random_like(rngkey, grads_in) # apply params sharding to random vectors if have_params_sharding: Vs = _safe_sharding_constraint(Vs, partitioned_sharding) @@ -882,22 +882,22 @@ def _balance_Q(Q): new_Qs = _safe_sharding_constraint(new_Qs, Qs_sharding) new_Qs = otu.tree_cast(new_Qs, precond_dtype) - return key, new_Qs, balance_counter_inc + return new_Qs, balance_counter_inc - def pass_through_fn(key, qs, grads_in, bal_counter): + def pass_through_fn(rngkey, qs, grads_in, bal_counter): if have_qs_sharding: qs = _safe_sharding_constraint(qs, Qs_sharding) - return key, qs, bal_counter + return qs, bal_counter # update preconditioner deterministically update_counter_inc = safe_int32_increment(state["update_counter"]) do_update = update_counter_inc >= 1 / update_prob_in update_counter_inc = jnp.where(do_update, 0, update_counter_inc) - key, Qs, balance_counter_inc 
= jax.lax.cond( + Qs, balance_counter_inc = jax.lax.cond( do_update, update_preconditioner_fn, pass_through_fn, - state["key"], + subkey, Qs, momentum_updates, state["balance_counter"], From 1dc0f43e5f296d7fae3eb9133aa21d748a63a826 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sun, 15 Dec 2024 16:57:30 -0700 Subject: [PATCH 38/56] settings --- src/levanter/optim/kron.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 2de182052..dde7a8f81 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -77,9 +77,9 @@ class KronConfig(OptimizerConfig): scanned_layers: Optional[optax.Params] = None lax_map_scanned_layers: bool = False lax_map_batch_size: int = 8 - merge_small_dims: bool = True + merge_small_dims: bool = False target_merged_dim_size: int = 8192 - partition_grads_into_blocks: bool = True + partition_grads_into_blocks: bool = False block_size: int = 256 params_sharding: Optional[Any] = None preconditioner_sharding: Optional[tuple[str | None, str | None]] = None From f1c1b381215b1d0aaa853e285a65e31f74489c74 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sun, 15 Dec 2024 17:16:28 -0700 Subject: [PATCH 39/56] small fix in init sharding --- src/levanter/optim/kron.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index dde7a8f81..e41070c12 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -77,9 +77,9 @@ class KronConfig(OptimizerConfig): scanned_layers: Optional[optax.Params] = None lax_map_scanned_layers: bool = False lax_map_batch_size: int = 8 - merge_small_dims: bool = False + merge_small_dims: bool = True target_merged_dim_size: int = 8192 - partition_grads_into_blocks: bool = False + partition_grads_into_blocks: bool = True block_size: int = 256 params_sharding: Optional[Any] = None preconditioner_sharding: Optional[tuple[str | None, str | None]] = None @@ -272,9 +272,6 @@ def scale_by_kron( bs = lax_map_batch_size def init_fn(params, return_partition_specs_only=False): - have_params_sharding = params_sharding is not None - have_qs_sharding = have_params_sharding or preconditioner_sharding is not None - # unbox if haliax style partitioned scanned_layers_ = scanned_layers params_sharding_ = params_sharding @@ -306,6 +303,9 @@ def init_fn(params, return_partition_specs_only=False): params_sharding_ = jax.tree.leaves(params_sharding_) print(f"kron params_sharding_: {params_sharding_}") + have_params_sharding = params_sharding_ is not None + have_qs_sharding = have_params_sharding or preconditioner_sharding is not None or have_hax + # unbox if flax style partitioned if have_flax: params = jax.tree.map( From 7a6f501a39df980b3b5efcdb86e83f9e0cfb605f Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sun, 15 Dec 2024 17:23:53 -0700 Subject: [PATCH 40/56] trying repl only --- src/levanter/optim/kron.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index e41070c12..5e4b6d68b 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -300,11 +300,11 @@ def init_fn(params, return_partition_specs_only=False): params, params_struct = jax.tree.flatten(params) scanned_layers_ = jax.tree.leaves(scanned_layers_) print(f"kron scanned_layers_: {scanned_layers_}") - params_sharding_ = jax.tree.leaves(params_sharding_) + params_sharding_ = None # jax.tree.leaves(params_sharding_) 
print(f"kron params_sharding_: {params_sharding_}") have_params_sharding = params_sharding_ is not None - have_qs_sharding = have_params_sharding or preconditioner_sharding is not None or have_hax + have_qs_sharding = have_params_sharding or preconditioner_sharding is not None # or have_hax # unbox if flax style partitioned if have_flax: @@ -559,13 +559,13 @@ def update_fn(updates: base.Updates, state: dict, params: base.Params = None): updates, updates_struct = jax.tree.flatten(updates) scanned_layers_ = jax.tree.leaves(scanned_layers_) print(f"kron scanned_layers_: {scanned_layers_}") - params_sharding_ = jax.tree.leaves(params_sharding_) + params_sharding_ = None # jax.tree.leaves(params_sharding_) print(f"kron params_sharding_: {params_sharding_}") have_params_sharding = params_sharding_ is not None if have_params_sharding: original_params_sharding_ = params_sharding_ - have_qs_sharding = have_params_sharding or preconditioner_sharding is not None or have_hax + have_qs_sharding = have_params_sharding or preconditioner_sharding is not None # or have_hax # unbox if flax style partitioned flax_partitioned = False @@ -1180,7 +1180,7 @@ def _init_Q_exprs( precond_sharding=None, param_sharding=None, ): - have_qs_sharding = precond_sharding is not None or param_sharding is not None + have_qs_sharding = False # precond_sharding is not None or param_sharding is not None letters = string.ascii_lowercase + string.ascii_uppercase if len(t_shape) == 0: # scalar Q = [scale * jnp.ones(t_shape, dtype=dtype)] if existing_Q is None else existing_Q From 3473eed22b10b918aff5f42b10430fd1416834f8 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sun, 15 Dec 2024 17:31:10 -0700 Subject: [PATCH 41/56] Revert "trying repl only" This reverts commit 7a6f501a39df980b3b5efcdb86e83f9e0cfb605f. 
--- src/levanter/optim/kron.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 5e4b6d68b..e41070c12 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -300,11 +300,11 @@ def init_fn(params, return_partition_specs_only=False): params, params_struct = jax.tree.flatten(params) scanned_layers_ = jax.tree.leaves(scanned_layers_) print(f"kron scanned_layers_: {scanned_layers_}") - params_sharding_ = None # jax.tree.leaves(params_sharding_) + params_sharding_ = jax.tree.leaves(params_sharding_) print(f"kron params_sharding_: {params_sharding_}") have_params_sharding = params_sharding_ is not None - have_qs_sharding = have_params_sharding or preconditioner_sharding is not None # or have_hax + have_qs_sharding = have_params_sharding or preconditioner_sharding is not None or have_hax # unbox if flax style partitioned if have_flax: @@ -559,13 +559,13 @@ def update_fn(updates: base.Updates, state: dict, params: base.Params = None): updates, updates_struct = jax.tree.flatten(updates) scanned_layers_ = jax.tree.leaves(scanned_layers_) print(f"kron scanned_layers_: {scanned_layers_}") - params_sharding_ = None # jax.tree.leaves(params_sharding_) + params_sharding_ = jax.tree.leaves(params_sharding_) print(f"kron params_sharding_: {params_sharding_}") have_params_sharding = params_sharding_ is not None if have_params_sharding: original_params_sharding_ = params_sharding_ - have_qs_sharding = have_params_sharding or preconditioner_sharding is not None # or have_hax + have_qs_sharding = have_params_sharding or preconditioner_sharding is not None or have_hax # unbox if flax style partitioned flax_partitioned = False @@ -1180,7 +1180,7 @@ def _init_Q_exprs( precond_sharding=None, param_sharding=None, ): - have_qs_sharding = False # precond_sharding is not None or param_sharding is not None + have_qs_sharding = precond_sharding is not None or param_sharding is not None letters = string.ascii_lowercase + string.ascii_uppercase if len(t_shape) == 0: # scalar Q = [scale * jnp.ones(t_shape, dtype=dtype)] if existing_Q is None else existing_Q From 76847021b995c2ba7139eb69754f3663aed7a1b1 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sun, 15 Dec 2024 18:29:52 -0700 Subject: [PATCH 42/56] trying while loop --- src/levanter/optim/kron.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index e41070c12..dc8e5154f 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -893,15 +893,30 @@ def pass_through_fn(rngkey, qs, grads_in, bal_counter): update_counter_inc = safe_int32_increment(state["update_counter"]) do_update = update_counter_inc >= 1 / update_prob_in update_counter_inc = jnp.where(do_update, 0, update_counter_inc) - Qs, balance_counter_inc = jax.lax.cond( - do_update, - update_preconditioner_fn, - pass_through_fn, - subkey, - Qs, - momentum_updates, - state["balance_counter"], + # Qs, balance_counter_inc = jax.lax.cond( + # do_update, + # update_preconditioner_fn, + # pass_through_fn, + # subkey, + # Qs, + # momentum_updates, + # state["balance_counter"], + # ) + + def cond_fn(state): + return state[-1] + + def iter_fn(state): + rngkey, qs, grads_in, bal_counter, _ = state + qs, bal_counter = update_preconditioner_fn(rngkey, qs, grads_in, bal_counter) + return rngkey, qs, grads_in, bal_counter, False + + while_out = jax.lax.while_loop( + cond_fn, + iter_fn, + (subkey, 
Qs, momentum_updates, state["balance_counter"], do_update), ) + _, Qs, _, balance_counter_inc, _ = while_out if have_qs_sharding: Qs = _safe_sharding_constraint(Qs, Qs_sharding) From d28451892c1faff307cc15110f4f2bdf1693f0ba Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Wed, 18 Dec 2024 20:21:58 -0700 Subject: [PATCH 43/56] trying more simple psgd kron version --- src/levanter/optim/kron.py | 104 +++++++++++++++++++++++++------------ 1 file changed, 70 insertions(+), 34 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index dc8e5154f..fb51193e3 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -7,6 +7,8 @@ from levanter.optim.config import OptimizerConfig +from psgd_jax.kron import scale_by_kron as sbk + @OptimizerConfig.register_subclass("kron") @dataclass @@ -97,31 +99,31 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: if self.max_grad_norm and not self.normalize_grads: components.append(optax.clip_by_global_norm(self.max_grad_norm)) components.append( - scale_by_kron( + sbk( b1=self.beta1, - normalize_grads=self.normalize_grads, - preconditioner_update_probability=precond_update_prob_schedule( - min_prob=self.preconditioner_update_probability, - flat_start=self.update_prob_flat_start, - ), - max_size_triangular=self.max_size_triangular, - min_ndim_triangular=self.min_ndim_triangular, - memory_save_mode=self.memory_save_mode, - preconditioner_lr=self.preconditioner_lr, - preconditioner_init_scale=self.preconditioner_init_scale, - mu_dtype=self.mu_dtype, - precond_dtype=self.precond_dtype, - precond_update_precision=self.precond_update_precision, - precond_grads_precision=self.precond_grads_precision, - scanned_layers=self.scanned_layers, - lax_map_scanned_layers=self.lax_map_scanned_layers, - lax_map_batch_size=self.lax_map_batch_size, - merge_small_dims=self.merge_small_dims, - target_merged_dim_size=self.target_merged_dim_size, - partition_grads_into_blocks=self.partition_grads_into_blocks, - block_size=self.block_size, - params_sharding=self.params_sharding, - preconditioner_sharding=precond_partition_spec, + # normalize_grads=self.normalize_grads, + # preconditioner_update_probability=precond_update_prob_schedule( + # min_prob=self.preconditioner_update_probability, + # flat_start=self.update_prob_flat_start, + # ), + # max_size_triangular=self.max_size_triangular, + # min_ndim_triangular=self.min_ndim_triangular, + # memory_save_mode=self.memory_save_mode, + # preconditioner_lr=self.preconditioner_lr, + # preconditioner_init_scale=self.preconditioner_init_scale, + # mu_dtype=self.mu_dtype, + # precond_dtype=self.precond_dtype, + # precond_update_precision=self.precond_update_precision, + # precond_grads_precision=self.precond_grads_precision, + # scanned_layers=self.scanned_layers, + # lax_map_scanned_layers=self.lax_map_scanned_layers, + # lax_map_batch_size=self.lax_map_batch_size, + # merge_small_dims=self.merge_small_dims, + # target_merged_dim_size=self.target_merged_dim_size, + # partition_grads_into_blocks=self.partition_grads_into_blocks, + # block_size=self.block_size, + # params_sharding=self.params_sharding, + # preconditioner_sharding=precond_partition_spec, ) ) if self.weight_decay > 0: @@ -295,16 +297,20 @@ def init_fn(params, return_partition_specs_only=False): is_leaf=lambda x: isinstance(x, hax.nn.Stacked), ) if params_sharding_ is None: - params_sharding_ = hax.partitioning.infer_resource_partitions(params) - params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) + try: + 
params_sharding_ = hax.partitioning.infer_resource_partitions(params) + params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) + except: + params_sharding_ = None params, params_struct = jax.tree.flatten(params) scanned_layers_ = jax.tree.leaves(scanned_layers_) print(f"kron scanned_layers_: {scanned_layers_}") - params_sharding_ = jax.tree.leaves(params_sharding_) - print(f"kron params_sharding_: {params_sharding_}") + if params_sharding_ is not None: + params_sharding_ = jax.tree.leaves(params_sharding_) + print(f"kron params_sharding_: {params_sharding_}") have_params_sharding = params_sharding_ is not None - have_qs_sharding = have_params_sharding or preconditioner_sharding is not None or have_hax + have_qs_sharding = have_params_sharding or preconditioner_sharding is not None # unbox if flax style partitioned if have_flax: @@ -554,18 +560,22 @@ def update_fn(updates: base.Updates, state: dict, params: base.Params = None): is_leaf=lambda x: isinstance(x, hax.nn.Stacked), ) if params_sharding_ is None: - params_sharding_ = hax.partitioning.infer_resource_partitions(updates) - params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) + try: + params_sharding_ = hax.partitioning.infer_resource_partitions(updates) + params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) + except: + params_sharding_ = None updates, updates_struct = jax.tree.flatten(updates) scanned_layers_ = jax.tree.leaves(scanned_layers_) print(f"kron scanned_layers_: {scanned_layers_}") - params_sharding_ = jax.tree.leaves(params_sharding_) - print(f"kron params_sharding_: {params_sharding_}") + if params_sharding_ is not None: + params_sharding_ = jax.tree.leaves(params_sharding_) + print(f"kron params_sharding_: {params_sharding_}") have_params_sharding = params_sharding_ is not None if have_params_sharding: original_params_sharding_ = params_sharding_ - have_qs_sharding = have_params_sharding or preconditioner_sharding is not None or have_hax + have_qs_sharding = have_params_sharding or preconditioner_sharding is not None # unbox if flax style partitioned flax_partitioned = False @@ -1546,6 +1556,10 @@ def _partitions(lst): for part in _partitions(lst[i + 1 :]): yield [lst[: i + 1]] + part +""" +128, 4, 4, 8 +(128, 512) +""" def _merge_small_dims( shape_to_merge, max_dim, dim_diag, sharding_to_merge=None @@ -1694,3 +1708,25 @@ def _unstack_matrices(stacked_arrays, revert_indices): if in_tuple: return tuple(array_list) return array_list + + +if __name__ == "__main__": + import jax_sourceror + + axis_a = hax.Axis("d", 128) + axis_b = hax.Axis("b", 8) + + params = { + "w": hax.NamedArray(jnp.ones((128, 8)), (axis_a, axis_b)), + "b": hax.NamedArray(jnp.ones((128,)), (axis_a,)), + } + grads = { + "w": hax.NamedArray(jnp.ones((128, 8)), (axis_a, axis_b)), + "b": hax.NamedArray(jnp.ones((128,)), (axis_a,)), + } + + optimizer = kron() + opt_state = optimizer.init(params) + source_code = jax_sourceror.sourcerize(optimizer.update)(grads, opt_state, params) + + print(source_code) From 0c920b048f033929497b6bdff07bfff5c5c5a04e Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Wed, 18 Dec 2024 20:22:54 -0700 Subject: [PATCH 44/56] Update kron.py --- src/levanter/optim/kron.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index fb51193e3..6dc45c94b 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -115,7 +115,7 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: # 
precond_dtype=self.precond_dtype, # precond_update_precision=self.precond_update_precision, # precond_grads_precision=self.precond_grads_precision, - # scanned_layers=self.scanned_layers, + scanned_layers=self.scanned_layers, # lax_map_scanned_layers=self.lax_map_scanned_layers, # lax_map_batch_size=self.lax_map_batch_size, # merge_small_dims=self.merge_small_dims, From 6a2e19fa6be65fcd89db35d64d62d039193fc67d Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Wed, 18 Dec 2024 20:37:34 -0700 Subject: [PATCH 45/56] trying simple version --- src/levanter/optim/kron.py | 3158 ++++++++++++++++++++++-------------- 1 file changed, 1924 insertions(+), 1234 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 6dc45c94b..fb580a9e7 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -7,8 +7,6 @@ from levanter.optim.config import OptimizerConfig -from psgd_jax.kron import scale_by_kron as sbk - @OptimizerConfig.register_subclass("kron") @dataclass @@ -99,31 +97,31 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: if self.max_grad_norm and not self.normalize_grads: components.append(optax.clip_by_global_norm(self.max_grad_norm)) components.append( - sbk( + scale_by_kron( b1=self.beta1, - # normalize_grads=self.normalize_grads, - # preconditioner_update_probability=precond_update_prob_schedule( - # min_prob=self.preconditioner_update_probability, - # flat_start=self.update_prob_flat_start, - # ), - # max_size_triangular=self.max_size_triangular, - # min_ndim_triangular=self.min_ndim_triangular, - # memory_save_mode=self.memory_save_mode, - # preconditioner_lr=self.preconditioner_lr, - # preconditioner_init_scale=self.preconditioner_init_scale, - # mu_dtype=self.mu_dtype, - # precond_dtype=self.precond_dtype, - # precond_update_precision=self.precond_update_precision, - # precond_grads_precision=self.precond_grads_precision, + normalize_grads=self.normalize_grads, + preconditioner_update_probability=precond_update_prob_schedule( + min_prob=self.preconditioner_update_probability, + flat_start=self.update_prob_flat_start, + ), + max_size_triangular=self.max_size_triangular, + min_ndim_triangular=self.min_ndim_triangular, + memory_save_mode=self.memory_save_mode, + preconditioner_lr=self.preconditioner_lr, + preconditioner_init_scale=self.preconditioner_init_scale, + mu_dtype=self.mu_dtype, + precond_dtype=self.precond_dtype, + precond_update_precision=self.precond_update_precision, + precond_grads_precision=self.precond_grads_precision, scanned_layers=self.scanned_layers, - # lax_map_scanned_layers=self.lax_map_scanned_layers, - # lax_map_batch_size=self.lax_map_batch_size, - # merge_small_dims=self.merge_small_dims, - # target_merged_dim_size=self.target_merged_dim_size, - # partition_grads_into_blocks=self.partition_grads_into_blocks, - # block_size=self.block_size, - # params_sharding=self.params_sharding, - # preconditioner_sharding=precond_partition_spec, + lax_map_scanned_layers=self.lax_map_scanned_layers, + lax_map_batch_size=self.lax_map_batch_size, + merge_small_dims=self.merge_small_dims, + target_merged_dim_size=self.target_merged_dim_size, + partition_grads_into_blocks=self.partition_grads_into_blocks, + block_size=self.block_size, + params_sharding=self.params_sharding, + preconditioner_sharding=precond_partition_spec, ) ) if self.weight_decay > 0: @@ -138,38 +136,25 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: return optax.inject_hyperparams(_optimizer)( 
learning_rate=self.lr_scheduler(num_train_steps) ) + - -"""PSGD Kron""" -from typing import Any, List, Optional, Union, Callable, Tuple -from collections import defaultdict +from typing import Any, List, Optional, Union, Callable from functools import partial import string import numpy as np import chex import jax -from jax import numpy as jnp, vmap -from jax.sharding import PartitionSpec -from jax.lax import with_sharding_constraint +from jax import vmap +import jax.numpy as jnp +import flax.linen as nn from optax import tree_utils as otu from optax._src import base, transform from optax._src.numerics import safe_int32_increment from optax._src.utils import canonicalize_dtype from optax._src.combine import chain -try: - import flax.linen as nn - - have_flax = True -except ImportError: - have_flax = False -try: - import haliax as hax - - have_hax = True -except ImportError: - have_hax = False +import haliax as hax def precond_update_prob_schedule( @@ -181,14 +166,17 @@ def precond_update_prob_schedule( but once the preconditioner is learned the update probability can drop low. This schedule is an exponential anneal with a flat start. Default settings keep - update probability at 1.0 for 500 steps then exponentially anneal down to + update probability at 1.0 for 250 steps then exponentially anneal down to `min_prob` by 4000 steps. Default settings work well for most models and training regimes. """ def _schedule(n): """Exponential anneal with flat start.""" - return jnp.clip(max_prob * jnp.exp(-decay * (n - flat_start)), min_prob, max_prob) + return jnp.minimum( + jnp.maximum(max_prob * jnp.exp(-decay * (n - flat_start)), min_prob), + max_prob, + ) return _schedule @@ -202,6 +190,7 @@ def scale_by_kron( max_size_triangular: int = 8192, min_ndim_triangular: int = 2, memory_save_mode: Optional[str] = None, + momentum_into_precond_update: bool = True, preconditioner_lr: float = 0.1, preconditioner_init_scale: float = 1.0, mu_dtype: Optional[Union[str, jnp.dtype]] = None, @@ -211,21 +200,13 @@ def scale_by_kron( scanned_layers: Optional[base.Params] = None, lax_map_scanned_layers: bool = False, lax_map_batch_size: int = 8, - merge_small_dims: bool = False, - target_merged_dim_size: int = 2048, - partition_grads_into_blocks: bool = False, - block_size: int = 256, - params_sharding: Optional[Any] = None, - preconditioner_sharding: Optional[PartitionSpec[str, str]] = None, - **kwargs, -) -> base.GradientTransformation: +) -> base.GradientTransformationExtraArgs: """ Implements PSGD Kron from https://github.com/lixilinx/psgd_torch. Args: - b1: float, momentum parameter. 0.9 or 0.95 are common values. - normalize_grads: bool, whether to normalize the incoming gradients to unit - norm layer-wise. Can help with stability. + b1: float, momentum parameter. + normalize_grads: bool, whether to normalize gradients to unit norm layer-wise. preconditioner_update_probability: float, probability of updating the preconditioner. Default anneals from 1.0 to 0.03 by 4000 steps. max_size_triangular: int, max size for dim's preconditioner to be triangular. @@ -235,804 +216,317 @@ def scale_by_kron( to set all preconditioners to be triangular, 'one_diag' sets the largest or last dim to be diagonal per layer, and 'all_diag' sets all preconditioners to be diagonal. + momentum_into_precond_update: bool, whether to send momentum into preconditioner + update instead of raw gradients. preconditioner_lr: float, learning rate for preconditioner. preconditioner_init_scale: float, scale for preconditioner initialization. 
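# A quick numeric check (illustrative; the defaults shown here are assumptions
# matching the docstring above) of the anneal: the probability holds at
# max_prob through the flat start, then decays exponentially and floors at
# min_prob.

import jax.numpy as jnp

def _sched(n, max_prob=1.0, min_prob=0.03, decay=0.001, flat_start=250):
    return jnp.minimum(
        jnp.maximum(max_prob * jnp.exp(-decay * (n - flat_start)), min_prob), max_prob
    )

for n in [0, 250, 1000, 2000, 4000]:
    print(n, float(_sched(jnp.float32(n))))  # 1.0, 1.0, ~0.47, ~0.17, 0.03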
- mu_dtype: optional str or jnp.dtype, dtype of the momentum buffer. Defaults to - same dtype as the parameters. - precond_dtype: optional str or jnp.dtype, dtype of the preconditioners. Defaults - to 'float32'. + mu_dtype: optional str or jnp.dtype, dtype of the momentum accumulator. + Defaults to the same dtype as the parameters. + precond_dtype: optional str or jnp.dtype, dtype of the preconditioner. precond_update_precision: str, precision for matmul during preconditioner update, 'bfloat16', 'tensorfloat32', 'float32'. precond_grads_precision: str, precision for matmul during preconditioning grads, 'bfloat16', 'tensorfloat32', 'float32'. - scanned_layers: optional base.Params, tree of booleans same structure as - params indicating scanned dimensions for each layer. PSGD will vmap over - leading dimension. + scanned_layers: optional base.Params, tree of bool same structure as params + indicating scanned layers. PSGD will vmap over the first dim. lax_map_scanned_layers: bool, whether to use lax.map for scanned layers instead of vmap. Useful to save memory with large models. lax_map_batch_size: int, batch size for lax.map, see JAX docs for more info. - merge_small_dims: bool, whether to merge small dimensions to improve - preconditioner efficiency. - target_merged_dim_size: int, target size of merged dimensions. - partition_grads_into_blocks: bool, whether to partition grads into chunks of - size `block_size` for efficiency. - block_size: int, block size to use for partitioning grads. - params_sharding: pytree same structure as params of jax.sharding.PartitionSpec. - preconditioner_sharding: `None` or `PartitionSpec(str | None, str | None)`, - PartitionSpec for preconditioner matrices. `None` infers a strategy - from params_sharding that matches first preconditioner axis to - corresponding axis in params. Best practice, though, is to shard the first - dimension across fsdp-like mesh axis, or the largest, most common axis in - params. For example, PartitionSpec('fsdp') or PartitionSpec('fsdp', 'tp'). 
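# For reference (illustrative; these arguments are removed by this patch), the
# sharding options documented above took jax.sharding.PartitionSpec values,
# for example:

from jax.sharding import PartitionSpec

params_sharding = {
    "w": PartitionSpec("fsdp", None),  # one spec per parameter leaf
    "b": PartitionSpec("fsdp"),
}
preconditioner_sharding = PartitionSpec("fsdp", None)  # first precond axis on fsdp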
Returns: - optax.GradientTransformation + optax.GradientTransformationExtraArgs """ mu_dtype = canonicalize_dtype(mu_dtype) - precond_dtype = canonicalize_dtype(precond_dtype or jnp.float32) - lax_map = lax_map_scanned_layers - bs = lax_map_batch_size - - def init_fn(params, return_partition_specs_only=False): - # unbox if haliax style partitioned - scanned_layers_ = scanned_layers - params_sharding_ = params_sharding - if have_hax: - if any( - isinstance(x, hax.NamedArray) - for x in jax.tree.leaves( - params, is_leaf=lambda x: isinstance(x, hax.NamedArray) + precond_dtype = canonicalize_dtype(precond_dtype) + + def map_fn(do_map, fn, *args): + """Maybe map a fn along first axis.""" + if do_map: + if lax_map_scanned_layers: + return jax.lax.map( + lambda xs: fn(*xs), + xs=args, + batch_size=lax_map_batch_size if lax_map_batch_size > 1 else None, ) - ): - # if in haliax, we can grab scanned_layers and params_sharding from params - # this does not support nested stacks - if scanned_layers_ is None: - scanned_layers_ = jax.tree.map( - lambda x: ( - jax.tree.map(lambda _: True, x) - if isinstance(x, hax.nn.Stacked) - else False - ), - params, - is_leaf=lambda x: isinstance(x, hax.nn.Stacked), - ) - if params_sharding_ is None: - try: - params_sharding_ = hax.partitioning.infer_resource_partitions(params) - params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) - except: - params_sharding_ = None - params, params_struct = jax.tree.flatten(params) - scanned_layers_ = jax.tree.leaves(scanned_layers_) - print(f"kron scanned_layers_: {scanned_layers_}") - if params_sharding_ is not None: - params_sharding_ = jax.tree.leaves(params_sharding_) - print(f"kron params_sharding_: {params_sharding_}") - - have_params_sharding = params_sharding_ is not None - have_qs_sharding = have_params_sharding or preconditioner_sharding is not None - - # unbox if flax style partitioned - if have_flax: - params = jax.tree.map( - lambda x: x.unbox() if isinstance(x, nn.Partitioned) else x, - params, - is_leaf=lambda x: isinstance(x, nn.Partitioned), - ) + else: + return vmap(fn)(*args) + else: + return fn(*args) - # check that there is a PartitionSpec for every param - if params_sharding_ is not None: - assert len(jax.tree.leaves(params_sharding_)) == len( - jax.tree.leaves(params) - ), "There must be a PartitionSpec for every parameter in PSGD Kron." - # check that preconditioner sharding length is at least 1 - if preconditioner_sharding is not None: - assert len(preconditioner_sharding) > 0, ( - "preconditioner_sharding must have length > 0. For example, " - "PartitionSpec(None) or PartitionSpec('fsdp', None) are valid." 
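# A minimal check (illustrative) that the two branches of map_fn above agree:
# vmap and lax.map compute the same per-layer result, with lax.map trading
# speed for lower peak memory on large stacks.

import jax
import jax.numpy as jnp

f = lambda x: x @ x.T
xs = jnp.ones((4, 3, 3))  # 4 stacked layers of shape (3, 3)
assert jnp.allclose(jax.vmap(f)(xs), jax.lax.map(f, xs))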
- ) + def init_fn(params): + params = jax.tree.map( + lambda x: x.unbox() if isinstance(x, nn.Partitioned) else x, + params, + is_leaf=lambda v: isinstance(v, (chex.Array, nn.Partitioned)), + ) - # extend partition specs - if have_params_sharding: - params_sharding_ = jax.tree.map( - lambda p, sh: PartitionSpec(*(sh + (None,) * (len(p.shape) - len(sh)))), + scanned_layers_ = scanned_layers + if scanned_layers_ is None: + scanned_layers_ = jax.tree.map( + lambda x: ( + jax.tree.map(lambda _: True, x) + if isinstance(x, hax.nn.Stacked) + else False + ), params, - params_sharding_, - ) - preconditioner_sharding_ = preconditioner_sharding - if preconditioner_sharding is not None: - if len(preconditioner_sharding) < 2: - preconditioner_sharding_ = PartitionSpec(preconditioner_sharding[0], None) - - # reshape params shaped () to (1,) to make things simpler - params = jax.tree.map(lambda p: p[None] if len(p.shape) == 0 else p, params) - if have_params_sharding: - params_sharding_ = jax.tree.map( - lambda sh: PartitionSpec(None) if sh == PartitionSpec() else sh, - params_sharding_, + is_leaf=lambda x: isinstance(x, hax.nn.Stacked), ) - - # scanned layers - if scanned_layers_ is None: - scanned_layers_ = jax.tree.map(lambda _: False, params) - scanned_sizes = jax.tree.map( - lambda p, s: p.shape[0] if s else 0, params, scanned_layers_ - ) + params, params_struct = jax.tree.flatten(params) + scanned_layers_ = jax.tree.leaves(scanned_layers_) + print(f"kron scanned_layers_: {scanned_layers_}") # momentum mu = None - mu_sharding = params_sharding_ - if b1 > 0 and not return_partition_specs_only: + if b1 > 0: mu = jax.tree.map(lambda x: jnp.zeros_like(x, dtype=mu_dtype), params) - # apply params sharding to momentum buffer - if have_params_sharding: - mu = _safe_sharding_constraint(mu, params_sharding_) - - # which preconditioners will be diagonal - dim_diag = jax.tree.map( - lambda p, s: _get_preconditioner_types( - p.shape[int(s) :], + + # preconditioners + Qs = [ + _init_Q_exprs( + t[0] if s else t, + preconditioner_init_scale, max_size_triangular, min_ndim_triangular, memory_save_mode, - ), - params, - scanned_layers_, - ) - - # split sharding specs - scanned_dim_sharding = None - sharding_without_scan = None - if have_params_sharding: - scanned_dim_sharding = jax.tree.map( - lambda sh, s: PartitionSpec(sh[0]) if s else None, - params_sharding_, - scanned_layers_, - ) - sharding_without_scan = jax.tree.map( - lambda sh, s: PartitionSpec(*(sh[int(s) :])), - params_sharding_, - scanned_layers_, - ) - - # merge small dimensions - nones = jax.tree.map(lambda _: None, params) - merged_shapes = jax.tree.map( - lambda p, s: p.shape[int(s) :], params, scanned_layers_ - ) - if merge_small_dims: - output = jax.tree.map( - lambda p, s, dd, sh: _merge_small_dims( - p.shape[int(s) :], target_merged_dim_size, dd, sh - ), - params, - scanned_layers_, - dim_diag, - sharding_without_scan if have_params_sharding else nones, - ) - merged_shapes, dim_diag, sharding_without_scan = [ - jax.tree.map(lambda _, x: x[i], params, output) for i in range(3) - ] - - # partition grads into blocks - partitioned_shapes = merged_shapes - if partition_grads_into_blocks: - partitioners = jax.tree.map( - lambda _, ps, dd: BlockPartitioner(ps, block_size, dd), - params, - merged_shapes, - dim_diag, + precond_dtype, + )[0] + for t, s in zip(jax.tree.leaves(params), jax.tree.leaves(scanned_layers_)) + ] + # broadcast for scanned layers + Qs = [ + ( + jax.tree.map( + lambda d: jnp.repeat(jnp.expand_dims(d, 0), t.shape[0], axis=0), q + ) + 
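# (Illustrative toy of the broadcast happening here: every scanned layer
# shares the same initial factor, repeated along a new leading dim so the
# per-layer update can later be vmapped or lax.mapped over the stack:
#     q = jnp.eye(3)                                      # one layer's factor
#     stacked = jnp.repeat(jnp.expand_dims(q, 0), 12, 0)  # 12 scanned layers
#     stacked.shape == (12, 3, 3)
# )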
if s + else q ) - # we can grab resulting shapes from partitioners - partitioned_shapes = jax.tree.map( - lambda _, p_cls: p_cls._padded_stacked_shape, params, partitioners + for q, t, s in zip( + Qs, jax.tree.leaves(params), jax.tree.leaves(scanned_layers_) ) + ] + Qs = jax.tree.structure(params).unflatten(Qs) - # initialize preconditioners - output = jax.tree.map( - lambda _, ps, dd, sh: list( - _init_Q_exprs( - ps[1:] if partition_grads_into_blocks else ps, - preconditioner_init_scale, - dd, - precond_dtype, - existing_Q=True if return_partition_specs_only else None, - precond_sharding=preconditioner_sharding_, - param_sharding=sh, - ) - ), - params, - partitioned_shapes, - dim_diag, - sharding_without_scan if have_params_sharding else nones, + # Calculate sizes for nu (preconditioner) and mu (momentum) + Qs_n_elements = sum([q.size for q in jax.tree.leaves(Qs)]) + Qs_size_MB = sum( + [q.size * q.dtype.itemsize / (2**20) for q in jax.tree.leaves(Qs)] ) - if return_partition_specs_only: - exprs, Qs_sharding_no_leading_dims = [ - jax.tree.map(lambda _, x: x[i], params, output) for i in range(2) - ] - else: - Qs, exprs, Qs_sharding_no_leading_dims = [ - jax.tree.map(lambda _, x: x[i], params, output) for i in range(3) - ] - Qs_sharding = None - if have_qs_sharding: - # add scan and stack dims to Qs sharding - def add_dims_to_spec(_, qss, sds): - if partition_grads_into_blocks: - qss = jax.tree.map(lambda qs: PartitionSpec(*((None,) + qs)), qss) - if sds is not None: - qss = jax.tree.map(lambda qs: PartitionSpec(*(sds + qs)), qss) - return qss - - Qs_sharding = jax.tree.map( - add_dims_to_spec, - params, - Qs_sharding_no_leading_dims, - scanned_dim_sharding, + if jax.process_index() == 0: + print( + f"PSGD Preconditioners size: {Qs_n_elements} elements, " + f"{Qs_size_MB:.2f} MB" ) - - if not return_partition_specs_only: - # broadcast Qs for stacks and scans - def broadcast_qs(_, ps, q, s): - stack_n = ps[0] - if partition_grads_into_blocks: - # add leading dim for stacked partitions - q = jax.tree.map( - lambda x: jnp.repeat(jnp.expand_dims(x, 0), stack_n, axis=0), q - ) - if s > 0: - # add leading dim if we're scanning this layer - q = jax.tree.map( - lambda d: jnp.repeat(jnp.expand_dims(d, 0), s, axis=0), q - ) - return q - - Qs = jax.tree.map(broadcast_qs, params, partitioned_shapes, Qs, scanned_sizes) - if have_qs_sharding: - Qs = _safe_sharding_constraint(Qs, Qs_sharding) - - # Calculate and print sizes for preconditioners and momentum - Qs_n_elements = sum([q.size for q in jax.tree.leaves(Qs)]) - Qs_size_MB = sum( - [q.size * q.dtype.itemsize / (2**20) for q in jax.tree.leaves(Qs)] + if mu is not None: + mu_n_elements = sum([p.size for p in jax.tree.leaves(mu)]) + mu_size_MB = sum( + [p.size * p.dtype.itemsize / (2**20) for p in jax.tree.leaves(mu)] ) if jax.process_index() == 0: print( - f"PSGD Preconditioners size: {Qs_n_elements} elements, " - f"{Qs_size_MB:.2f} MB" - ) - if mu is not None: - mu_n_elements = sum([p.size for p in jax.tree.leaves(mu)]) - mu_size_MB = sum( - [p.size * p.dtype.itemsize / (2**20) for p in jax.tree.leaves(mu)] + f"PSGD Momentum size: {mu_n_elements} elements, {mu_size_MB:.2f} MB" ) - if jax.process_index() == 0: - print( - f"PSGD Momentum size: {mu_n_elements} elements, {mu_size_MB:.2f} MB" - ) - - if return_partition_specs_only: - return dict( - key=PartitionSpec(), - count=PartitionSpec(), - mu=mu_sharding, - Qs_preconditioners=Qs_sharding, - update_counter=PartitionSpec(), - balance_counter=PartitionSpec(), - ) + # initial state return dict( - 
key=jax.random.PRNGKey(0), count=jnp.zeros([], jnp.int32), mu=mu, Qs_preconditioners=Qs, update_counter=jnp.zeros([], jnp.int32), - balance_counter=jnp.zeros([], jnp.int32), ) def update_fn(updates: base.Updates, state: dict, params: base.Params = None): del params count_inc = safe_int32_increment(state["count"]) - key, subkey = jax.random.split(state["key"]) + key = jax.random.fold_in(jax.random.PRNGKey(5318008), state["count"]) - # unbox if haliax style partitioned scanned_layers_ = scanned_layers - params_sharding_ = params_sharding - hax_partitioned = False - if have_hax: - if any( - isinstance(x, hax.NamedArray) - for x in jax.tree.leaves( - updates, is_leaf=lambda x: isinstance(x, hax.NamedArray) - ) - ): - hax_partitioned = True - # if in haliax, we can grab scanned_layers and params_sharding from params - # this does not support nested stacks - if scanned_layers_ is None: - scanned_layers_ = jax.tree.map( - lambda x: ( - jax.tree.map(lambda _: True, x) - if isinstance(x, hax.nn.Stacked) - else False - ), - updates, - is_leaf=lambda x: isinstance(x, hax.nn.Stacked), - ) - if params_sharding_ is None: - try: - params_sharding_ = hax.partitioning.infer_resource_partitions(updates) - params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) - except: - params_sharding_ = None - updates, updates_struct = jax.tree.flatten(updates) - scanned_layers_ = jax.tree.leaves(scanned_layers_) - print(f"kron scanned_layers_: {scanned_layers_}") - if params_sharding_ is not None: - params_sharding_ = jax.tree.leaves(params_sharding_) - print(f"kron params_sharding_: {params_sharding_}") - - have_params_sharding = params_sharding_ is not None - if have_params_sharding: - original_params_sharding_ = params_sharding_ - have_qs_sharding = have_params_sharding or preconditioner_sharding is not None - - # unbox if flax style partitioned - flax_partitioned = False - if have_flax: - boxed_updates, grads_structure = jax.tree.flatten( - updates, - is_leaf=lambda g: isinstance( - g, (chex.Array, nn.Partitioned, jax.ShapeDtypeStruct) + if scanned_layers_ is None: + scanned_layers_ = jax.tree.map( + lambda x: ( + jax.tree.map(lambda _: True, x) + if isinstance(x, hax.nn.Stacked) + else False ), - ) - if any(isinstance(g, nn.Partitioned) for g in boxed_updates): - flax_partitioned = True - updates = [g.unbox() for g in boxed_updates] - updates = grads_structure.unflatten(updates) - - # extend partition specs - if have_params_sharding: - params_sharding_ = jax.tree.map( - lambda g, sh: PartitionSpec(*(sh + (None,) * (len(g.shape) - len(sh)))), updates, - params_sharding_, - ) - preconditioner_sharding_ = preconditioner_sharding - if preconditioner_sharding is not None: - if len(preconditioner_sharding) < 2: - preconditioner_sharding_ = PartitionSpec(preconditioner_sharding[0], None) - - # reshape params shaped () to (1,) to make things simpler - input_shapes = jax.tree.map(lambda g: g.shape, updates) - updates = jax.tree.map(lambda g: g[None] if len(g.shape) == 0 else g, updates) - if have_params_sharding: - params_sharding_ = jax.tree.map( - lambda sh: PartitionSpec(None) if sh == PartitionSpec() else sh, - params_sharding_, + is_leaf=lambda x: isinstance(x, hax.nn.Stacked), ) + updates, updates_struct = jax.tree.flatten(updates) + scanned_layers_ = jax.tree.leaves(scanned_layers_) + print(f"kron scanned_layers_: {scanned_layers_}") - # scanned layers - if scanned_layers_ is None: - scanned_layers_ = jax.tree.map(lambda _: False, updates) + # account for flax.linen.Partitioned grads and params + 
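# A minimal sketch of the RNG scheme above: folding the step count into a
# fixed base key yields a deterministic per-step key stream, so no PRNG key
# has to be carried in the optimizer state.

import jax

base = jax.random.PRNGKey(5318008)
k0, k1 = jax.random.fold_in(base, 0), jax.random.fold_in(base, 1)
assert not bool((k0 == k1).all())  # distinct, reproducible per-step keys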
boxed_updates, grads_structure = jax.tree.flatten( + updates, is_leaf=lambda v: isinstance(v, (chex.Array, nn.Partitioned)) + ) + flax_partitioned = False + if isinstance(boxed_updates[0], nn.Partitioned): + flax_partitioned = True + updates = [u.unbox() for u in boxed_updates] + updates = grads_structure.unflatten(updates) - # update probability can be scheduled update_prob_in = preconditioner_update_probability if isinstance(preconditioner_update_probability, Callable): update_prob_in = preconditioner_update_probability(count_inc) # normalize grads - def norm_grads(g): - return g / (jnp.linalg.norm(g) + 1e-16) - if normalize_grads: - updates = jax.tree.map(norm_grads, updates) + updates = jax.tree.map( + lambda g: g / (jnp.linalg.norm(g) + 1e-16), + updates, + ) # momentum mu = None momentum_updates = updates if state["mu"] is not None: mu = otu.tree_update_moment(updates, state["mu"], b1, 1) - if have_params_sharding: - mu = _safe_sharding_constraint(mu, params_sharding_) momentum_updates = otu.tree_bias_correction(mu, b1, count_inc) - # which preconditioners will be diagonal - dim_diag = jax.tree.map( - lambda g, s: _get_preconditioner_types( - g.shape[int(s) :], + # flatten pytrees + updates, grads_structure = jax.tree.flatten(updates) + momentum_updates = grads_structure.flatten_up_to(momentum_updates) + Qs = grads_structure.flatten_up_to(state["Qs_preconditioners"]) + scanned_layers_ = grads_structure.flatten_up_to(scanned_layers_) + + # get einsum expressions + expressions = [ + _init_Q_exprs( + t[0] if s else t, + preconditioner_init_scale, max_size_triangular, min_ndim_triangular, memory_save_mode, - ), - momentum_updates, - scanned_layers_, - ) - - # split sharding specs - scanned_dim_sharding = None - sharding_without_scan = None - if have_params_sharding: - scanned_dim_sharding = jax.tree.map( - lambda sh, s: PartitionSpec(sh[0]) if s else None, - params_sharding_, - scanned_layers_, - ) - sharding_without_scan = jax.tree.map( - lambda sh, s: PartitionSpec(*(sh[int(s) :])), - params_sharding_, - scanned_layers_, - ) - - # merge small dimensions - nones = jax.tree.map(lambda _: None, momentum_updates) - merged_params_sharding = params_sharding_ - original_shapes = None - if merge_small_dims: - original_shapes = jax.tree.map( - lambda g, s: g.shape[int(s) :], momentum_updates, scanned_layers_ - ) - output = jax.tree.map( - lambda g, dd, s, sh: _merge_small_dims( - g.shape[int(s) :], target_merged_dim_size, dd, sh - ), - momentum_updates, - dim_diag, - scanned_layers_, - sharding_without_scan if have_params_sharding else nones, - ) - merged_shapes, dim_diag, sharding_without_scan = [ - jax.tree.map(lambda _, x: x[i], momentum_updates, output) - for i in range(3) - ] - # reshape - momentum_updates = jax.tree.map( - lambda g, s, ns: _map_fn( - False, 0, int(s), lambda x, shape=ns: jnp.reshape(x, shape), g - ), - momentum_updates, - scanned_layers_, - merged_shapes, - ) - if have_params_sharding: - # scanned dim sharding + new merged sharding - merged_params_sharding = jax.tree.map( - lambda sws, sds: PartitionSpec( - *(sds + sws if sds is not None else sws) - ), - sharding_without_scan, - scanned_dim_sharding, - ) - # constrain sharding - if have_params_sharding: - momentum_updates = _safe_sharding_constraint( - momentum_updates, merged_params_sharding - ) - - # partition grads into blocks - dummy_updates_tree = jax.tree.map(lambda _: jnp.zeros([]), updates) - n_dims_to_map = jax.tree.map(lambda s: int(s), scanned_layers_) - partitioned_sharding = merged_params_sharding - 
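# Sketch (illustrative) of the momentum math above: tree_update_moment keeps
# an EMA of the grads, and tree_bias_correction divides by (1 - b1**t) so
# early steps are not biased toward the zero init (Adam's m_hat correction).

import jax.numpy as jnp
from optax import tree_utils as otu

g = {"w": jnp.ones(2)}
mu = otu.tree_update_moment(g, {"w": jnp.zeros(2)}, 0.9, 1)  # 0.1 * g
print(otu.tree_bias_correction(mu, 0.9, 1)["w"])             # [1. 1.]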
partitioners = None - partitioned_shapes = None - if partition_grads_into_blocks: - partitioners = jax.tree.map( - lambda g, dd, s: BlockPartitioner(g.shape[int(s) :], block_size, dd), - momentum_updates, - dim_diag, - scanned_layers_, - ) - # layers become tuples each containing layer's partitions - momentum_updates = jax.tree.map( - lambda g, p_cls, s: _map_fn(False, 0, int(s), p_cls.partition, g), - momentum_updates, - partitioners, - scanned_layers_, - ) - partitioned_shapes = jax.tree.map( - lambda _, g, s: jax.tree.map(lambda x: x.shape[int(s) :], g), - dummy_updates_tree, - momentum_updates, - scanned_layers_, - ) - if have_params_sharding: - # constrain partitions to same sharding as entire layer - momentum_updates = jax.tree.map( - lambda _, g, mps: jax.tree.map( - lambda x: _safe_sharding_constraint(x, mps), g - ), - dummy_updates_tree, - momentum_updates, - merged_params_sharding, - ) - # pad and stack partitions, tuples become arrays with new leading dim - momentum_updates = jax.tree.map( - lambda _, g, s: _map_fn( - False, - 0, - int(s), - lambda x, bs=block_size: _pad_and_stack_matrices(x, bs), - g, - ), - dummy_updates_tree, - momentum_updates, - scanned_layers_, - ) - if have_params_sharding: - # add dim to sharding specs for new stacked dim - partitioned_sharding = jax.tree.map( - lambda mps, s: PartitionSpec(*(mps[: int(s)] + (None,) + mps[1:])), - merged_params_sharding, - scanned_layers_, - ) - n_dims_to_map = jax.tree.map(lambda x: x + 1, n_dims_to_map) - # constrain sharding - if have_params_sharding: - momentum_updates = _safe_sharding_constraint( - momentum_updates, partitioned_sharding - ) - - # get einsum expressions and Qs sharding - Qs = state["Qs_preconditioners"] - Qs_sharding = None - exprs_and_sharding = jax.tree.map( - lambda g, dd, sh, nm: _init_Q_exprs( - g.shape[nm:], - preconditioner_init_scale, - dd, precond_dtype, - existing_Q=True, - precond_sharding=preconditioner_sharding_, - param_sharding=sh, - ), - momentum_updates, - dim_diag, - sharding_without_scan if have_params_sharding else nones, - n_dims_to_map, - ) - exprs, Qs_sharding_no_leading_dims = [ - jax.tree.map(lambda _, x: x[i], dummy_updates_tree, exprs_and_sharding) - for i in range(2) - ] - Qs_sharding = None - if have_qs_sharding: - # add scan and stack dims to Qs sharding - def add_dims_to_spec(_, qss, sds): - if partition_grads_into_blocks: - qss = jax.tree.map(lambda qs: PartitionSpec(*((None,) + qs)), qss) - if sds is not None: - qss = jax.tree.map(lambda qs: PartitionSpec(*(sds + qs)), qss) - return qss - - Qs_sharding = jax.tree.map( - add_dims_to_spec, - dummy_updates_tree, - Qs_sharding_no_leading_dims, - scanned_dim_sharding, + existing_Q=jax.tree.map(lambda d: d[0], Q) if s else Q, ) + for t, s, Q in zip(updates, scanned_layers_, Qs) + ] # maybe update preconditioner - def update_preconditioner_fn(rngkey, Qs, grads_in, bal_counter): + def update_preconditioner(key, Qs): with jax.default_matmul_precision(precond_update_precision): + if momentum_into_precond_update: + precond_updates_in = momentum_updates + else: + precond_updates_in = updates + # balance preconditioners about every 100 updates - def balance_Qs(Qs_to_bal): - def _balance_Q(Q): + def balance_Qs(Qs: List[List[jax.Array]]): + def _balance_Q(Q: List[jax.Array]): norms = jnp.array( [jnp.max(jnp.abs(q)) for q in Q], dtype=jnp.float32 ) - gmean = jnp.exp(jnp.mean(jnp.log(norms))) + gmean = jnp.prod(norms) ** (1 / len(norms)) to_mul = gmean / norms return [q * x.astype(q.dtype) for q, x in zip(Q, to_mul)] - return 
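# Numeric sketch (illustrative) of the balancing step above: rescaling every
# factor to the geometric mean of their max-abs norms leaves the implied
# Kronecker-product preconditioner unchanged while evening out magnitudes.

import jax.numpy as jnp

Q = [2.0 * jnp.eye(3), 0.5 * jnp.eye(4)]
norms = jnp.array([jnp.max(jnp.abs(q)) for q in Q])
gmean = jnp.prod(norms) ** (1 / len(norms))
balanced = [q * (gmean / n) for q, n in zip(Q, norms)]
assert jnp.allclose(jnp.kron(Q[0], Q[1]), jnp.kron(balanced[0], balanced[1]))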
jax.tree.map( - lambda _, Q, nm: _map_fn(False, 0, nm, _balance_Q, Q), - dummy_updates_tree, - Qs_to_bal, - n_dims_to_map, - ) + return [ + map_fn(s, _balance_Q, Q) if len(Q) > 1 else Q + for Q, s in zip(Qs, scanned_layers_) + ] - balance_counter_inc = safe_int32_increment(bal_counter) - do_balances = balance_counter_inc >= 100 - balance_counter_inc = jnp.where(do_balances, 0, balance_counter_inc) + key, subkey = jax.random.split(key) + do_balances = jax.random.uniform(subkey) < 0.01 Qs = jax.lax.cond(do_balances, balance_Qs, lambda qs: qs, Qs) - if have_qs_sharding: - Qs = _safe_sharding_constraint(Qs, Qs_sharding) # create random vectors - Vs = _tree_random_like(rngkey, grads_in) - # apply params sharding to random vectors - if have_params_sharding: - Vs = _safe_sharding_constraint(Vs, partitioned_sharding) - - # damp based on machine precision - damp_eps = jnp.sqrt(jnp.finfo(jnp.float32).eps) # bf16 eps too large - grads_in = jax.tree.map( + key, subkey = jax.random.split(key) + Vs_keys = jax.random.split(subkey, len(precond_updates_in)) + Vs = [ + jax.random.normal(k, shape=g.shape, dtype=g.dtype) + for k, g in zip(Vs_keys, precond_updates_in) + ] + + # damp based on machine precision (f32 probably enough) + damp_eps = jnp.sqrt(jnp.finfo(jnp.float32).eps) + precond_updates_in = jax.tree.map( lambda g, v: g + damp_eps.astype(g.dtype) * jnp.mean(jnp.abs(g)) * v, - grads_in, + precond_updates_in, Vs, ) # form conjB - conjBs = jax.tree.map( - lambda g, Q, v, nm: _map_fn(lax_map, bs, nm, _conjB, Q, g, v), - grads_in, - Qs, - Vs, - n_dims_to_map, - ) - if have_params_sharding: - conjBs = _safe_sharding_constraint(conjBs, partitioned_sharding) - - # update Qs and constrain sharding - new_Qs = jax.tree.map( - lambda g, Q, conjb, expr, nm, qss, sh: _map_fn( - lax_map, - bs, - nm, + conjBs = [ + map_fn(s, _conjB, Q, g, v) + for s, Q, g, v in zip(scanned_layers_, Qs, precond_updates_in, Vs) + ] + + # update Qs + new_Qs = [ + map_fn( + s, partial( - _update_precond, - exprs=expr, - precond_lr=preconditioner_lr, - qs_sharding=qss, - params_sharding=sh, + _update_precond, exprs=exprs, precond_lr=preconditioner_lr ), Q, g, conjb, - ), - grads_in, - Qs, - conjBs, - exprs, - n_dims_to_map, - Qs_sharding_no_leading_dims if have_qs_sharding else nones, - sharding_without_scan if have_params_sharding else nones, - ) - if have_qs_sharding: - new_Qs = _safe_sharding_constraint(new_Qs, Qs_sharding) + ) + for s, exprs, Q, g, conjb in zip( + scanned_layers_, expressions, Qs, precond_updates_in, conjBs + ) + ] new_Qs = otu.tree_cast(new_Qs, precond_dtype) - return new_Qs, balance_counter_inc - - def pass_through_fn(rngkey, qs, grads_in, bal_counter): - if have_qs_sharding: - qs = _safe_sharding_constraint(qs, Qs_sharding) - return qs, bal_counter + return new_Qs # update preconditioner deterministically update_counter_inc = safe_int32_increment(state["update_counter"]) do_update = update_counter_inc >= 1 / update_prob_in update_counter_inc = jnp.where(do_update, 0, update_counter_inc) - # Qs, balance_counter_inc = jax.lax.cond( - # do_update, - # update_preconditioner_fn, - # pass_through_fn, - # subkey, - # Qs, - # momentum_updates, - # state["balance_counter"], - # ) - - def cond_fn(state): - return state[-1] - - def iter_fn(state): - rngkey, qs, grads_in, bal_counter, _ = state - qs, bal_counter = update_preconditioner_fn(rngkey, qs, grads_in, bal_counter) - return rngkey, qs, grads_in, bal_counter, False - - while_out = jax.lax.while_loop( - cond_fn, - iter_fn, - (subkey, Qs, momentum_updates, 
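# Toy sketch (illustrative) of the deterministic trigger above, applied via
# jax.lax.cond just below: a counter fires once every ceil(1/p) steps, which
# matches the average rate of a probability-p coin flip while keeping both
# branches inside one jittable trace.

import jax
import jax.numpy as jnp

def step(counter, q, prob=0.25):
    counter = counter + 1
    do_update = counter >= 1 / prob
    counter = jnp.where(do_update, 0, counter)
    q = jax.lax.cond(do_update, lambda q: q + 1.0, lambda q: q, q)
    return counter, q

counter, q = jnp.zeros((), jnp.int32), jnp.zeros(())
for _ in range(8):
    counter, q = step(counter, q)
print(q)  # 2.0 -> fired on steps 4 and 8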
state["balance_counter"], do_update), - ) - _, Qs, _, balance_counter_inc, _ = while_out - if have_qs_sharding: - Qs = _safe_sharding_constraint(Qs, Qs_sharding) + key, subkey = jax.random.split(key) + Qs = jax.lax.cond(do_update, update_preconditioner, lambda _, qs: qs, subkey, Qs) # precondition gradients with jax.default_matmul_precision(precond_grads_precision): - precond_gs = jax.tree.map( - lambda g, Q, expr, nm: _map_fn( - lax_map, bs, nm, partial(_precond_grad, exprs=expr), Q, g - ), - momentum_updates, - Qs, - exprs, - n_dims_to_map, - ) - if have_params_sharding: - precond_gs = _safe_sharding_constraint(precond_gs, partitioned_sharding) - - # unpartition grads - if partition_grads_into_blocks: - precond_gs = jax.tree.map( - lambda g, s, ps: _map_fn( - False, - 0, - int(s), - lambda p, shapes=ps: _unstack_and_unpad_matrices(p, shapes), - g, - ), - precond_gs, - scanned_layers_, - partitioned_shapes, - ) - if have_params_sharding: - precond_gs = _safe_sharding_constraint(precond_gs, merged_params_sharding) - precond_gs = jax.tree.map( - lambda _, g, s, p_cls: _map_fn( - False, 0, int(s), p_cls.merge_partitions, g - ), - dummy_updates_tree, - precond_gs, - scanned_layers_, - partitioners, - ) - if have_params_sharding: - precond_gs = _safe_sharding_constraint(precond_gs, merged_params_sharding) - - # un-merge dimensions - if merge_small_dims: - precond_gs = jax.tree.map( - lambda g, s, os: _map_fn( - False, 0, int(s), lambda p, shape=os: jnp.reshape(p, shape), g - ), - precond_gs, - scanned_layers_, - original_shapes, - ) - if have_params_sharding: - precond_gs = _safe_sharding_constraint(precond_gs, params_sharding_) - - # return scalars to original shape - precond_gs = jax.tree.map( - lambda g, s: jnp.reshape(g, s), precond_gs, input_shapes - ) - - # final constraint for good measure - if have_params_sharding: - precond_gs = _safe_sharding_constraint(precond_gs, original_params_sharding_) + precond_gs = [ + map_fn(s, partial(_precond_grad, exprs=exprs), Q, g) + for s, exprs, Q, g in zip( + scanned_layers_, expressions, Qs, momentum_updates + ) + ] # box preconditioned grads if flax_partitioned: - flat_precond_gs, _ = jax.tree.flatten(precond_gs) precond_gs = [ - bu.replace_boxed(g) for bu, g in zip(boxed_updates, flat_precond_gs) + u.replace_boxed(pg) for u, pg in zip(boxed_updates, precond_gs) ] - precond_gs = grads_structure.unflatten(precond_gs) - if hax_partitioned: - precond_gs = updates_struct.unflatten(precond_gs) + + # unflatten pytrees + updates = grads_structure.unflatten(precond_gs) + Qs = grads_structure.unflatten(Qs) + + precond_gs = updates_struct.unflatten(precond_gs) # dtypes and new state mu = otu.tree_cast(mu, mu_dtype) Qs = otu.tree_cast(Qs, precond_dtype) state = dict( - key=key, count=count_inc, mu=mu, Qs_preconditioners=Qs, update_counter=update_counter_inc, - balance_counter=balance_counter_inc, ) - return precond_gs, state + return updates, state - return base.GradientTransformation(init_fn, update_fn) + return base.GradientTransformationExtraArgs(init_fn, update_fn) def kron( learning_rate: Union[float, Callable[[int], float]] = 0.001, b1: float = 0.9, + normalize_grads: bool = False, weight_decay: float = 0.0, weight_decay_mask: Optional[Union[Any, Callable[[base.Params], Any]]] = None, - normalize_grads: bool = False, preconditioner_update_probability: Union[ float, Callable[[int], float] ] = precond_update_prob_schedule(), max_size_triangular: int = 8192, min_ndim_triangular: int = 2, memory_save_mode: Optional[str] = None, + 
momentum_into_precond_update: bool = True, preconditioner_lr: float = 0.1, preconditioner_init_scale: float = 1.0, mu_dtype: Optional[Union[str, jnp.dtype]] = None, @@ -1042,66 +536,45 @@ def kron( scanned_layers: Optional[base.Params] = None, lax_map_scanned_layers: bool = False, lax_map_batch_size: int = 8, - merge_small_dims: bool = False, - target_merged_dim_size: int = 2048, - partition_grads_into_blocks: bool = False, - block_size: int = 256, - params_sharding: Optional[Any] = None, - preconditioner_sharding: Optional[PartitionSpec[str, str]] = None, -) -> base.GradientTransformation: +) -> base.GradientTransformationExtraArgs: """ Implements PSGD Kron from https://github.com/lixilinx/psgd_torch. Args: - learning_rate: float or callable, learning rate schedule. - b1: float, momentum parameter. 0.9 or 0.95 are common values. - weight_decay: float, weight decay coefficient. - weight_decay_mask: optional pytree same structure as params, or callable - returning a pytree, that masks weight decay. Weight decay is applied to - leaves that are True. - normalize_grads: bool, whether to normalize the incoming gradients to unit - norm layer-wise. Can help with stability. + learning_rate: float or callable, learning rate. + b1: float, momentum parameter. + normalize_grads: bool, whether to normalize gradients to unit norm layer-wise. + weight_decay: float, weight decay. + weight_decay_mask: optional Any or callable, pytree of bool same structure + as params with weight decay applied to True elements. preconditioner_update_probability: float, probability of updating the preconditioner. Default anneals from 1.0 to 0.03 by 4000 steps. max_size_triangular: int, max size for dim's preconditioner to be triangular. min_ndim_triangular: int, minimum number of dimensions a layer needs to have triangular preconditioners. memory_save_mode: optional str, None, 'one_diag', or 'all_diag', None is default - to set all preconditioners to be triangular, 'one_diag' sets the largest - or last dim to be diagonal per layer, and 'all_diag' sets all preconditioners + to set all preconditioners to be triangular. 'one_diag' sets only the largest + or last dim in a layer to be diagonal, and 'all_diag' sets all preconditioners to be diagonal. + momentum_into_precond_update: bool, whether to send momentum into preconditioner + update instead of raw gradients. preconditioner_lr: float, learning rate for preconditioner. preconditioner_init_scale: float, scale for preconditioner initialization. - mu_dtype: optional str or jnp.dtype, dtype of the momentum buffer. Defaults to - same dtype as the parameters. - precond_dtype: optional str or jnp.dtype, dtype of the preconditioners. Defaults - to 'float32'. + mu_dtype: optional str or jnp.dtype, dtype of the momentum accumulator. + Defaults to the same dtype as the parameters. + precond_dtype: optional str or jnp.dtype, dtype of the preconditioner. precond_update_precision: str, precision for matmul during preconditioner update, - 'bfloat16', 'tensorfloat32', 'float32'. + 'bfloat16', 'tensorfloat32', 'float32'. precond_grads_precision: str, precision for matmul during preconditioning grads, - 'bfloat16', 'tensorfloat32', 'float32'. - scanned_layers: optional base.Params, tree of booleans same structure as - params indicating scanned dimensions for each layer. PSGD will vmap over - leading dimension. + 'bfloat16', 'tensorfloat32', 'float32'. + scanned_layers: optional base.Params, tree of bool same structure as params + indicating scanned layers. PSGD will vmap over the first dim. 
lax_map_scanned_layers: bool, whether to use lax.map for scanned layers instead of vmap. Useful to save memory with large models. lax_map_batch_size: int, batch size for lax.map, see JAX docs for more info. - merge_small_dims: bool, whether to merge small dimensions to improve - preconditioner efficiency. - target_merged_dim_size: int, target size of merged dimensions. - partition_grads_into_blocks: bool, whether to partition grads into chunks of - size `block_size` for efficiency. - block_size: int, block size to use for partitioning grads. - params_sharding: pytree same structure as params of jax.sharding.PartitionSpec. - preconditioner_sharding: `None` or `PartitionSpec(str | None, str | None)`, - PartitionSpec for preconditioner matrices. `None` infers a strategy - from params_sharding that matches first preconditioner axis to - corresponding axis in params. Best practice, though, is to shard the first - dimension across fsdp-like mesh axis, or the largest, most common axis in - params. For example, PartitionSpec('fsdp') or PartitionSpec('fsdp', 'tp'). Returns: - optax.GradientTransformation + optax.GradientTransformationExtraArgs """ optimizer = [ scale_by_kron( @@ -1111,6 +584,7 @@ def kron( max_size_triangular=max_size_triangular, min_ndim_triangular=min_ndim_triangular, memory_save_mode=memory_save_mode, + momentum_into_precond_update=momentum_into_precond_update, preconditioner_lr=preconditioner_lr, preconditioner_init_scale=preconditioner_init_scale, mu_dtype=mu_dtype, @@ -1120,12 +594,6 @@ def kron( scanned_layers=scanned_layers, lax_map_scanned_layers=lax_map_scanned_layers, lax_map_batch_size=lax_map_batch_size, - merge_small_dims=merge_small_dims, - target_merged_dim_size=target_merged_dim_size, - partition_grads_into_blocks=partition_grads_into_blocks, - block_size=block_size, - params_sharding=params_sharding, - preconditioner_sharding=preconditioner_sharding, ) ] if weight_decay > 0.0: @@ -1134,112 +602,104 @@ def kron( return chain(*optimizer) -def get_opt_state_partition_specs( - params: base.Params, scale_by_kron_only: bool = False, **kwargs -): - """Get tree of PartitionSpecs for kron optimizer state. +def _add_tiny(x): + return x + jnp.finfo(x.dtype).tiny - params converted to jax.ShapeDtypeStructs, no arrays are used. - Args: - params: pytree of Arrays, nn.Partitioned, or jax.ShapeDtypeStruct. - scale_by_kron_only: bool, If True, only returns partition specs for the - `scale_by_kron` function, otherwise the `kron` function. - kwargs: kwargs for kron (or scale_by_kron). +def _norm_lower_bound(A: jax.Array): + """Returns a cheap lower bound for the spectral norm of A. - Returns: - tree of PartitionSpecs for optimizer state. + Numerical results on random matrices with a wide range of distributions and + sizes suggest, norm(A) <= sqrt(2) * norm_lower_bound(A). Looks to be a very + tight lower bound. 
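A quick empirical check (illustrative, not from the original): by construction
the bound never exceeds the true spectral norm, and in practice sqrt(2) times
the bound covers it:

    import jax
    import jax.numpy as jnp

    A = jax.random.normal(jax.random.PRNGKey(0), (16, 16))
    A = A @ A.T  # the bound is applied to symmetric terms like term1 + term2
    lb = _norm_lower_bound(A)
    sn = jnp.linalg.norm(A, ord=2)
    print(lb <= sn, sn <= jnp.sqrt(2.0) * lb)  # True True (empirically)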
""" - params_flat, params_struct = jax.tree.flatten(params) - if have_flax: - if isinstance(params_flat[0], nn.Partitioned): - params_flat = [p.unbox(p) for p in params_flat] - if not isinstance(params_flat[0], jax.ShapeDtypeStruct): - params_flat = [jax.ShapeDtypeStruct(p.shape, p.dtype) for p in params_flat] - params = params_struct.unflatten(params_flat) - - specs = scale_by_kron(**kwargs).init(params, return_partition_specs_only=True) - - if not scale_by_kron_only: - specs = (specs,) - if kwargs.get("weight_decay", 0.0) > 0.0: - specs += (None,) - specs += (None,) - - return specs - - -def _get_preconditioner_types( - shape: Tuple[int, ...], max_size: int, min_ndim: int, mem_save_mode: Optional[str] -) -> List[bool]: - if len(shape) == 0: - return True - - if mem_save_mode is None: - dim_diag = [False for _ in shape] - elif mem_save_mode == "one_diag": - rev_sorted_dims = np.argsort(shape)[::-1] - dim_diag = [False for _ in shape] - dim_diag[rev_sorted_dims[0]] = True - elif mem_save_mode == "all_diag": - dim_diag = [True for _ in shape] - else: - raise ValueError( - f"Invalid mem_save_mode: {mem_save_mode}, must be one of " - "[None, 'one_diag', 'all_diag']" - ) + max_abs = jnp.max(jnp.abs(A)) + + def calc(A): + A = A / max_abs + A_conj = A.conj() + + aa = jnp.real(A * A_conj) + + aa_sum0 = jnp.sum(aa, axis=0) + aa_sum1 = jnp.sum(aa, axis=1) + i = jnp.argmax(aa_sum0, 0) + j = jnp.argmax(aa_sum1, 0) + value0 = jax.lax.dynamic_index_in_dim(aa_sum0, i, 0, keepdims=False) + value1 = jax.lax.dynamic_index_in_dim(aa_sum1, j, 0, keepdims=False) + + def gt_branch(): + x = jax.lax.dynamic_index_in_dim(A, i, 1, keepdims=False) + x = x.conj() @ A + return max_abs * jnp.linalg.norm((x / jnp.linalg.norm(x)) @ A_conj.T) + + def le_branch(): + x = jax.lax.dynamic_index_in_dim(A, j, 0, keepdims=False) + x = A @ x.conj() + return max_abs * jnp.linalg.norm(A_conj.T @ (x / jnp.linalg.norm(x))) - for i, size in enumerate(shape): - if size == 1 or size > max_size or len(shape) < min_ndim: - dim_diag[i] = True + return jax.lax.cond(value0 > value1, gt_branch, le_branch) - return dim_diag + def no_calc(_): + return max_abs + + return jax.lax.cond(max_abs > 0, calc, no_calc, A) def _init_Q_exprs( - t_shape, - scale, - dim_diag, - dtype, - existing_Q=None, - precond_sharding=None, - param_sharding=None, + t, scale, max_size, min_ndim_triangular, memory_save_mode, dtype, existing_Q=None ): - have_qs_sharding = precond_sharding is not None or param_sharding is not None + """For a scalar or tensor `t`, we initialize its preconditioner `Q` and + reusable contraction expressions for updating `Q` and preconditioning gradient. + """ letters = string.ascii_lowercase + string.ascii_uppercase - if len(t_shape) == 0: # scalar - Q = [scale * jnp.ones(t_shape, dtype=dtype)] if existing_Q is None else existing_Q + + shape = t.shape + if len(shape) == 0: # scalar + Q = ( + [scale * jnp.ones_like(t, dtype=dtype)] + if existing_Q is None + else existing_Q + ) exprA = ",->" exprGs = [",->"] exprP = ",,->" - - sharding_out = [None] - if have_qs_sharding: - sharding_out = [PartitionSpec()] else: # tensor - if len(t_shape) > 13: + if len(shape) > 13: + raise ValueError( + f"Got tensor with dim {len(t.shape)}; Einstein runs out of letters!" 
+ ) + + scale = scale ** (1 / len(shape)) + + if memory_save_mode is None: + dim_diag = [False for _ in shape] + elif memory_save_mode == "one_diag": + rev_sorted_dims = np.argsort(shape)[::-1] + dim_diag = [False for _ in shape] + dim_diag[rev_sorted_dims[0]] = True + elif memory_save_mode == "all_diag": + dim_diag = [True for _ in shape] + else: raise ValueError( - f"Got tensor with dim {len(t_shape.shape)}; Einstein runs out of letters!" + f"Invalid memory_save_mode: {memory_save_mode}, must be one of " + "[None, 'one_diag', 'all_diag']" ) - scale = scale ** (1 / len(t_shape)) + Q = [] if existing_Q is None else existing_Q piece1A, piece2A, piece3A = ([], "", "") exprGs = [] piece1P, piece2P, piece3P, piece4P = ([], [], "", "") - - params_specs = param_sharding - if param_sharding is None: - params_specs = PartitionSpec(*((None,) * len(t_shape))) - sharding_out = [None] * len(t_shape) - if have_qs_sharding: - sharding_out = [PartitionSpec(None)] * len(t_shape) - - for i, (size, dim_d, dim_sh) in enumerate(zip(t_shape, dim_diag, params_specs)): - if dim_d: + for i, (size, dim_d) in enumerate(zip(shape, dim_diag)): + if ( + size == 1 + or size > max_size + or len(shape) < min_ndim_triangular + or dim_d + ): # use diagonal matrix as preconditioner for this dim if existing_Q is None: - q = scale * jnp.ones(size, dtype=dtype) - Q.append(q) + Q.append(scale * jnp.ones(size, dtype=dtype)) piece1A.append(letters[i]) piece2A = piece2A + letters[i] @@ -1248,7 +708,7 @@ def _init_Q_exprs( piece1 = "".join( [ (letters[i + 13] if j == i else letters[j]) - for j in range(len(t_shape)) + for j in range(len(shape)) ] ) exprGs.append(piece1 + "," + piece1 + "->" + letters[i + 13]) @@ -1259,41 +719,8 @@ def _init_Q_exprs( piece4P = piece4P + letters[i + 13] else: # use triangular matrix as preconditioner for this dim - q_sharding = None - if have_qs_sharding: - if have_hax: - # if we're in haliax we can grab fsdp axis and shard accordingly - # get current mesh - mesh = hax.partitioning._get_mesh() - if mesh.devices.shape == (): - mesh = None - # get fsdp mesh axis - if mesh is not None: - fsdp_axis_name = hax.partitioning.ResourceAxis.DATA - fsdp_axis = mesh.axis_names.index(fsdp_axis_name) - fsdp_size = mesh.devices.shape[fsdp_axis] - if size % fsdp_size == 0: - q_sharding = PartitionSpec(fsdp_axis_name, None) - else: - q_sharding = PartitionSpec(None, None) - else: - q_sharding = PartitionSpec(None, None) - else: - # infer a so-so sharding scheme from params if nothing specified - # (first dim of q will match corresponding dim in params) - q_sharding = ( - precond_sharding - if precond_sharding is not None - else PartitionSpec(dim_sh, None) - ) - # TODO ensure array axis is divisible by mesh axis - sharding_out[i] = q_sharding - if existing_Q is None: - q = scale * jnp.eye(size, dtype=dtype) - if have_qs_sharding: - q = _safe_sharding_constraint(q, q_sharding) - Q.append(q) + Q.append(scale * jnp.eye(size, dtype=dtype)) piece1A.append(letters[i] + letters[i + 13]) piece2A = piece2A + letters[i + 13] @@ -1302,13 +729,13 @@ def _init_Q_exprs( piece1 = "".join( [ (letters[i + 13] if j == i else letters[j]) - for j in range(len(t_shape)) + for j in range(len(shape)) ] ) piece2 = "".join( [ (letters[i + 26] if j == i else letters[j]) - for j in range(len(t_shape)) + for j in range(len(shape)) ] ) exprGs.append( @@ -1328,31 +755,8 @@ def _init_Q_exprs( exprGs = tuple(exprGs) if existing_Q is not None: - return (exprA, exprGs, exprP), sharding_out - return Q, (exprA, exprGs, exprP), sharding_out - - -def 
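# Toy expansion (illustrative) of the branch logic above for a (4, 3) tensor
# with memory_save_mode='one_diag': the largest dim gets a diagonal (vector)
# preconditioner, the other a triangular matrix, each scaled by
# scale ** (1 / ndim).

import jax.numpy as jnp

shape, scale = (4, 3), 1.0
s = scale ** (1 / len(shape))
Q = [s * jnp.ones(4), s * jnp.eye(3)]  # diag factor for dim 4, dense for dim 3
print([q.shape for q in Q])  # [(4,), (3, 3)]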
_norm_lower_bound(A: jax.Array): - """Returns a cheap lower bound for the spectral norm of A. - - Numerical results on random matrices with a wide range of distributions and - sizes suggest, norm(A) <= sqrt(2) * norm_lower_bound(A). Looks to be a very - tight lower bound. - - A is hermitian so we can always use dim 0 and not have to compare to dim 1. - """ - max_abs = jnp.max(jnp.abs(A)) - - def calc(A): - A = A / max_abs - aa = A * A - aa_sum0 = jnp.sum(aa, axis=0) - i = jnp.argmax(aa_sum0, 0) - x = jax.lax.dynamic_index_in_dim(A, i, 1, keepdims=False) - x = x @ A - return max_abs * jnp.linalg.norm((x / jnp.linalg.norm(x)) @ A.T) - - return jnp.where(max_abs > 0, calc(A), max_abs) + return exprA, exprGs, exprP + return [Q, (exprA, exprGs, exprP)] def _solve_triangular_right(X, A): @@ -1383,7 +787,7 @@ def _conjB(Q, G, V): """Compute conjB.""" order = G.ndim p = list(range(order)) - conjB = jnp.transpose(V, p[1:] + p[:1]) + conjB = jnp.transpose(V.conj(), p[1:] + p[:1]) for i, q in enumerate(Q): conjB = conjB / q if q.ndim < 2 else _solve_triangular_right(conjB, q) if i < order - 1: @@ -1391,40 +795,30 @@ def _conjB(Q, G, V): return conjB -def _update_precond(Q, G, conjB, exprs, precond_lr, qs_sharding, params_sharding): +def _update_precond(Q, G, conjB, exprs, precond_lr): """Compute A and update Q.""" exprA, exprGs, _ = exprs A = jnp.einsum(exprA, *Q, G) + A_conj = A.conj() + conjB_conj = conjB.conj() + def _update_single_q(i, q): - term1 = jnp.einsum(exprGs[i], A, A) - term2 = jnp.einsum(exprGs[i], conjB, conjB) + term1 = jnp.einsum(exprGs[i], A, A_conj) + term2 = jnp.einsum(exprGs[i], conjB_conj, conjB) + tmp = term1 - term2 + tmp *= precond_lr if q.ndim < 2: - q -= ( - precond_lr - / _add_tiny(jnp.max(jnp.abs(term1 + term2))) - * (term1 - term2) - * q - ) + tmp *= q + tmp /= _add_tiny(jnp.max(jnp.abs(term1 + term2))) + q -= tmp else: - if qs_sharding is not None: - sharding = qs_sharding[i] - # transpose q sharding for terms - if len(sharding) < 2: - sharding = PartitionSpec(*((None,) + sharding)) - else: - assert len(sharding) == 2 - sharding = PartitionSpec(*(sharding[1:] + sharding[:1])) - term1 = _safe_sharding_constraint(term1, sharding) - term2 = _safe_sharding_constraint(term2, sharding) - q -= ( - precond_lr - / _add_tiny(_norm_lower_bound(term1 + term2)) - * jnp.triu(term1 - term2) - @ q - ) + tmp = jnp.triu(tmp) + tmp /= _add_tiny(_norm_lower_bound(term1 + term2)) + tmp @= q + q -= tmp return q return [_update_single_q(i, q) for i, q in enumerate(Q)] @@ -1433,300 +827,1596 @@ def _update_single_q(i, q): def _precond_grad(Q, G, exprs): """Precondition gradient G with preconditioner Q.""" exprP = exprs[-1] - return jnp.einsum(exprP, *Q, *Q, G) - - -def _safe_sharding_constraint(x, sharding): - if sharding is None: - return x - else: - return with_sharding_constraint(x, sharding) - - -def _add_tiny(x): - return x + jnp.finfo(x.dtype).tiny - - -def _map_fn(lax_map, bs, n_maps, fn, *args): - """Maybe map a fn along multiple leading axes.""" - if n_maps <= 0: - return fn(*args) - - if lax_map: - mapped_fn = lambda xs: _map_fn(lax_map, bs, n_maps - 1, fn, *xs) - return jax.lax.map(mapped_fn, xs=args, batch_size=bs if bs > 1 else None) - else: - mapped_fn = lambda *xs: _map_fn(lax_map, bs, n_maps - 1, fn, *xs) - return vmap(mapped_fn)(*args) - - -def _tree_random_like( - rng_key: chex.PRNGKey, target_tree: chex.ArrayTree, dtype=None -) -> chex.ArrayTree: - # adopted from optax - tree_def = jax.tree.structure(target_tree) - keys = jax.random.split(rng_key, tree_def.num_leaves) - 
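# Worked 2-D example (with illustrative labels, not the letters _init_Q_exprs
# actually picks) of the exprP contraction used by _precond_grad above: with
# per-dim factors Qa, Qb, preconditioning computes (Qa^T Qa) G (Qb^T Qb).

import jax.numpy as jnp

n, m = 4, 3
Qa, Qb = jnp.triu(jnp.ones((n, n))), jnp.triu(jnp.ones((m, m)))
G = jnp.ones((n, m))
exprP = "aA,bB,aC,bD,CD->AB"  # both copies of each factor, then the gradient
out = jnp.einsum(exprP, Qa, Qb, Qa, Qb, G)
assert jnp.allclose(out, (Qa.T @ Qa) @ G @ (Qb.T @ Qb))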
keys_tree = jax.tree.unflatten(tree_def, keys) - return jax.tree.map( - lambda l, k: jax.random.normal( - k, l.shape, dtype if dtype is not None else l.dtype - ), - target_tree, - keys_tree, - ) - - -class BlockPartitioner: - """Partitions a tensor into smaller tensors. - - Modified from distributed_shampoo. - https://github.com/google-research/google-research/blob/master/scalable_shampoo/optax/distributed_shampoo.py - Scalable Second Order Optimization for Deep Learning, - Rohan Anil, Vineet Gupta, Tomer Koren, Kevin Regan, Yoram Singer - https://arxiv.org/abs/2002.09018 - """ - - def __init__(self, param_shape, block_size, dim_diag): - assert len(dim_diag) == len( - param_shape - ), "dim_diag must have same length as param_shape" - self._shape = param_shape - self._splits = [] - split_sizes = [] - # We split params into smaller blocks. Here we store the metadata to make - # that split. - for i, d in enumerate(param_shape): - if 0 < block_size < d and not dim_diag[i]: - # d-1, otherwise split appends a 0-size array. - nsplit = (d - 1) // block_size - indices = (np.arange(nsplit, dtype=np.int32) + 1) * block_size - sizes = np.ones(nsplit + 1, dtype=np.int32) * block_size - sizes[-1] = d - indices[-1] - self._splits.append((i, indices)) - split_sizes.append(sizes) - else: - split_sizes.append(np.array([d], dtype=np.int32)) - self._split_sizes = split_sizes - - # TODO (evanatyourservice) - # this might fail with scalar params but for now we're reshaping those - single_shape = [a[0] for a in split_sizes] - padded_single_shape = [-(-dim // block_size) * block_size for dim in single_shape] - stack_size = max(1, np.prod([max(1, len(s)) for s in split_sizes])) - self._padded_stacked_shape = tuple([stack_size] + padded_single_shape) - - def split_sizes(self): - return self._split_sizes - - def partition(self, tensor): - """Partition tensor into blocks.""" - - assert tensor.shape == self._shape - tensors = [tensor] - for i, indices in self._splits: - tensors_local = [] - for t in tensors: - tensors_local.extend(jnp.split(t, indices_or_sections=indices, axis=i)) - tensors = tensors_local - return tuple(tensors) - - def merge_partitions(self, partitions): - """Merge partitions back to original shape.""" - - for i, indices in reversed(self._splits): - n = len(indices) + 1 - partial_merged_tensors = [] - ind = 0 - while ind < len(partitions): - partial_merged_tensors.append( - jnp.concatenate(partitions[ind : ind + n], axis=i) - ) - ind += n - partitions = partial_merged_tensors - assert len(partitions) == 1 - return partitions[0] - - -def _partitions(lst): - """Generate all partitions of a list.""" - if not lst: - yield [[]] - else: - for i in range(len(lst)): - for part in _partitions(lst[i + 1 :]): - yield [lst[: i + 1]] + part - -""" -128, 4, 4, 8 -(128, 512) -""" - -def _merge_small_dims( - shape_to_merge, max_dim, dim_diag, sharding_to_merge=None -) -> Tuple[List[int], List[bool], Optional[Tuple]]: - if not shape_to_merge: # handles scalar shape () - return [], [True], PartitionSpec() if sharding_to_merge is not None else None - if np.all(np.array(shape_to_merge) == 1): # handles shape (1,) - return ( - [1], - [True], - PartitionSpec(None) if sharding_to_merge is not None else None, - ) - - def dim2loss(d, dim0=max_dim): - """A heuristic map from dim to loss with the least loss occurs at dim0.""" - loss = 0 - if d < dim0: - loss += np.log2(dim0 / d) - too_small = dim0 / 8 - if d < too_small: - loss += 100 * np.log2(too_small / d) - else: - loss += 10 * np.log2(d / dim0) - too_large = 8 * dim0 - 
if d > too_large: - loss += 1000 * np.log2(d / too_large) - return loss - - best_loss = float("inf") - best_partition = None - - for p in _partitions(list(range(len(shape_to_merge)))): - loss = 0 - merged = [] - for group in p: - if not group: - continue - d = np.prod([shape_to_merge[i] for i in group]) - loss += dim2loss(d) - merged.append(group) - - if loss < best_loss: - best_loss = loss - best_partition = merged - - merged_shape = [] - merged_diag = [] - merged_sharding = [] - - for group in best_partition: - merged_shape.append(np.prod([shape_to_merge[i] for i in group])) - merged_diag.append(all(dim_diag[i] for i in group)) - if sharding_to_merge: - group_shardings = [sharding_to_merge[i] for i in group] - valid_shardings = [s for s in group_shardings if s is not None] - - if len(valid_shardings) > 1: - merged_sharding.append(tuple(valid_shardings)) - elif len(valid_shardings) == 1: - merged_sharding.append(valid_shardings[0]) - else: - merged_sharding.append(None) - - return ( - merged_shape, - merged_diag, - PartitionSpec(*merged_sharding) if sharding_to_merge else None, - ) - - -def _pad_and_stack_matrices(array_list, block_size): - # Handle scalar arrays by adding a dummy dimension - is_scalar = len(array_list[0].shape) == 0 - if is_scalar: - array_list = [arr[None] for arr in array_list] - - shapes = [arr.shape for arr in array_list] - max_dims = [max(shape[i] for shape in shapes) for i in range(len(shapes[0]))] - padded_shape = [-(-dim // block_size) * block_size for dim in max_dims] - padded_arrays = [] - for arr in array_list: - pad_width = [(0, padded_shape[i] - arr.shape[i]) for i in range(arr.ndim)] - padded = jnp.pad(arr, pad_width) - padded_arrays.append(padded) - - stacked = jnp.stack(padded_arrays) - return stacked - - -def _unstack_and_unpad_matrices(stacked_array, original_shapes): - # Handle scalar arrays - is_scalar = len(original_shapes[0]) == 0 - - unstacked = jnp.split(stacked_array, stacked_array.shape[0], axis=0) - unpadded = [] - for arr, orig_shape in zip(unstacked, original_shapes): - arr = jnp.squeeze(arr, axis=0) - if is_scalar: - # For scalars, just take the first element - arr = arr[0] - else: - # For non-scalars, slice to original shape - slices = tuple(slice(0, dim) for dim in orig_shape) - arr = arr[slices] - unpadded.append(arr) - return tuple(unpadded) - - -# unused fns (can be used for stacking partitions without padding): -def _sort_and_group_matrices(matrix_shapes: List[Tuple[int, ...]]): - indexed_list = list(enumerate(matrix_shapes)) - sorted_indexed = sorted(indexed_list, key=lambda x: x[1]) - sorted_shapes = [shape for _, shape in sorted_indexed] - change_indices = [original_index for original_index, _ in sorted_indexed] - revert_indices = [0] * len(matrix_shapes) - for new_pos, (original_index, _) in enumerate(sorted_indexed): - revert_indices[original_index] = new_pos - shape_groups = defaultdict(list) - for i, shape in enumerate(sorted_shapes): - shape_groups[shape].append(i) - unique_sorted_shapes = list(shape_groups.keys()) - return unique_sorted_shapes, dict(shape_groups), change_indices, revert_indices - - -def _stack_matrices(array_list): - in_tuple = isinstance(array_list, tuple) - shapes = [arr.shape for arr in array_list] - unique_shapes, shape_groups, change_indices, _ = _sort_and_group_matrices(shapes) - sorted_arrays = [array_list[i] for i in change_indices] - stacked_arrays = [] - for shape in unique_shapes: - indices = shape_groups[shape] - stacked = jnp.stack([sorted_arrays[i] for i in indices]) - 
stacked_arrays.append(stacked) - if in_tuple: - return tuple(stacked_arrays) - return stacked_arrays - - -def _unstack_matrices(stacked_arrays, revert_indices): - in_tuple = isinstance(stacked_arrays, tuple) - unstacked = [] - for arr in stacked_arrays: - unstacked.extend(jnp.split(arr, arr.shape[0])) - array_list = [jnp.squeeze(unstacked[i], axis=0) for i in revert_indices] - if in_tuple: - return tuple(array_list) - return array_list - - -if __name__ == "__main__": - import jax_sourceror - - axis_a = hax.Axis("d", 128) - axis_b = hax.Axis("b", 8) - - params = { - "w": hax.NamedArray(jnp.ones((128, 8)), (axis_a, axis_b)), - "b": hax.NamedArray(jnp.ones((128,)), (axis_a,)), - } - grads = { - "w": hax.NamedArray(jnp.ones((128, 8)), (axis_a, axis_b)), - "b": hax.NamedArray(jnp.ones((128,)), (axis_a,)), - } - - optimizer = kron() - opt_state = optimizer.init(params) - source_code = jax_sourceror.sourcerize(optimizer.update)(grads, opt_state, params) - - print(source_code) + return jnp.einsum(exprP, *[q.conj() for q in Q], *Q, G) + + +# """PSGD Kron""" +# from typing import Any, List, Optional, Union, Callable, Tuple +# from collections import defaultdict +# from functools import partial +# import string +# import numpy as np + +# import chex +# import jax +# from jax import numpy as jnp, vmap +# from jax.sharding import PartitionSpec +# from jax.lax import with_sharding_constraint +# from optax import tree_utils as otu +# from optax._src import base, transform +# from optax._src.numerics import safe_int32_increment +# from optax._src.utils import canonicalize_dtype +# from optax._src.combine import chain + +# try: +# import flax.linen as nn + +# have_flax = True +# except ImportError: +# have_flax = False +# try: +# import haliax as hax + +# have_hax = True +# except ImportError: +# have_hax = False + + +# def precond_update_prob_schedule( +# max_prob=1.0, min_prob=0.03, decay=0.001, flat_start=500 +# ): +# """Anneal preconditioner update probability during beginning of training. + +# PSGD benefits from more preconditioner updates at the beginning of training, +# but once the preconditioner is learned the update probability can drop low. + +# This schedule is an exponential anneal with a flat start. Default settings keep +# update probability at 1.0 for 500 steps then exponentially anneal down to +# `min_prob` by 4000 steps. Default settings work well for most models and +# training regimes. 
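+#
+#     For example, with the defaults (max_prob=1.0, min_prob=0.03, decay=0.001,
+#     flat_start=500) the schedule gives prob(500) = 1.0,
+#     prob(1000) = exp(-0.5) ~ 0.61, and prob(4000) = exp(-3.5) ~ 0.03, after
+#     which it stays clipped at min_prob.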
+# """ + +# def _schedule(n): +# """Exponential anneal with flat start.""" +# return jnp.clip(max_prob * jnp.exp(-decay * (n - flat_start)), min_prob, max_prob) + +# return _schedule + + +# def scale_by_kron( +# b1: float = 0.9, +# normalize_grads: bool = False, +# preconditioner_update_probability: Union[ +# float, Callable[[int], float] +# ] = precond_update_prob_schedule(), +# max_size_triangular: int = 8192, +# min_ndim_triangular: int = 2, +# memory_save_mode: Optional[str] = None, +# preconditioner_lr: float = 0.1, +# preconditioner_init_scale: float = 1.0, +# mu_dtype: Optional[Union[str, jnp.dtype]] = None, +# precond_dtype: Optional[Union[str, jnp.dtype]] = None, +# precond_update_precision: Optional[str] = "tensorfloat32", +# precond_grads_precision: Optional[str] = None, +# scanned_layers: Optional[base.Params] = None, +# lax_map_scanned_layers: bool = False, +# lax_map_batch_size: int = 8, +# merge_small_dims: bool = False, +# target_merged_dim_size: int = 2048, +# partition_grads_into_blocks: bool = False, +# block_size: int = 256, +# params_sharding: Optional[Any] = None, +# preconditioner_sharding: Optional[PartitionSpec[str, str]] = None, +# **kwargs, +# ) -> base.GradientTransformation: +# """ +# Implements PSGD Kron from https://github.com/lixilinx/psgd_torch. + +# Args: +# b1: float, momentum parameter. 0.9 or 0.95 are common values. +# normalize_grads: bool, whether to normalize the incoming gradients to unit +# norm layer-wise. Can help with stability. +# preconditioner_update_probability: float, probability of updating the +# preconditioner. Default anneals from 1.0 to 0.03 by 4000 steps. +# max_size_triangular: int, max size for dim's preconditioner to be triangular. +# min_ndim_triangular: int, minimum number of dimensions a layer needs to have +# triangular preconditioners. +# memory_save_mode: optional str, None, 'one_diag', or 'all_diag', None is default +# to set all preconditioners to be triangular, 'one_diag' sets the largest +# or last dim to be diagonal per layer, and 'all_diag' sets all preconditioners +# to be diagonal. +# preconditioner_lr: float, learning rate for preconditioner. +# preconditioner_init_scale: float, scale for preconditioner initialization. +# mu_dtype: optional str or jnp.dtype, dtype of the momentum buffer. Defaults to +# same dtype as the parameters. +# precond_dtype: optional str or jnp.dtype, dtype of the preconditioners. Defaults +# to 'float32'. +# precond_update_precision: str, precision for matmul during preconditioner update, +# 'bfloat16', 'tensorfloat32', 'float32'. +# precond_grads_precision: str, precision for matmul during preconditioning grads, +# 'bfloat16', 'tensorfloat32', 'float32'. +# scanned_layers: optional base.Params, tree of booleans same structure as +# params indicating scanned dimensions for each layer. PSGD will vmap over +# leading dimension. +# lax_map_scanned_layers: bool, whether to use lax.map for scanned layers +# instead of vmap. Useful to save memory with large models. +# lax_map_batch_size: int, batch size for lax.map, see JAX docs for more info. +# merge_small_dims: bool, whether to merge small dimensions to improve +# preconditioner efficiency. +# target_merged_dim_size: int, target size of merged dimensions. +# partition_grads_into_blocks: bool, whether to partition grads into chunks of +# size `block_size` for efficiency. +# block_size: int, block size to use for partitioning grads. +# params_sharding: pytree same structure as params of jax.sharding.PartitionSpec. 
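+#             For example, for a params tree like {"w": jnp.ones((1024, 512))}
+#             on a mesh with an 'fsdp' axis, {"w": PartitionSpec('fsdp', None)}
+#             would be a valid spec tree (hypothetical names; any PartitionSpec
+#             tree matching the structure of params works).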
+# preconditioner_sharding: `None` or `PartitionSpec(str | None, str | None)`, +# PartitionSpec for preconditioner matrices. `None` infers a strategy +# from params_sharding that matches first preconditioner axis to +# corresponding axis in params. Best practice, though, is to shard the first +# dimension across fsdp-like mesh axis, or the largest, most common axis in +# params. For example, PartitionSpec('fsdp') or PartitionSpec('fsdp', 'tp'). + +# Returns: +# optax.GradientTransformation +# """ +# mu_dtype = canonicalize_dtype(mu_dtype) +# precond_dtype = canonicalize_dtype(precond_dtype or jnp.float32) +# lax_map = lax_map_scanned_layers +# bs = lax_map_batch_size + +# def init_fn(params, return_partition_specs_only=False): +# # unbox if haliax style partitioned +# scanned_layers_ = scanned_layers +# params_sharding_ = params_sharding +# if have_hax: +# if any( +# isinstance(x, hax.NamedArray) +# for x in jax.tree.leaves( +# params, is_leaf=lambda x: isinstance(x, hax.NamedArray) +# ) +# ): +# # if in haliax, we can grab scanned_layers and params_sharding from params +# # this does not support nested stacks +# if scanned_layers_ is None: +# scanned_layers_ = jax.tree.map( +# lambda x: ( +# jax.tree.map(lambda _: True, x) +# if isinstance(x, hax.nn.Stacked) +# else False +# ), +# params, +# is_leaf=lambda x: isinstance(x, hax.nn.Stacked), +# ) +# if params_sharding_ is None: +# try: +# params_sharding_ = hax.partitioning.infer_resource_partitions(params) +# params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) +# except: +# params_sharding_ = None +# params, params_struct = jax.tree.flatten(params) +# scanned_layers_ = jax.tree.leaves(scanned_layers_) +# print(f"kron scanned_layers_: {scanned_layers_}") +# if params_sharding_ is not None: +# params_sharding_ = jax.tree.leaves(params_sharding_) +# print(f"kron params_sharding_: {params_sharding_}") + +# have_params_sharding = params_sharding_ is not None +# have_qs_sharding = have_params_sharding or preconditioner_sharding is not None + +# # unbox if flax style partitioned +# if have_flax: +# params = jax.tree.map( +# lambda x: x.unbox() if isinstance(x, nn.Partitioned) else x, +# params, +# is_leaf=lambda x: isinstance(x, nn.Partitioned), +# ) + +# # check that there is a PartitionSpec for every param +# if params_sharding_ is not None: +# assert len(jax.tree.leaves(params_sharding_)) == len( +# jax.tree.leaves(params) +# ), "There must be a PartitionSpec for every parameter in PSGD Kron." +# # check that preconditioner sharding length is at least 1 +# if preconditioner_sharding is not None: +# assert len(preconditioner_sharding) > 0, ( +# "preconditioner_sharding must have length > 0. For example, " +# "PartitionSpec(None) or PartitionSpec('fsdp', None) are valid." 
+# ) + +# # extend partition specs +# if have_params_sharding: +# params_sharding_ = jax.tree.map( +# lambda p, sh: PartitionSpec(*(sh + (None,) * (len(p.shape) - len(sh)))), +# params, +# params_sharding_, +# ) +# preconditioner_sharding_ = preconditioner_sharding +# if preconditioner_sharding is not None: +# if len(preconditioner_sharding) < 2: +# preconditioner_sharding_ = PartitionSpec(preconditioner_sharding[0], None) + +# # reshape params shaped () to (1,) to make things simpler +# params = jax.tree.map(lambda p: p[None] if len(p.shape) == 0 else p, params) +# if have_params_sharding: +# params_sharding_ = jax.tree.map( +# lambda sh: PartitionSpec(None) if sh == PartitionSpec() else sh, +# params_sharding_, +# ) + +# # scanned layers +# if scanned_layers_ is None: +# scanned_layers_ = jax.tree.map(lambda _: False, params) +# scanned_sizes = jax.tree.map( +# lambda p, s: p.shape[0] if s else 0, params, scanned_layers_ +# ) + +# # momentum +# mu = None +# mu_sharding = params_sharding_ +# if b1 > 0 and not return_partition_specs_only: +# mu = jax.tree.map(lambda x: jnp.zeros_like(x, dtype=mu_dtype), params) +# # apply params sharding to momentum buffer +# if have_params_sharding: +# mu = _safe_sharding_constraint(mu, params_sharding_) + +# # which preconditioners will be diagonal +# dim_diag = jax.tree.map( +# lambda p, s: _get_preconditioner_types( +# p.shape[int(s) :], +# max_size_triangular, +# min_ndim_triangular, +# memory_save_mode, +# ), +# params, +# scanned_layers_, +# ) + +# # split sharding specs +# scanned_dim_sharding = None +# sharding_without_scan = None +# if have_params_sharding: +# scanned_dim_sharding = jax.tree.map( +# lambda sh, s: PartitionSpec(sh[0]) if s else None, +# params_sharding_, +# scanned_layers_, +# ) +# sharding_without_scan = jax.tree.map( +# lambda sh, s: PartitionSpec(*(sh[int(s) :])), +# params_sharding_, +# scanned_layers_, +# ) + +# # merge small dimensions +# nones = jax.tree.map(lambda _: None, params) +# merged_shapes = jax.tree.map( +# lambda p, s: p.shape[int(s) :], params, scanned_layers_ +# ) +# if merge_small_dims: +# output = jax.tree.map( +# lambda p, s, dd, sh: _merge_small_dims( +# p.shape[int(s) :], target_merged_dim_size, dd, sh +# ), +# params, +# scanned_layers_, +# dim_diag, +# sharding_without_scan if have_params_sharding else nones, +# ) +# merged_shapes, dim_diag, sharding_without_scan = [ +# jax.tree.map(lambda _, x: x[i], params, output) for i in range(3) +# ] + +# # partition grads into blocks +# partitioned_shapes = merged_shapes +# if partition_grads_into_blocks: +# partitioners = jax.tree.map( +# lambda _, ps, dd: BlockPartitioner(ps, block_size, dd), +# params, +# merged_shapes, +# dim_diag, +# ) +# # we can grab resulting shapes from partitioners +# partitioned_shapes = jax.tree.map( +# lambda _, p_cls: p_cls._padded_stacked_shape, params, partitioners +# ) + +# # initialize preconditioners +# output = jax.tree.map( +# lambda _, ps, dd, sh: list( +# _init_Q_exprs( +# ps[1:] if partition_grads_into_blocks else ps, +# preconditioner_init_scale, +# dd, +# precond_dtype, +# existing_Q=True if return_partition_specs_only else None, +# precond_sharding=preconditioner_sharding_, +# param_sharding=sh, +# ) +# ), +# params, +# partitioned_shapes, +# dim_diag, +# sharding_without_scan if have_params_sharding else nones, +# ) +# if return_partition_specs_only: +# exprs, Qs_sharding_no_leading_dims = [ +# jax.tree.map(lambda _, x: x[i], params, output) for i in range(2) +# ] +# else: +# Qs, exprs, Qs_sharding_no_leading_dims = [ 
+# jax.tree.map(lambda _, x: x[i], params, output) for i in range(3) +# ] +# Qs_sharding = None +# if have_qs_sharding: +# # add scan and stack dims to Qs sharding +# def add_dims_to_spec(_, qss, sds): +# if partition_grads_into_blocks: +# qss = jax.tree.map(lambda qs: PartitionSpec(*((None,) + qs)), qss) +# if sds is not None: +# qss = jax.tree.map(lambda qs: PartitionSpec(*(sds + qs)), qss) +# return qss + +# Qs_sharding = jax.tree.map( +# add_dims_to_spec, +# params, +# Qs_sharding_no_leading_dims, +# scanned_dim_sharding, +# ) + +# if not return_partition_specs_only: +# # broadcast Qs for stacks and scans +# def broadcast_qs(_, ps, q, s): +# stack_n = ps[0] +# if partition_grads_into_blocks: +# # add leading dim for stacked partitions +# q = jax.tree.map( +# lambda x: jnp.repeat(jnp.expand_dims(x, 0), stack_n, axis=0), q +# ) +# if s > 0: +# # add leading dim if we're scanning this layer +# q = jax.tree.map( +# lambda d: jnp.repeat(jnp.expand_dims(d, 0), s, axis=0), q +# ) +# return q + +# Qs = jax.tree.map(broadcast_qs, params, partitioned_shapes, Qs, scanned_sizes) +# if have_qs_sharding: +# Qs = _safe_sharding_constraint(Qs, Qs_sharding) + +# # Calculate and print sizes for preconditioners and momentum +# Qs_n_elements = sum([q.size for q in jax.tree.leaves(Qs)]) +# Qs_size_MB = sum( +# [q.size * q.dtype.itemsize / (2**20) for q in jax.tree.leaves(Qs)] +# ) +# if jax.process_index() == 0: +# print( +# f"PSGD Preconditioners size: {Qs_n_elements} elements, " +# f"{Qs_size_MB:.2f} MB" +# ) +# if mu is not None: +# mu_n_elements = sum([p.size for p in jax.tree.leaves(mu)]) +# mu_size_MB = sum( +# [p.size * p.dtype.itemsize / (2**20) for p in jax.tree.leaves(mu)] +# ) +# if jax.process_index() == 0: +# print( +# f"PSGD Momentum size: {mu_n_elements} elements, {mu_size_MB:.2f} MB" +# ) + +# if return_partition_specs_only: +# return dict( +# key=PartitionSpec(), +# count=PartitionSpec(), +# mu=mu_sharding, +# Qs_preconditioners=Qs_sharding, +# update_counter=PartitionSpec(), +# balance_counter=PartitionSpec(), +# ) + +# return dict( +# key=jax.random.PRNGKey(0), +# count=jnp.zeros([], jnp.int32), +# mu=mu, +# Qs_preconditioners=Qs, +# update_counter=jnp.zeros([], jnp.int32), +# balance_counter=jnp.zeros([], jnp.int32), +# ) + +# def update_fn(updates: base.Updates, state: dict, params: base.Params = None): +# del params +# count_inc = safe_int32_increment(state["count"]) +# key, subkey = jax.random.split(state["key"]) + +# # unbox if haliax style partitioned +# scanned_layers_ = scanned_layers +# params_sharding_ = params_sharding +# hax_partitioned = False +# if have_hax: +# if any( +# isinstance(x, hax.NamedArray) +# for x in jax.tree.leaves( +# updates, is_leaf=lambda x: isinstance(x, hax.NamedArray) +# ) +# ): +# hax_partitioned = True +# # if in haliax, we can grab scanned_layers and params_sharding from params +# # this does not support nested stacks +# if scanned_layers_ is None: +# scanned_layers_ = jax.tree.map( +# lambda x: ( +# jax.tree.map(lambda _: True, x) +# if isinstance(x, hax.nn.Stacked) +# else False +# ), +# updates, +# is_leaf=lambda x: isinstance(x, hax.nn.Stacked), +# ) +# if params_sharding_ is None: +# try: +# params_sharding_ = hax.partitioning.infer_resource_partitions(updates) +# params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) +# except: +# params_sharding_ = None +# updates, updates_struct = jax.tree.flatten(updates) +# scanned_layers_ = jax.tree.leaves(scanned_layers_) +# print(f"kron scanned_layers_: {scanned_layers_}") +# if 
params_sharding_ is not None: +# params_sharding_ = jax.tree.leaves(params_sharding_) +# print(f"kron params_sharding_: {params_sharding_}") + +# have_params_sharding = params_sharding_ is not None +# if have_params_sharding: +# original_params_sharding_ = params_sharding_ +# have_qs_sharding = have_params_sharding or preconditioner_sharding is not None + +# # unbox if flax style partitioned +# flax_partitioned = False +# if have_flax: +# boxed_updates, grads_structure = jax.tree.flatten( +# updates, +# is_leaf=lambda g: isinstance( +# g, (chex.Array, nn.Partitioned, jax.ShapeDtypeStruct) +# ), +# ) +# if any(isinstance(g, nn.Partitioned) for g in boxed_updates): +# flax_partitioned = True +# updates = [g.unbox() for g in boxed_updates] +# updates = grads_structure.unflatten(updates) + +# # extend partition specs +# if have_params_sharding: +# params_sharding_ = jax.tree.map( +# lambda g, sh: PartitionSpec(*(sh + (None,) * (len(g.shape) - len(sh)))), +# updates, +# params_sharding_, +# ) +# preconditioner_sharding_ = preconditioner_sharding +# if preconditioner_sharding is not None: +# if len(preconditioner_sharding) < 2: +# preconditioner_sharding_ = PartitionSpec(preconditioner_sharding[0], None) + +# # reshape params shaped () to (1,) to make things simpler +# input_shapes = jax.tree.map(lambda g: g.shape, updates) +# updates = jax.tree.map(lambda g: g[None] if len(g.shape) == 0 else g, updates) +# if have_params_sharding: +# params_sharding_ = jax.tree.map( +# lambda sh: PartitionSpec(None) if sh == PartitionSpec() else sh, +# params_sharding_, +# ) + +# # scanned layers +# if scanned_layers_ is None: +# scanned_layers_ = jax.tree.map(lambda _: False, updates) + +# # update probability can be scheduled +# update_prob_in = preconditioner_update_probability +# if isinstance(preconditioner_update_probability, Callable): +# update_prob_in = preconditioner_update_probability(count_inc) + +# # normalize grads +# def norm_grads(g): +# return g / (jnp.linalg.norm(g) + 1e-16) + +# if normalize_grads: +# updates = jax.tree.map(norm_grads, updates) + +# # momentum +# mu = None +# momentum_updates = updates +# if state["mu"] is not None: +# mu = otu.tree_update_moment(updates, state["mu"], b1, 1) +# if have_params_sharding: +# mu = _safe_sharding_constraint(mu, params_sharding_) +# momentum_updates = otu.tree_bias_correction(mu, b1, count_inc) + +# # which preconditioners will be diagonal +# dim_diag = jax.tree.map( +# lambda g, s: _get_preconditioner_types( +# g.shape[int(s) :], +# max_size_triangular, +# min_ndim_triangular, +# memory_save_mode, +# ), +# momentum_updates, +# scanned_layers_, +# ) + +# # split sharding specs +# scanned_dim_sharding = None +# sharding_without_scan = None +# if have_params_sharding: +# scanned_dim_sharding = jax.tree.map( +# lambda sh, s: PartitionSpec(sh[0]) if s else None, +# params_sharding_, +# scanned_layers_, +# ) +# sharding_without_scan = jax.tree.map( +# lambda sh, s: PartitionSpec(*(sh[int(s) :])), +# params_sharding_, +# scanned_layers_, +# ) + +# # merge small dimensions +# nones = jax.tree.map(lambda _: None, momentum_updates) +# merged_params_sharding = params_sharding_ +# original_shapes = None +# if merge_small_dims: +# original_shapes = jax.tree.map( +# lambda g, s: g.shape[int(s) :], momentum_updates, scanned_layers_ +# ) +# output = jax.tree.map( +# lambda g, dd, s, sh: _merge_small_dims( +# g.shape[int(s) :], target_merged_dim_size, dd, sh +# ), +# momentum_updates, +# dim_diag, +# scanned_layers_, +# sharding_without_scan if 
have_params_sharding else nones, +# ) +# merged_shapes, dim_diag, sharding_without_scan = [ +# jax.tree.map(lambda _, x: x[i], momentum_updates, output) +# for i in range(3) +# ] +# # reshape +# momentum_updates = jax.tree.map( +# lambda g, s, ns: _map_fn( +# False, 0, int(s), lambda x, shape=ns: jnp.reshape(x, shape), g +# ), +# momentum_updates, +# scanned_layers_, +# merged_shapes, +# ) +# if have_params_sharding: +# # scanned dim sharding + new merged sharding +# merged_params_sharding = jax.tree.map( +# lambda sws, sds: PartitionSpec( +# *(sds + sws if sds is not None else sws) +# ), +# sharding_without_scan, +# scanned_dim_sharding, +# ) +# # constrain sharding +# if have_params_sharding: +# momentum_updates = _safe_sharding_constraint( +# momentum_updates, merged_params_sharding +# ) + +# # partition grads into blocks +# dummy_updates_tree = jax.tree.map(lambda _: jnp.zeros([]), updates) +# n_dims_to_map = jax.tree.map(lambda s: int(s), scanned_layers_) +# partitioned_sharding = merged_params_sharding +# partitioners = None +# partitioned_shapes = None +# if partition_grads_into_blocks: +# partitioners = jax.tree.map( +# lambda g, dd, s: BlockPartitioner(g.shape[int(s) :], block_size, dd), +# momentum_updates, +# dim_diag, +# scanned_layers_, +# ) +# # layers become tuples each containing layer's partitions +# momentum_updates = jax.tree.map( +# lambda g, p_cls, s: _map_fn(False, 0, int(s), p_cls.partition, g), +# momentum_updates, +# partitioners, +# scanned_layers_, +# ) +# partitioned_shapes = jax.tree.map( +# lambda _, g, s: jax.tree.map(lambda x: x.shape[int(s) :], g), +# dummy_updates_tree, +# momentum_updates, +# scanned_layers_, +# ) +# if have_params_sharding: +# # constrain partitions to same sharding as entire layer +# momentum_updates = jax.tree.map( +# lambda _, g, mps: jax.tree.map( +# lambda x: _safe_sharding_constraint(x, mps), g +# ), +# dummy_updates_tree, +# momentum_updates, +# merged_params_sharding, +# ) +# # pad and stack partitions, tuples become arrays with new leading dim +# momentum_updates = jax.tree.map( +# lambda _, g, s: _map_fn( +# False, +# 0, +# int(s), +# lambda x, bs=block_size: _pad_and_stack_matrices(x, bs), +# g, +# ), +# dummy_updates_tree, +# momentum_updates, +# scanned_layers_, +# ) +# if have_params_sharding: +# # add dim to sharding specs for new stacked dim +# partitioned_sharding = jax.tree.map( +# lambda mps, s: PartitionSpec(*(mps[: int(s)] + (None,) + mps[1:])), +# merged_params_sharding, +# scanned_layers_, +# ) +# n_dims_to_map = jax.tree.map(lambda x: x + 1, n_dims_to_map) +# # constrain sharding +# if have_params_sharding: +# momentum_updates = _safe_sharding_constraint( +# momentum_updates, partitioned_sharding +# ) + +# # get einsum expressions and Qs sharding +# Qs = state["Qs_preconditioners"] +# Qs_sharding = None +# exprs_and_sharding = jax.tree.map( +# lambda g, dd, sh, nm: _init_Q_exprs( +# g.shape[nm:], +# preconditioner_init_scale, +# dd, +# precond_dtype, +# existing_Q=True, +# precond_sharding=preconditioner_sharding_, +# param_sharding=sh, +# ), +# momentum_updates, +# dim_diag, +# sharding_without_scan if have_params_sharding else nones, +# n_dims_to_map, +# ) +# exprs, Qs_sharding_no_leading_dims = [ +# jax.tree.map(lambda _, x: x[i], dummy_updates_tree, exprs_and_sharding) +# for i in range(2) +# ] +# Qs_sharding = None +# if have_qs_sharding: +# # add scan and stack dims to Qs sharding +# def add_dims_to_spec(_, qss, sds): +# if partition_grads_into_blocks: +# qss = jax.tree.map(lambda qs: PartitionSpec(*((None,) 
+ qs)), qss) +# if sds is not None: +# qss = jax.tree.map(lambda qs: PartitionSpec(*(sds + qs)), qss) +# return qss + +# Qs_sharding = jax.tree.map( +# add_dims_to_spec, +# dummy_updates_tree, +# Qs_sharding_no_leading_dims, +# scanned_dim_sharding, +# ) + +# # maybe update preconditioner +# def update_preconditioner_fn(rngkey, Qs, grads_in, bal_counter): +# with jax.default_matmul_precision(precond_update_precision): +# # balance preconditioners about every 100 updates +# def balance_Qs(Qs_to_bal): +# def _balance_Q(Q): +# norms = jnp.array( +# [jnp.max(jnp.abs(q)) for q in Q], dtype=jnp.float32 +# ) +# gmean = jnp.exp(jnp.mean(jnp.log(norms))) +# to_mul = gmean / norms +# return [q * x.astype(q.dtype) for q, x in zip(Q, to_mul)] + +# return jax.tree.map( +# lambda _, Q, nm: _map_fn(False, 0, nm, _balance_Q, Q), +# dummy_updates_tree, +# Qs_to_bal, +# n_dims_to_map, +# ) + +# balance_counter_inc = safe_int32_increment(bal_counter) +# do_balances = balance_counter_inc >= 100 +# balance_counter_inc = jnp.where(do_balances, 0, balance_counter_inc) +# Qs = jax.lax.cond(do_balances, balance_Qs, lambda qs: qs, Qs) +# if have_qs_sharding: +# Qs = _safe_sharding_constraint(Qs, Qs_sharding) + +# # create random vectors +# Vs = _tree_random_like(rngkey, grads_in) +# # apply params sharding to random vectors +# if have_params_sharding: +# Vs = _safe_sharding_constraint(Vs, partitioned_sharding) + +# # damp based on machine precision +# damp_eps = jnp.sqrt(jnp.finfo(jnp.float32).eps) # bf16 eps too large +# grads_in = jax.tree.map( +# lambda g, v: g + damp_eps.astype(g.dtype) * jnp.mean(jnp.abs(g)) * v, +# grads_in, +# Vs, +# ) + +# # form conjB +# conjBs = jax.tree.map( +# lambda g, Q, v, nm: _map_fn(lax_map, bs, nm, _conjB, Q, g, v), +# grads_in, +# Qs, +# Vs, +# n_dims_to_map, +# ) +# if have_params_sharding: +# conjBs = _safe_sharding_constraint(conjBs, partitioned_sharding) + +# # update Qs and constrain sharding +# new_Qs = jax.tree.map( +# lambda g, Q, conjb, expr, nm, qss, sh: _map_fn( +# lax_map, +# bs, +# nm, +# partial( +# _update_precond, +# exprs=expr, +# precond_lr=preconditioner_lr, +# qs_sharding=qss, +# params_sharding=sh, +# ), +# Q, +# g, +# conjb, +# ), +# grads_in, +# Qs, +# conjBs, +# exprs, +# n_dims_to_map, +# Qs_sharding_no_leading_dims if have_qs_sharding else nones, +# sharding_without_scan if have_params_sharding else nones, +# ) +# if have_qs_sharding: +# new_Qs = _safe_sharding_constraint(new_Qs, Qs_sharding) + +# new_Qs = otu.tree_cast(new_Qs, precond_dtype) +# return new_Qs, balance_counter_inc + +# def pass_through_fn(rngkey, qs, grads_in, bal_counter): +# if have_qs_sharding: +# qs = _safe_sharding_constraint(qs, Qs_sharding) +# return qs, bal_counter + +# # update preconditioner deterministically +# update_counter_inc = safe_int32_increment(state["update_counter"]) +# do_update = update_counter_inc >= 1 / update_prob_in +# update_counter_inc = jnp.where(do_update, 0, update_counter_inc) +# # Qs, balance_counter_inc = jax.lax.cond( +# # do_update, +# # update_preconditioner_fn, +# # pass_through_fn, +# # subkey, +# # Qs, +# # momentum_updates, +# # state["balance_counter"], +# # ) + +# def cond_fn(state): +# return state[-1] + +# def iter_fn(state): +# rngkey, qs, grads_in, bal_counter, _ = state +# qs, bal_counter = update_preconditioner_fn(rngkey, qs, grads_in, bal_counter) +# return rngkey, qs, grads_in, bal_counter, False + +# while_out = jax.lax.while_loop( +# cond_fn, +# iter_fn, +# (subkey, Qs, momentum_updates, state["balance_counter"], do_update), +# ) +# _, 
Qs, _, balance_counter_inc, _ = while_out +# if have_qs_sharding: +# Qs = _safe_sharding_constraint(Qs, Qs_sharding) + +# # precondition gradients +# with jax.default_matmul_precision(precond_grads_precision): +# precond_gs = jax.tree.map( +# lambda g, Q, expr, nm: _map_fn( +# lax_map, bs, nm, partial(_precond_grad, exprs=expr), Q, g +# ), +# momentum_updates, +# Qs, +# exprs, +# n_dims_to_map, +# ) +# if have_params_sharding: +# precond_gs = _safe_sharding_constraint(precond_gs, partitioned_sharding) + +# # unpartition grads +# if partition_grads_into_blocks: +# precond_gs = jax.tree.map( +# lambda g, s, ps: _map_fn( +# False, +# 0, +# int(s), +# lambda p, shapes=ps: _unstack_and_unpad_matrices(p, shapes), +# g, +# ), +# precond_gs, +# scanned_layers_, +# partitioned_shapes, +# ) +# if have_params_sharding: +# precond_gs = _safe_sharding_constraint(precond_gs, merged_params_sharding) +# precond_gs = jax.tree.map( +# lambda _, g, s, p_cls: _map_fn( +# False, 0, int(s), p_cls.merge_partitions, g +# ), +# dummy_updates_tree, +# precond_gs, +# scanned_layers_, +# partitioners, +# ) +# if have_params_sharding: +# precond_gs = _safe_sharding_constraint(precond_gs, merged_params_sharding) + +# # un-merge dimensions +# if merge_small_dims: +# precond_gs = jax.tree.map( +# lambda g, s, os: _map_fn( +# False, 0, int(s), lambda p, shape=os: jnp.reshape(p, shape), g +# ), +# precond_gs, +# scanned_layers_, +# original_shapes, +# ) +# if have_params_sharding: +# precond_gs = _safe_sharding_constraint(precond_gs, params_sharding_) + +# # return scalars to original shape +# precond_gs = jax.tree.map( +# lambda g, s: jnp.reshape(g, s), precond_gs, input_shapes +# ) + +# # final constraint for good measure +# if have_params_sharding: +# precond_gs = _safe_sharding_constraint(precond_gs, original_params_sharding_) + +# # box preconditioned grads +# if flax_partitioned: +# flat_precond_gs, _ = jax.tree.flatten(precond_gs) +# precond_gs = [ +# bu.replace_boxed(g) for bu, g in zip(boxed_updates, flat_precond_gs) +# ] +# precond_gs = grads_structure.unflatten(precond_gs) +# if hax_partitioned: +# precond_gs = updates_struct.unflatten(precond_gs) + +# # dtypes and new state +# mu = otu.tree_cast(mu, mu_dtype) +# Qs = otu.tree_cast(Qs, precond_dtype) +# state = dict( +# key=key, +# count=count_inc, +# mu=mu, +# Qs_preconditioners=Qs, +# update_counter=update_counter_inc, +# balance_counter=balance_counter_inc, +# ) + +# return precond_gs, state + +# return base.GradientTransformation(init_fn, update_fn) + + +# def kron( +# learning_rate: Union[float, Callable[[int], float]] = 0.001, +# b1: float = 0.9, +# weight_decay: float = 0.0, +# weight_decay_mask: Optional[Union[Any, Callable[[base.Params], Any]]] = None, +# normalize_grads: bool = False, +# preconditioner_update_probability: Union[ +# float, Callable[[int], float] +# ] = precond_update_prob_schedule(), +# max_size_triangular: int = 8192, +# min_ndim_triangular: int = 2, +# memory_save_mode: Optional[str] = None, +# preconditioner_lr: float = 0.1, +# preconditioner_init_scale: float = 1.0, +# mu_dtype: Optional[Union[str, jnp.dtype]] = None, +# precond_dtype: Optional[Union[str, jnp.dtype]] = None, +# precond_update_precision: Optional[str] = "tensorfloat32", +# precond_grads_precision: Optional[str] = None, +# scanned_layers: Optional[base.Params] = None, +# lax_map_scanned_layers: bool = False, +# lax_map_batch_size: int = 8, +# merge_small_dims: bool = False, +# target_merged_dim_size: int = 2048, +# partition_grads_into_blocks: bool = False, +# 
block_size: int = 256, +# params_sharding: Optional[Any] = None, +# preconditioner_sharding: Optional[PartitionSpec[str, str]] = None, +# ) -> base.GradientTransformation: +# """ +# Implements PSGD Kron from https://github.com/lixilinx/psgd_torch. + +# Args: +# learning_rate: float or callable, learning rate schedule. +# b1: float, momentum parameter. 0.9 or 0.95 are common values. +# weight_decay: float, weight decay coefficient. +# weight_decay_mask: optional pytree same structure as params, or callable +# returning a pytree, that masks weight decay. Weight decay is applied to +# leaves that are True. +# normalize_grads: bool, whether to normalize the incoming gradients to unit +# norm layer-wise. Can help with stability. +# preconditioner_update_probability: float, probability of updating the +# preconditioner. Default anneals from 1.0 to 0.03 by 4000 steps. +# max_size_triangular: int, max size for dim's preconditioner to be triangular. +# min_ndim_triangular: int, minimum number of dimensions a layer needs to have +# triangular preconditioners. +# memory_save_mode: optional str, None, 'one_diag', or 'all_diag', None is default +# to set all preconditioners to be triangular, 'one_diag' sets the largest +# or last dim to be diagonal per layer, and 'all_diag' sets all preconditioners +# to be diagonal. +# preconditioner_lr: float, learning rate for preconditioner. +# preconditioner_init_scale: float, scale for preconditioner initialization. +# mu_dtype: optional str or jnp.dtype, dtype of the momentum buffer. Defaults to +# same dtype as the parameters. +# precond_dtype: optional str or jnp.dtype, dtype of the preconditioners. Defaults +# to 'float32'. +# precond_update_precision: str, precision for matmul during preconditioner update, +# 'bfloat16', 'tensorfloat32', 'float32'. +# precond_grads_precision: str, precision for matmul during preconditioning grads, +# 'bfloat16', 'tensorfloat32', 'float32'. +# scanned_layers: optional base.Params, tree of booleans same structure as +# params indicating scanned dimensions for each layer. PSGD will vmap over +# leading dimension. +# lax_map_scanned_layers: bool, whether to use lax.map for scanned layers +# instead of vmap. Useful to save memory with large models. +# lax_map_batch_size: int, batch size for lax.map, see JAX docs for more info. +# merge_small_dims: bool, whether to merge small dimensions to improve +# preconditioner efficiency. +# target_merged_dim_size: int, target size of merged dimensions. +# partition_grads_into_blocks: bool, whether to partition grads into chunks of +# size `block_size` for efficiency. +# block_size: int, block size to use for partitioning grads. +# params_sharding: pytree same structure as params of jax.sharding.PartitionSpec. +# preconditioner_sharding: `None` or `PartitionSpec(str | None, str | None)`, +# PartitionSpec for preconditioner matrices. `None` infers a strategy +# from params_sharding that matches first preconditioner axis to +# corresponding axis in params. Best practice, though, is to shard the first +# dimension across fsdp-like mesh axis, or the largest, most common axis in +# params. For example, PartitionSpec('fsdp') or PartitionSpec('fsdp', 'tp'). 
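+#
+#     Example (an illustrative sketch only; `params` and `grads` are assumed
+#     to be matching pytrees of jax.Array):
+#
+#         import optax
+#         optimizer = kron(learning_rate=3e-4, b1=0.9, weight_decay=0.1)
+#         opt_state = optimizer.init(params)
+#         updates, opt_state = optimizer.update(grads, opt_state, params)
+#         params = optax.apply_updates(params, updates)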
+ +# Returns: +# optax.GradientTransformation +# """ +# optimizer = [ +# scale_by_kron( +# b1=b1, +# normalize_grads=normalize_grads, +# preconditioner_update_probability=preconditioner_update_probability, +# max_size_triangular=max_size_triangular, +# min_ndim_triangular=min_ndim_triangular, +# memory_save_mode=memory_save_mode, +# preconditioner_lr=preconditioner_lr, +# preconditioner_init_scale=preconditioner_init_scale, +# mu_dtype=mu_dtype, +# precond_dtype=precond_dtype, +# precond_update_precision=precond_update_precision, +# precond_grads_precision=precond_grads_precision, +# scanned_layers=scanned_layers, +# lax_map_scanned_layers=lax_map_scanned_layers, +# lax_map_batch_size=lax_map_batch_size, +# merge_small_dims=merge_small_dims, +# target_merged_dim_size=target_merged_dim_size, +# partition_grads_into_blocks=partition_grads_into_blocks, +# block_size=block_size, +# params_sharding=params_sharding, +# preconditioner_sharding=preconditioner_sharding, +# ) +# ] +# if weight_decay > 0.0: +# optimizer.append(transform.add_decayed_weights(weight_decay, weight_decay_mask)) +# optimizer.append(transform.scale_by_learning_rate(learning_rate)) +# return chain(*optimizer) + + +# def get_opt_state_partition_specs( +# params: base.Params, scale_by_kron_only: bool = False, **kwargs +# ): +# """Get tree of PartitionSpecs for kron optimizer state. + +# params converted to jax.ShapeDtypeStructs, no arrays are used. + +# Args: +# params: pytree of Arrays, nn.Partitioned, or jax.ShapeDtypeStruct. +# scale_by_kron_only: bool, If True, only returns partition specs for the +# `scale_by_kron` function, otherwise the `kron` function. +# kwargs: kwargs for kron (or scale_by_kron). + +# Returns: +# tree of PartitionSpecs for optimizer state. +# """ +# params_flat, params_struct = jax.tree.flatten(params) +# if have_flax: +# if isinstance(params_flat[0], nn.Partitioned): +# params_flat = [p.unbox(p) for p in params_flat] +# if not isinstance(params_flat[0], jax.ShapeDtypeStruct): +# params_flat = [jax.ShapeDtypeStruct(p.shape, p.dtype) for p in params_flat] +# params = params_struct.unflatten(params_flat) + +# specs = scale_by_kron(**kwargs).init(params, return_partition_specs_only=True) + +# if not scale_by_kron_only: +# specs = (specs,) +# if kwargs.get("weight_decay", 0.0) > 0.0: +# specs += (None,) +# specs += (None,) + +# return specs + + +# def _get_preconditioner_types( +# shape: Tuple[int, ...], max_size: int, min_ndim: int, mem_save_mode: Optional[str] +# ) -> List[bool]: +# if len(shape) == 0: +# return True + +# if mem_save_mode is None: +# dim_diag = [False for _ in shape] +# elif mem_save_mode == "one_diag": +# rev_sorted_dims = np.argsort(shape)[::-1] +# dim_diag = [False for _ in shape] +# dim_diag[rev_sorted_dims[0]] = True +# elif mem_save_mode == "all_diag": +# dim_diag = [True for _ in shape] +# else: +# raise ValueError( +# f"Invalid mem_save_mode: {mem_save_mode}, must be one of " +# "[None, 'one_diag', 'all_diag']" +# ) + +# for i, size in enumerate(shape): +# if size == 1 or size > max_size or len(shape) < min_ndim: +# dim_diag[i] = True + +# return dim_diag + + +# def _init_Q_exprs( +# t_shape, +# scale, +# dim_diag, +# dtype, +# existing_Q=None, +# precond_sharding=None, +# param_sharding=None, +# ): +# have_qs_sharding = precond_sharding is not None or param_sharding is not None +# letters = string.ascii_lowercase + string.ascii_uppercase +# if len(t_shape) == 0: # scalar +# Q = [scale * jnp.ones(t_shape, dtype=dtype)] if existing_Q is None else existing_Q +# exprA = ",->" +# 
exprGs = [",->"] +# exprP = ",,->" + +# sharding_out = [None] +# if have_qs_sharding: +# sharding_out = [PartitionSpec()] +# else: # tensor +# if len(t_shape) > 13: +# raise ValueError( +# f"Got tensor with dim {len(t_shape.shape)}; Einstein runs out of letters!" +# ) +# scale = scale ** (1 / len(t_shape)) +# Q = [] if existing_Q is None else existing_Q +# piece1A, piece2A, piece3A = ([], "", "") +# exprGs = [] +# piece1P, piece2P, piece3P, piece4P = ([], [], "", "") + +# params_specs = param_sharding +# if param_sharding is None: +# params_specs = PartitionSpec(*((None,) * len(t_shape))) +# sharding_out = [None] * len(t_shape) +# if have_qs_sharding: +# sharding_out = [PartitionSpec(None)] * len(t_shape) + +# for i, (size, dim_d, dim_sh) in enumerate(zip(t_shape, dim_diag, params_specs)): +# if dim_d: +# # use diagonal matrix as preconditioner for this dim +# if existing_Q is None: +# q = scale * jnp.ones(size, dtype=dtype) +# Q.append(q) + +# piece1A.append(letters[i]) +# piece2A = piece2A + letters[i] +# piece3A = piece3A + letters[i] + +# piece1 = "".join( +# [ +# (letters[i + 13] if j == i else letters[j]) +# for j in range(len(t_shape)) +# ] +# ) +# exprGs.append(piece1 + "," + piece1 + "->" + letters[i + 13]) + +# piece1P.append(letters[i + 13]) +# piece2P.append(letters[i + 13]) +# piece3P = piece3P + letters[i + 13] +# piece4P = piece4P + letters[i + 13] +# else: +# # use triangular matrix as preconditioner for this dim +# q_sharding = None +# if have_qs_sharding: +# if have_hax: +# # if we're in haliax we can grab fsdp axis and shard accordingly +# # get current mesh +# mesh = hax.partitioning._get_mesh() +# if mesh.devices.shape == (): +# mesh = None +# # get fsdp mesh axis +# if mesh is not None: +# fsdp_axis_name = hax.partitioning.ResourceAxis.DATA +# fsdp_axis = mesh.axis_names.index(fsdp_axis_name) +# fsdp_size = mesh.devices.shape[fsdp_axis] +# if size % fsdp_size == 0: +# q_sharding = PartitionSpec(fsdp_axis_name, None) +# else: +# q_sharding = PartitionSpec(None, None) +# else: +# q_sharding = PartitionSpec(None, None) +# else: +# # infer a so-so sharding scheme from params if nothing specified +# # (first dim of q will match corresponding dim in params) +# q_sharding = ( +# precond_sharding +# if precond_sharding is not None +# else PartitionSpec(dim_sh, None) +# ) +# # TODO ensure array axis is divisible by mesh axis +# sharding_out[i] = q_sharding + +# if existing_Q is None: +# q = scale * jnp.eye(size, dtype=dtype) +# if have_qs_sharding: +# q = _safe_sharding_constraint(q, q_sharding) +# Q.append(q) + +# piece1A.append(letters[i] + letters[i + 13]) +# piece2A = piece2A + letters[i + 13] +# piece3A = piece3A + letters[i] + +# piece1 = "".join( +# [ +# (letters[i + 13] if j == i else letters[j]) +# for j in range(len(t_shape)) +# ] +# ) +# piece2 = "".join( +# [ +# (letters[i + 26] if j == i else letters[j]) +# for j in range(len(t_shape)) +# ] +# ) +# exprGs.append( +# piece1 + "," + piece2 + "->" + letters[i + 13] + letters[i + 26] +# ) + +# a, b, c = (letters[i], letters[i + 13], letters[i + 26]) +# piece1P.append(a + b) +# piece2P.append(a + c) +# piece3P = piece3P + c +# piece4P = piece4P + b + +# exprA = ",".join(piece1A) + "," + piece2A + "->" + piece3A +# exprP = ( +# ",".join(piece1P) + "," + ",".join(piece2P) + "," + piece3P + "->" + piece4P +# ) + +# exprGs = tuple(exprGs) +# if existing_Q is not None: +# return (exprA, exprGs, exprP), sharding_out +# return Q, (exprA, exprGs, exprP), sharding_out + + +# def _norm_lower_bound(A: jax.Array): +# """Returns a 
cheap lower bound for the spectral norm of A. + +# Numerical results on random matrices with a wide range of distributions and +# sizes suggest, norm(A) <= sqrt(2) * norm_lower_bound(A). Looks to be a very +# tight lower bound. + +# A is hermitian so we can always use dim 0 and not have to compare to dim 1. +# """ +# max_abs = jnp.max(jnp.abs(A)) + +# def calc(A): +# A = A / max_abs +# aa = A * A +# aa_sum0 = jnp.sum(aa, axis=0) +# i = jnp.argmax(aa_sum0, 0) +# x = jax.lax.dynamic_index_in_dim(A, i, 1, keepdims=False) +# x = x @ A +# return max_abs * jnp.linalg.norm((x / jnp.linalg.norm(x)) @ A.T) + +# return jnp.where(max_abs > 0, calc(A), max_abs) + + +# def _solve_triangular_right(X, A): +# """Compute X @ inv(A). + +# A triangular solve has roughly the same complexity as a matmul. +# """ +# X_ndim = X.ndim +# if X_ndim < 2: +# X = X[None, :] + +# dtype_in = jnp.promote_types(A.dtype, X.dtype) +# A, X = A.astype(dtype_in), X.astype(dtype_in) +# leading_dims = 0 +# if X.ndim > 2: +# leading_dims = X.ndim - 2 +# solve_fn = partial(jax.lax.linalg.triangular_solve, left_side=False, lower=False) +# for _ in range(leading_dims): +# solve_fn = vmap(solve_fn, in_axes=(None, 0)) +# solution = solve_fn(A, X) + +# if X_ndim < 2: +# return solution[0] +# return solution + + +# def _conjB(Q, G, V): +# """Compute conjB.""" +# order = G.ndim +# p = list(range(order)) +# conjB = jnp.transpose(V, p[1:] + p[:1]) +# for i, q in enumerate(Q): +# conjB = conjB / q if q.ndim < 2 else _solve_triangular_right(conjB, q) +# if i < order - 1: +# conjB = jnp.swapaxes(conjB, i, order - 1) +# return conjB + + +# def _update_precond(Q, G, conjB, exprs, precond_lr, qs_sharding, params_sharding): +# """Compute A and update Q.""" +# exprA, exprGs, _ = exprs + +# A = jnp.einsum(exprA, *Q, G) + +# def _update_single_q(i, q): +# term1 = jnp.einsum(exprGs[i], A, A) +# term2 = jnp.einsum(exprGs[i], conjB, conjB) + +# if q.ndim < 2: +# q -= ( +# precond_lr +# / _add_tiny(jnp.max(jnp.abs(term1 + term2))) +# * (term1 - term2) +# * q +# ) +# else: +# if qs_sharding is not None: +# sharding = qs_sharding[i] +# # transpose q sharding for terms +# if len(sharding) < 2: +# sharding = PartitionSpec(*((None,) + sharding)) +# else: +# assert len(sharding) == 2 +# sharding = PartitionSpec(*(sharding[1:] + sharding[:1])) +# term1 = _safe_sharding_constraint(term1, sharding) +# term2 = _safe_sharding_constraint(term2, sharding) +# q -= ( +# precond_lr +# / _add_tiny(_norm_lower_bound(term1 + term2)) +# * jnp.triu(term1 - term2) +# @ q +# ) +# return q + +# return [_update_single_q(i, q) for i, q in enumerate(Q)] + + +# def _precond_grad(Q, G, exprs): +# """Precondition gradient G with preconditioner Q.""" +# exprP = exprs[-1] +# return jnp.einsum(exprP, *Q, *Q, G) + + +# def _safe_sharding_constraint(x, sharding): +# if sharding is None: +# return x +# else: +# return with_sharding_constraint(x, sharding) + + +# def _add_tiny(x): +# return x + jnp.finfo(x.dtype).tiny + + +# def _map_fn(lax_map, bs, n_maps, fn, *args): +# """Maybe map a fn along multiple leading axes.""" +# if n_maps <= 0: +# return fn(*args) + +# if lax_map: +# mapped_fn = lambda xs: _map_fn(lax_map, bs, n_maps - 1, fn, *xs) +# return jax.lax.map(mapped_fn, xs=args, batch_size=bs if bs > 1 else None) +# else: +# mapped_fn = lambda *xs: _map_fn(lax_map, bs, n_maps - 1, fn, *xs) +# return vmap(mapped_fn)(*args) + + +# def _tree_random_like( +# rng_key: chex.PRNGKey, target_tree: chex.ArrayTree, dtype=None +# ) -> chex.ArrayTree: +# # adopted from optax +# tree_def = 
jax.tree.structure(target_tree) +# keys = jax.random.split(rng_key, tree_def.num_leaves) +# keys_tree = jax.tree.unflatten(tree_def, keys) +# return jax.tree.map( +# lambda l, k: jax.random.normal( +# k, l.shape, dtype if dtype is not None else l.dtype +# ), +# target_tree, +# keys_tree, +# ) + + +# class BlockPartitioner: +# """Partitions a tensor into smaller tensors. + +# Modified from distributed_shampoo. +# https://github.com/google-research/google-research/blob/master/scalable_shampoo/optax/distributed_shampoo.py +# Scalable Second Order Optimization for Deep Learning, +# Rohan Anil, Vineet Gupta, Tomer Koren, Kevin Regan, Yoram Singer +# https://arxiv.org/abs/2002.09018 +# """ + +# def __init__(self, param_shape, block_size, dim_diag): +# assert len(dim_diag) == len( +# param_shape +# ), "dim_diag must have same length as param_shape" +# self._shape = param_shape +# self._splits = [] +# split_sizes = [] +# # We split params into smaller blocks. Here we store the metadata to make +# # that split. +# for i, d in enumerate(param_shape): +# if 0 < block_size < d and not dim_diag[i]: +# # d-1, otherwise split appends a 0-size array. +# nsplit = (d - 1) // block_size +# indices = (np.arange(nsplit, dtype=np.int32) + 1) * block_size +# sizes = np.ones(nsplit + 1, dtype=np.int32) * block_size +# sizes[-1] = d - indices[-1] +# self._splits.append((i, indices)) +# split_sizes.append(sizes) +# else: +# split_sizes.append(np.array([d], dtype=np.int32)) +# self._split_sizes = split_sizes + +# # TODO (evanatyourservice) +# # this might fail with scalar params but for now we're reshaping those +# single_shape = [a[0] for a in split_sizes] +# padded_single_shape = [-(-dim // block_size) * block_size for dim in single_shape] +# stack_size = max(1, np.prod([max(1, len(s)) for s in split_sizes])) +# self._padded_stacked_shape = tuple([stack_size] + padded_single_shape) + +# def split_sizes(self): +# return self._split_sizes + +# def partition(self, tensor): +# """Partition tensor into blocks.""" + +# assert tensor.shape == self._shape +# tensors = [tensor] +# for i, indices in self._splits: +# tensors_local = [] +# for t in tensors: +# tensors_local.extend(jnp.split(t, indices_or_sections=indices, axis=i)) +# tensors = tensors_local +# return tuple(tensors) + +# def merge_partitions(self, partitions): +# """Merge partitions back to original shape.""" + +# for i, indices in reversed(self._splits): +# n = len(indices) + 1 +# partial_merged_tensors = [] +# ind = 0 +# while ind < len(partitions): +# partial_merged_tensors.append( +# jnp.concatenate(partitions[ind : ind + n], axis=i) +# ) +# ind += n +# partitions = partial_merged_tensors +# assert len(partitions) == 1 +# return partitions[0] + + +# def _partitions(lst): +# """Generate all partitions of a list.""" +# if not lst: +# yield [[]] +# else: +# for i in range(len(lst)): +# for part in _partitions(lst[i + 1 :]): +# yield [lst[: i + 1]] + part + +# """ +# 128, 4, 4, 8 +# (128, 512) +# """ + +# def _merge_small_dims( +# shape_to_merge, max_dim, dim_diag, sharding_to_merge=None +# ) -> Tuple[List[int], List[bool], Optional[Tuple]]: +# if not shape_to_merge: # handles scalar shape () +# return [], [True], PartitionSpec() if sharding_to_merge is not None else None +# if np.all(np.array(shape_to_merge) == 1): # handles shape (1,) +# return ( +# [1], +# [True], +# PartitionSpec(None) if sharding_to_merge is not None else None, +# ) + +# def dim2loss(d, dim0=max_dim): +# """A heuristic map from dim to loss with the least loss occurs at dim0.""" +# loss = 
0 +# if d < dim0: +# loss += np.log2(dim0 / d) +# too_small = dim0 / 8 +# if d < too_small: +# loss += 100 * np.log2(too_small / d) +# else: +# loss += 10 * np.log2(d / dim0) +# too_large = 8 * dim0 +# if d > too_large: +# loss += 1000 * np.log2(d / too_large) +# return loss + +# best_loss = float("inf") +# best_partition = None + +# for p in _partitions(list(range(len(shape_to_merge)))): +# loss = 0 +# merged = [] +# for group in p: +# if not group: +# continue +# d = np.prod([shape_to_merge[i] for i in group]) +# loss += dim2loss(d) +# merged.append(group) + +# if loss < best_loss: +# best_loss = loss +# best_partition = merged + +# merged_shape = [] +# merged_diag = [] +# merged_sharding = [] + +# for group in best_partition: +# merged_shape.append(np.prod([shape_to_merge[i] for i in group])) +# merged_diag.append(all(dim_diag[i] for i in group)) +# if sharding_to_merge: +# group_shardings = [sharding_to_merge[i] for i in group] +# valid_shardings = [s for s in group_shardings if s is not None] + +# if len(valid_shardings) > 1: +# merged_sharding.append(tuple(valid_shardings)) +# elif len(valid_shardings) == 1: +# merged_sharding.append(valid_shardings[0]) +# else: +# merged_sharding.append(None) + +# return ( +# merged_shape, +# merged_diag, +# PartitionSpec(*merged_sharding) if sharding_to_merge else None, +# ) + + +# def _pad_and_stack_matrices(array_list, block_size): +# # Handle scalar arrays by adding a dummy dimension +# is_scalar = len(array_list[0].shape) == 0 +# if is_scalar: +# array_list = [arr[None] for arr in array_list] + +# shapes = [arr.shape for arr in array_list] +# max_dims = [max(shape[i] for shape in shapes) for i in range(len(shapes[0]))] +# padded_shape = [-(-dim // block_size) * block_size for dim in max_dims] +# padded_arrays = [] +# for arr in array_list: +# pad_width = [(0, padded_shape[i] - arr.shape[i]) for i in range(arr.ndim)] +# padded = jnp.pad(arr, pad_width) +# padded_arrays.append(padded) + +# stacked = jnp.stack(padded_arrays) +# return stacked + + +# def _unstack_and_unpad_matrices(stacked_array, original_shapes): +# # Handle scalar arrays +# is_scalar = len(original_shapes[0]) == 0 + +# unstacked = jnp.split(stacked_array, stacked_array.shape[0], axis=0) +# unpadded = [] +# for arr, orig_shape in zip(unstacked, original_shapes): +# arr = jnp.squeeze(arr, axis=0) +# if is_scalar: +# # For scalars, just take the first element +# arr = arr[0] +# else: +# # For non-scalars, slice to original shape +# slices = tuple(slice(0, dim) for dim in orig_shape) +# arr = arr[slices] +# unpadded.append(arr) +# return tuple(unpadded) + + +# # unused fns (can be used for stacking partitions without padding): +# def _sort_and_group_matrices(matrix_shapes: List[Tuple[int, ...]]): +# indexed_list = list(enumerate(matrix_shapes)) +# sorted_indexed = sorted(indexed_list, key=lambda x: x[1]) +# sorted_shapes = [shape for _, shape in sorted_indexed] +# change_indices = [original_index for original_index, _ in sorted_indexed] +# revert_indices = [0] * len(matrix_shapes) +# for new_pos, (original_index, _) in enumerate(sorted_indexed): +# revert_indices[original_index] = new_pos +# shape_groups = defaultdict(list) +# for i, shape in enumerate(sorted_shapes): +# shape_groups[shape].append(i) +# unique_sorted_shapes = list(shape_groups.keys()) +# return unique_sorted_shapes, dict(shape_groups), change_indices, revert_indices + + +# def _stack_matrices(array_list): +# in_tuple = isinstance(array_list, tuple) +# shapes = [arr.shape for arr in array_list] +# unique_shapes, 
shape_groups, change_indices, _ = _sort_and_group_matrices(shapes) +# sorted_arrays = [array_list[i] for i in change_indices] +# stacked_arrays = [] +# for shape in unique_shapes: +# indices = shape_groups[shape] +# stacked = jnp.stack([sorted_arrays[i] for i in indices]) +# stacked_arrays.append(stacked) +# if in_tuple: +# return tuple(stacked_arrays) +# return stacked_arrays + + +# def _unstack_matrices(stacked_arrays, revert_indices): +# in_tuple = isinstance(stacked_arrays, tuple) +# unstacked = [] +# for arr in stacked_arrays: +# unstacked.extend(jnp.split(arr, arr.shape[0])) +# array_list = [jnp.squeeze(unstacked[i], axis=0) for i in revert_indices] +# if in_tuple: +# return tuple(array_list) +# return array_list + + +# if __name__ == "__main__": +# import jax_sourceror + +# axis_a = hax.Axis("d", 128) +# axis_b = hax.Axis("b", 8) + +# params = { +# "w": hax.NamedArray(jnp.ones((128, 8)), (axis_a, axis_b)), +# "b": hax.NamedArray(jnp.ones((128,)), (axis_a,)), +# } +# grads = { +# "w": hax.NamedArray(jnp.ones((128, 8)), (axis_a, axis_b)), +# "b": hax.NamedArray(jnp.ones((128,)), (axis_a,)), +# } + +# optimizer = kron() +# opt_state = optimizer.init(params) +# source_code = jax_sourceror.sourcerize(optimizer.update)(grads, opt_state, params) + +# print(source_code) From 5108be03a26f1667bec6fa87c0367be37d8486ec Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Wed, 18 Dec 2024 20:40:49 -0700 Subject: [PATCH 46/56] take out unavailable args --- src/levanter/optim/kron.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index fb580a9e7..c09304665 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -116,12 +116,12 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: scanned_layers=self.scanned_layers, lax_map_scanned_layers=self.lax_map_scanned_layers, lax_map_batch_size=self.lax_map_batch_size, - merge_small_dims=self.merge_small_dims, - target_merged_dim_size=self.target_merged_dim_size, - partition_grads_into_blocks=self.partition_grads_into_blocks, - block_size=self.block_size, - params_sharding=self.params_sharding, - preconditioner_sharding=precond_partition_spec, + # merge_small_dims=self.merge_small_dims, + # target_merged_dim_size=self.target_merged_dim_size, + # partition_grads_into_blocks=self.partition_grads_into_blocks, + # block_size=self.block_size, + # params_sharding=self.params_sharding, + # preconditioner_sharding=precond_partition_spec, ) ) if self.weight_decay > 0: From 975a2d7526f0e802657520522e08f6615925c341 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Wed, 18 Dec 2024 20:49:28 -0700 Subject: [PATCH 47/56] no extra args --- src/levanter/optim/kron.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index c09304665..682591d59 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -200,7 +200,7 @@ def scale_by_kron( scanned_layers: Optional[base.Params] = None, lax_map_scanned_layers: bool = False, lax_map_batch_size: int = 8, -) -> base.GradientTransformationExtraArgs: +) -> base.GradientTransformation: """ Implements PSGD Kron from https://github.com/lixilinx/psgd_torch. @@ -234,7 +234,7 @@ def scale_by_kron( lax_map_batch_size: int, batch size for lax.map, see JAX docs for more info. 
Returns: - optax.GradientTransformationExtraArgs + optax.GradientTransformation """ mu_dtype = canonicalize_dtype(mu_dtype) precond_dtype = canonicalize_dtype(precond_dtype) @@ -511,7 +511,7 @@ def _balance_Q(Q: List[jax.Array]): return updates, state - return base.GradientTransformationExtraArgs(init_fn, update_fn) + return base.GradientTransformation(init_fn, update_fn) def kron( @@ -536,7 +536,7 @@ def kron( scanned_layers: Optional[base.Params] = None, lax_map_scanned_layers: bool = False, lax_map_batch_size: int = 8, -) -> base.GradientTransformationExtraArgs: +) -> base.GradientTransformation: """ Implements PSGD Kron from https://github.com/lixilinx/psgd_torch. @@ -574,7 +574,7 @@ def kron( lax_map_batch_size: int, batch size for lax.map, see JAX docs for more info. Returns: - optax.GradientTransformationExtraArgs + optax.GradientTransformation """ optimizer = [ scale_by_kron( From 3fa70ab573d05977be27ec020f0b9fc2e2207737 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Wed, 18 Dec 2024 20:53:32 -0700 Subject: [PATCH 48/56] trying this --- src/levanter/optim/kron.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 682591d59..196cfb527 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -254,11 +254,11 @@ def map_fn(do_map, fn, *args): return fn(*args) def init_fn(params): - params = jax.tree.map( - lambda x: x.unbox() if isinstance(x, nn.Partitioned) else x, - params, - is_leaf=lambda v: isinstance(v, (chex.Array, nn.Partitioned)), - ) + # params = jax.tree.map( + # lambda x: x.unbox() if isinstance(x, nn.Partitioned) else x, + # params, + # is_leaf=lambda v: isinstance(v, (chex.Array, nn.Partitioned)), + # ) scanned_layers_ = scanned_layers if scanned_layers_ is None: @@ -340,6 +340,16 @@ def update_fn(updates: base.Updates, state: dict, params: base.Params = None): count_inc = safe_int32_increment(state["count"]) key = jax.random.fold_in(jax.random.PRNGKey(5318008), state["count"]) + # account for flax.linen.Partitioned grads and params + # boxed_updates, grads_structure = jax.tree.flatten( + # updates, is_leaf=lambda v: isinstance(v, (chex.Array, nn.Partitioned)) + # ) + # flax_partitioned = False + # if isinstance(boxed_updates[0], nn.Partitioned): + # flax_partitioned = True + # updates = [u.unbox() for u in boxed_updates] + # updates = grads_structure.unflatten(updates) + scanned_layers_ = scanned_layers if scanned_layers_ is None: scanned_layers_ = jax.tree.map( @@ -355,16 +365,6 @@ def update_fn(updates: base.Updates, state: dict, params: base.Params = None): scanned_layers_ = jax.tree.leaves(scanned_layers_) print(f"kron scanned_layers_: {scanned_layers_}") - # account for flax.linen.Partitioned grads and params - boxed_updates, grads_structure = jax.tree.flatten( - updates, is_leaf=lambda v: isinstance(v, (chex.Array, nn.Partitioned)) - ) - flax_partitioned = False - if isinstance(boxed_updates[0], nn.Partitioned): - flax_partitioned = True - updates = [u.unbox() for u in boxed_updates] - updates = grads_structure.unflatten(updates) - update_prob_in = preconditioner_update_probability if isinstance(preconditioner_update_probability, Callable): update_prob_in = preconditioner_update_probability(count_inc) @@ -488,10 +488,10 @@ def _balance_Q(Q: List[jax.Array]): ] # box preconditioned grads - if flax_partitioned: - precond_gs = [ - u.replace_boxed(pg) for u, pg in zip(boxed_updates, precond_gs) - ] + # if flax_partitioned: + # 
precond_gs = [ + # u.replace_boxed(pg) for u, pg in zip(boxed_updates, precond_gs) + # ] # unflatten pytrees updates = grads_structure.unflatten(precond_gs) From b62963ef05e353af9a026f89a007fd1a21c9007d Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Wed, 18 Dec 2024 20:59:05 -0700 Subject: [PATCH 49/56] Update kron.py --- src/levanter/optim/kron.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 196cfb527..e24f1833b 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -133,9 +133,10 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: components.append(optax.scale_by_learning_rate(learning_rate)) return optax.chain(*components) - return optax.inject_hyperparams(_optimizer)( - learning_rate=self.lr_scheduler(num_train_steps) - ) + # return optax.inject_hyperparams(_optimizer)( + # learning_rate=self.lr_scheduler(num_train_steps) + # ) + return _optimizer(self.lr_scheduler(num_train_steps)) from typing import Any, List, Optional, Union, Callable From 5bafdcf0249eaf894558260a8098d57a424aa21d Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Wed, 18 Dec 2024 21:00:59 -0700 Subject: [PATCH 50/56] Revert "Update kron.py" This reverts commit b62963ef05e353af9a026f89a007fd1a21c9007d. --- src/levanter/optim/kron.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index e24f1833b..196cfb527 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -133,10 +133,9 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: components.append(optax.scale_by_learning_rate(learning_rate)) return optax.chain(*components) - # return optax.inject_hyperparams(_optimizer)( - # learning_rate=self.lr_scheduler(num_train_steps) - # ) - return _optimizer(self.lr_scheduler(num_train_steps)) + return optax.inject_hyperparams(_optimizer)( + learning_rate=self.lr_scheduler(num_train_steps) + ) from typing import Any, List, Optional, Union, Callable From c47c4c52e9cdb67aea19df84a033aa6af2cdbef5 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Wed, 18 Dec 2024 21:03:05 -0700 Subject: [PATCH 51/56] small fix --- src/levanter/optim/kron.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 196cfb527..352f2e29f 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -497,7 +497,7 @@ def _balance_Q(Q: List[jax.Array]): updates = grads_structure.unflatten(precond_gs) Qs = grads_structure.unflatten(Qs) - precond_gs = updates_struct.unflatten(precond_gs) + updates = updates_struct.unflatten(updates) # dtypes and new state mu = otu.tree_cast(mu, mu_dtype) From ee747c09e65d12bb6c7ea30c13df39e1af173e33 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Wed, 18 Dec 2024 23:41:43 -0700 Subject: [PATCH 52/56] settings --- config/llama2_100M_kron_test.yaml | 3 +-- src/levanter/optim/kron.py | 10 +++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/config/llama2_100M_kron_test.yaml b/config/llama2_100M_kron_test.yaml index 1cc689629..9993da34d 100644 --- a/config/llama2_100M_kron_test.yaml +++ b/config/llama2_100M_kron_test.yaml @@ -27,9 +27,8 @@ trainer: optimizer: learning_rate: 3E-4 weight_decay: 0.1 - warmup: 1000 + warmup: 2000 cooldown: 0.1 lr_schedule: constant min_lr_ratio: 0.0 - max_grad_norm: 1.0 type: kron diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 
352f2e29f..76997fad4 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -61,10 +61,10 @@ class KronConfig(OptimizerConfig): # some of these are changed from kron defaults to better suit levanter beta1: float = 0.9 weight_decay: float = 0.1 - max_grad_norm: Optional[float] = 1.0 - normalize_grads: bool = False - preconditioner_update_probability: float = 0.05 - update_prob_flat_start: int = 500 + max_grad_norm: Optional[float] = 0.0 + normalize_grads: bool = True + preconditioner_update_probability: float = 0.03 + update_prob_flat_start: int = 1000 max_size_triangular: int = 25000 min_ndim_triangular: int = 2 memory_save_mode: Optional[str] = None @@ -72,7 +72,7 @@ class KronConfig(OptimizerConfig): preconditioner_init_scale: float = 1.0 mu_dtype: Optional[Union[str, jnp.dtype]] = None precond_dtype: Optional[Union[str, jnp.dtype]] = None - precond_update_precision: Optional[str] = "tensorfloat32" + precond_update_precision: Optional[str] = "float32" precond_grads_precision: Optional[str] = None scanned_layers: Optional[optax.Params] = None lax_map_scanned_layers: bool = False From 59f2c10d40e31ffb732d66b42afd4bb4ac6dfe37 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sun, 22 Dec 2024 09:35:46 -0700 Subject: [PATCH 53/56] small changes/moving to remote --- src/levanter/optim/kron.py | 37 +++++++++++-------------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index 76997fad4..ed4aa066c 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -18,7 +18,7 @@ class KronConfig(OptimizerConfig): weight_decay: Weight decay coefficient. max_grad_norm: Optional gradient norm clipping value. normalize_grads: Whether to normalize the incoming gradients to unit norm layer-wise. - Can help with stability. + Can help with stability but likely not necessary in this scenario. preconditioner_update_probability: Final probability of updating the preconditioner. Default is 0.05 (update every 20 steps). The `precond_update_prob_schedule` holds probability at 1.0 for `update_prob_flat_start` steps before annealing exponentially down to this @@ -50,20 +50,14 @@ class KronConfig(OptimizerConfig): lax_map_batch_size: Batch size for lax.map, see JAX docs for more info. merge_small_dims: Whether to merge small dimensions to improve preconditioner efficiency. target_merged_dim_size: Target size of merged dimensions. - partition_grads_into_blocks: Whether to partition grads into chunks of size block_size - for efficiency. - block_size: Block size to use for partitioning grads. params_sharding: Pytree same structure as params of jax.sharding.PartitionSpec. - preconditioner_sharding: PartitionSpec for preconditioner matrices. Best practice is to - shard first dimension across fsdp-like mesh axis, or largest/most common axis in params. - Example: PartitionSpec('fsdp') or PartitionSpec('fsdp', 'tp'). 
""" # some of these are changed from kron defaults to better suit levanter beta1: float = 0.9 weight_decay: float = 0.1 max_grad_norm: Optional[float] = 0.0 - normalize_grads: bool = True - preconditioner_update_probability: float = 0.03 + normalize_grads: bool = False + preconditioner_update_probability: float = 0.05 update_prob_flat_start: int = 1000 max_size_triangular: int = 25000 min_ndim_triangular: int = 2 @@ -72,27 +66,19 @@ class KronConfig(OptimizerConfig): preconditioner_init_scale: float = 1.0 mu_dtype: Optional[Union[str, jnp.dtype]] = None precond_dtype: Optional[Union[str, jnp.dtype]] = None - precond_update_precision: Optional[str] = "float32" + precond_update_precision: Optional[str] = "tensorfloat32" precond_grads_precision: Optional[str] = None scanned_layers: Optional[optax.Params] = None lax_map_scanned_layers: bool = False lax_map_batch_size: int = 8 merge_small_dims: bool = True target_merged_dim_size: int = 8192 - partition_grads_into_blocks: bool = True - block_size: int = 256 params_sharding: Optional[Any] = None - preconditioner_sharding: Optional[tuple[str | None, str | None]] = None def build(self, num_train_steps): """Creates the optimizer.""" def _optimizer(learning_rate) -> optax.GradientTransformation: - precond_partition_spec = ( - PartitionSpec(*self.preconditioner_sharding) - if self.preconditioner_sharding is not None - else None - ) components = [] if self.max_grad_norm and not self.normalize_grads: components.append(optax.clip_by_global_norm(self.max_grad_norm)) @@ -116,14 +102,15 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: scanned_layers=self.scanned_layers, lax_map_scanned_layers=self.lax_map_scanned_layers, lax_map_batch_size=self.lax_map_batch_size, - # merge_small_dims=self.merge_small_dims, - # target_merged_dim_size=self.target_merged_dim_size, - # partition_grads_into_blocks=self.partition_grads_into_blocks, - # block_size=self.block_size, - # params_sharding=self.params_sharding, - # preconditioner_sharding=precond_partition_spec, + merge_small_dims=self.merge_small_dims, + target_merged_dim_size=self.target_merged_dim_size, + params_sharding=self.params_sharding, ) ) + # PSGD's output should be RMS=1.0, so we can clip at 1.1 in case of + # gradient spike. This is better than clipping incoming grads because this + # gets rid of information for the preconditioner. 
+ components.append(optax.clip_by_block_rms(1.1)) if self.weight_decay > 0: components.append( optax.add_decayed_weights( @@ -143,11 +130,9 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: import string import numpy as np -import chex import jax from jax import vmap import jax.numpy as jnp -import flax.linen as nn from optax import tree_utils as otu from optax._src import base, transform from optax._src.numerics import safe_int32_increment From 25a2c20e95ce09d38c513194532080c01e72c101 Mon Sep 17 00:00:00 2001 From: Evan Walters Date: Sun, 22 Dec 2024 19:16:17 +0000 Subject: [PATCH 54/56] simplified kron is working, need to test on larger pod --- src/levanter/optim/kron.py | 1845 +++--------------------------------- 1 file changed, 136 insertions(+), 1709 deletions(-) diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py index ed4aa066c..51f57f2cc 100644 --- a/src/levanter/optim/kron.py +++ b/src/levanter/optim/kron.py @@ -1,9 +1,8 @@ from dataclasses import dataclass -from typing import Any, Optional, Union +from typing import Optional, Union import jax.numpy as jnp import optax -from jax.sharding import PartitionSpec from levanter.optim.config import OptimizerConfig @@ -43,14 +42,9 @@ class KronConfig(OptimizerConfig): Options: 'bfloat16', 'tensorfloat32', 'float32'. precond_grads_precision: Precision for matmul during preconditioning grads. Options: 'bfloat16', 'tensorfloat32', 'float32'. - scanned_layers: Tree of booleans same structure as params indicating scanned dimensions - for each layer. PSGD will vmap over leading dimension. lax_map_scanned_layers: Whether to use lax.map for scanned layers instead of vmap. Useful to save memory with large models. lax_map_batch_size: Batch size for lax.map, see JAX docs for more info. - merge_small_dims: Whether to merge small dimensions to improve preconditioner efficiency. - target_merged_dim_size: Target size of merged dimensions. - params_sharding: Pytree same structure as params of jax.sharding.PartitionSpec. 
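Since PSGD's preconditioned output is expected to have RMS close to 1.0, the clip_by_block_rms(1.1) step added above only engages on spikes. A small sketch of its behavior on a single leaf, relying on the standard optax semantics (each leaf is rescaled so its root-mean-square does not exceed the threshold):

    import jax.numpy as jnp
    import optax

    clip = optax.clip_by_block_rms(1.1)
    state = clip.init(None)  # stateless transform

    spiky = {"w": jnp.full((4, 4), 10.0)}         # RMS = 10.0
    clipped, _ = clip.update(spiky, state)
    print(jnp.sqrt(jnp.mean(clipped["w"] ** 2)))  # -> ~1.1

    calm = {"w": jnp.full((4, 4), 0.5)}           # RMS = 0.5, passes through
    passed, _ = clip.update(calm, state)
    print(jnp.sqrt(jnp.mean(passed["w"] ** 2)))   # -> 0.5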
""" # some of these are changed from kron defaults to better suit levanter beta1: float = 0.9 @@ -68,12 +62,8 @@ class KronConfig(OptimizerConfig): precond_dtype: Optional[Union[str, jnp.dtype]] = None precond_update_precision: Optional[str] = "tensorfloat32" precond_grads_precision: Optional[str] = None - scanned_layers: Optional[optax.Params] = None lax_map_scanned_layers: bool = False lax_map_batch_size: int = 8 - merge_small_dims: bool = True - target_merged_dim_size: int = 8192 - params_sharding: Optional[Any] = None def build(self, num_train_steps): """Creates the optimizer.""" @@ -83,7 +73,7 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: if self.max_grad_norm and not self.normalize_grads: components.append(optax.clip_by_global_norm(self.max_grad_norm)) components.append( - scale_by_kron( + scale_by_kron_for_levanter( b1=self.beta1, normalize_grads=self.normalize_grads, preconditioner_update_probability=precond_update_prob_schedule( @@ -99,17 +89,13 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: precond_dtype=self.precond_dtype, precond_update_precision=self.precond_update_precision, precond_grads_precision=self.precond_grads_precision, - scanned_layers=self.scanned_layers, lax_map_scanned_layers=self.lax_map_scanned_layers, lax_map_batch_size=self.lax_map_batch_size, - merge_small_dims=self.merge_small_dims, - target_merged_dim_size=self.target_merged_dim_size, - params_sharding=self.params_sharding, ) ) - # PSGD's output should be RMS=1.0, so we can clip at 1.1 in case of - # gradient spike. This is better than clipping incoming grads because this - # gets rid of information for the preconditioner. + # PSGD's output should be RMS=1.0, so we can clip at 1.1 in case of incoming + # gradient spike. This is better than clipping incoming grads because that would + # get rid of valuable information for the preconditioner. components.append(optax.clip_by_block_rms(1.1)) if self.weight_decay > 0: components.append( @@ -125,6 +111,7 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: ) +"""PSGD Kron""" from typing import Any, List, Optional, Union, Callable from functools import partial import string @@ -133,6 +120,8 @@ def _optimizer(learning_rate) -> optax.GradientTransformation: import jax from jax import vmap import jax.numpy as jnp +from jax.sharding import PartitionSpec +from jax.lax import with_sharding_constraint from optax import tree_utils as otu from optax._src import base, transform from optax._src.numerics import safe_int32_increment @@ -166,7 +155,7 @@ def _schedule(n): return _schedule -def scale_by_kron( +def scale_by_kron_for_levanter( b1: float = 0.9, normalize_grads: bool = False, preconditioner_update_probability: Union[ @@ -182,13 +171,15 @@ def scale_by_kron( precond_dtype: Optional[Union[str, jnp.dtype]] = None, precond_update_precision: Optional[str] = "tensorfloat32", precond_grads_precision: Optional[str] = None, - scanned_layers: Optional[base.Params] = None, lax_map_scanned_layers: bool = False, lax_map_batch_size: int = 8, ) -> base.GradientTransformation: """ Implements PSGD Kron from https://github.com/lixilinx/psgd_torch. + A simple version of scale_by_kron that is focused on working only within levanter + with FSDP sharding for preconditioners. + Args: b1: float, momentum parameter. normalize_grads: bool, whether to normalize gradients to unit norm layer-wise. @@ -212,8 +203,6 @@ def scale_by_kron( 'bfloat16', 'tensorfloat32', 'float32'. 
precond_grads_precision: str, precision for matmul during preconditioning grads, 'bfloat16', 'tensorfloat32', 'float32'. - scanned_layers: optional base.Params, tree of bool same structure as params - indicating scanned layers. PSGD will vmap over the first dim. lax_map_scanned_layers: bool, whether to use lax.map for scanned layers instead of vmap. Useful to save memory with large models. lax_map_batch_size: int, batch size for lax.map, see JAX docs for more info. @@ -239,31 +228,38 @@ def map_fn(do_map, fn, *args): return fn(*args) def init_fn(params): - # params = jax.tree.map( - # lambda x: x.unbox() if isinstance(x, nn.Partitioned) else x, - # params, - # is_leaf=lambda v: isinstance(v, (chex.Array, nn.Partitioned)), - # ) - - scanned_layers_ = scanned_layers - if scanned_layers_ is None: - scanned_layers_ = jax.tree.map( - lambda x: ( - jax.tree.map(lambda _: True, x) - if isinstance(x, hax.nn.Stacked) - else False - ), - params, - is_leaf=lambda x: isinstance(x, hax.nn.Stacked), - ) - params, params_struct = jax.tree.flatten(params) - scanned_layers_ = jax.tree.leaves(scanned_layers_) - print(f"kron scanned_layers_: {scanned_layers_}") + def fsdp_size(): + mesh = hax.partitioning._get_mesh() + fsdp_axis_name = hax.partitioning.ResourceAxis.DATA + fsdp_axis = mesh.axis_names.index(fsdp_axis_name) + fsdp_size = mesh.devices.shape[fsdp_axis] + return fsdp_size + + # grab scanned layers and params sharding + scanned_layers_ = jax.tree.map( + lambda x: ( + jax.tree.map(lambda _: True, x, is_leaf=lambda x: isinstance(x, jax.Array)) + if isinstance(x, hax.nn.Stacked) + else jax.tree.map(lambda _: False, x, is_leaf=lambda x: isinstance(x, jax.Array)) + ), + params, + is_leaf=lambda x: isinstance(x, hax.nn.Stacked), + ) + params_sharding_ = hax.partitioning.infer_resource_partitions(params) + params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) + + params, params_structure = jax.tree.flatten(params, is_leaf=lambda x: isinstance(x, jax.Array)) + scanned_layers_ = params_structure.flatten_up_to(scanned_layers_) + params_sharding_ = jax.tree.leaves(params_sharding_, is_leaf=lambda x: isinstance(x, PartitionSpec)) + # print(f"kron params: {jax.tree.map(lambda x: x.shape, params)}") + # print(f"kron scanned_layers_: {scanned_layers_}") + # print(f"kron params_sharding_: {params_sharding_}") # momentum mu = None if b1 > 0: mu = jax.tree.map(lambda x: jnp.zeros_like(x, dtype=mu_dtype), params) + mu = with_sharding_constraint(mu, params_sharding_) # preconditioners Qs = [ @@ -278,10 +274,19 @@ def init_fn(params): for t, s in zip(jax.tree.leaves(params), jax.tree.leaves(scanned_layers_)) ] # broadcast for scanned layers + def shard_q(q, s): + q_shape_no_s = q.shape[int(s):] + if len(q_shape_no_s) > 1 and q_shape_no_s[0] % fsdp_size() == 0: + return with_sharding_constraint( + q, PartitionSpec(None, 'data') if s else PartitionSpec('data') + ) + else: + return with_sharding_constraint(q, PartitionSpec(None)) + Qs = [ ( jax.tree.map( - lambda d: jnp.repeat(jnp.expand_dims(d, 0), t.shape[0], axis=0), q + lambda d: shard_q(jnp.repeat(jnp.expand_dims(d, 0), t.shape[0], axis=0), s), q ) if s else q @@ -290,7 +295,6 @@ def init_fn(params): Qs, jax.tree.leaves(params), jax.tree.leaves(scanned_layers_) ) ] - Qs = jax.tree.structure(params).unflatten(Qs) # Calculate sizes for nu (preconditioner) and mu (momentum) Qs_n_elements = sum([q.size for q in jax.tree.leaves(Qs)]) @@ -323,32 +327,35 @@ def init_fn(params): def update_fn(updates: base.Updates, state: dict, params: base.Params = None): del 
params count_inc = safe_int32_increment(state["count"]) - key = jax.random.fold_in(jax.random.PRNGKey(5318008), state["count"]) - - # account for flax.linen.Partitioned grads and params - # boxed_updates, grads_structure = jax.tree.flatten( - # updates, is_leaf=lambda v: isinstance(v, (chex.Array, nn.Partitioned)) - # ) - # flax_partitioned = False - # if isinstance(boxed_updates[0], nn.Partitioned): - # flax_partitioned = True - # updates = [u.unbox() for u in boxed_updates] - # updates = grads_structure.unflatten(updates) - - scanned_layers_ = scanned_layers - if scanned_layers_ is None: - scanned_layers_ = jax.tree.map( - lambda x: ( - jax.tree.map(lambda _: True, x) - if isinstance(x, hax.nn.Stacked) - else False - ), - updates, - is_leaf=lambda x: isinstance(x, hax.nn.Stacked), - ) - updates, updates_struct = jax.tree.flatten(updates) - scanned_layers_ = jax.tree.leaves(scanned_layers_) - print(f"kron scanned_layers_: {scanned_layers_}") + key = jax.random.fold_in(jax.random.PRNGKey(42), state["count"]) + + def fsdp_size(): + mesh = hax.partitioning._get_mesh() + fsdp_axis_name = hax.partitioning.ResourceAxis.DATA + fsdp_axis = mesh.axis_names.index(fsdp_axis_name) + fsdp_size = mesh.devices.shape[fsdp_axis] + return fsdp_size + + # grab scanned layers and params sharding + scanned_layers_ = jax.tree.map( + lambda x: ( + jax.tree.map(lambda _: True, x, is_leaf=lambda x: isinstance(x, jax.Array)) + if isinstance(x, hax.nn.Stacked) + else jax.tree.map(lambda _: False, x, is_leaf=lambda x: isinstance(x, jax.Array)) + ), + updates, + is_leaf=lambda x: isinstance(x, hax.nn.Stacked), + ) + params_sharding_ = hax.partitioning.infer_resource_partitions(updates) + params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) + + updates, grads_structure = jax.tree.flatten(updates, is_leaf=lambda x: isinstance(x, jax.Array)) + scanned_layers_ = grads_structure.flatten_up_to(scanned_layers_) + params_sharding_ = jax.tree.leaves(params_sharding_, is_leaf=lambda x: isinstance(x, PartitionSpec)) + Qs = state["Qs_preconditioners"] + # print(f"kron updates: {jax.tree.map(lambda x: x.shape, updates)}") + # print(f"kron scanned_layers_: {scanned_layers_}") + # print(f"kron params_sharding_: {params_sharding_}") update_prob_in = preconditioner_update_probability if isinstance(preconditioner_update_probability, Callable): @@ -366,13 +373,9 @@ def update_fn(updates: base.Updates, state: dict, params: base.Params = None): momentum_updates = updates if state["mu"] is not None: mu = otu.tree_update_moment(updates, state["mu"], b1, 1) + mu = with_sharding_constraint(mu, params_sharding_) momentum_updates = otu.tree_bias_correction(mu, b1, count_inc) - - # flatten pytrees - updates, grads_structure = jax.tree.flatten(updates) - momentum_updates = grads_structure.flatten_up_to(momentum_updates) - Qs = grads_structure.flatten_up_to(state["Qs_preconditioners"]) - scanned_layers_ = grads_structure.flatten_up_to(scanned_layers_) + momentum_updates = with_sharding_constraint(momentum_updates, params_sharding_) # get einsum expressions expressions = [ @@ -388,6 +391,16 @@ def update_fn(updates: base.Updates, state: dict, params: base.Params = None): for t, s, Q in zip(updates, scanned_layers_, Qs) ] + # qs sharding + def get_q_sharding(q, s): + q_shape_no_s = q.shape[int(s):] + if len(q_shape_no_s) > 1 and q_shape_no_s[0] % fsdp_size() == 0: + return PartitionSpec(None, 'data') if s else PartitionSpec('data') + else: + return PartitionSpec(None) + + qs_sharding_ = [[get_q_sharding(q, s)for q in Q] for Q, s in 
zip(Qs, scanned_layers_)] + # maybe update preconditioner def update_preconditioner(key, Qs): with jax.default_matmul_precision(precond_update_precision): @@ -414,6 +427,7 @@ def _balance_Q(Q: List[jax.Array]): key, subkey = jax.random.split(key) do_balances = jax.random.uniform(subkey) < 0.01 Qs = jax.lax.cond(do_balances, balance_Qs, lambda qs: qs, Qs) + Qs = with_sharding_constraint(Qs, qs_sharding_) # create random vectors key, subkey = jax.random.split(key) @@ -422,6 +436,7 @@ def _balance_Q(Q: List[jax.Array]): jax.random.normal(k, shape=g.shape, dtype=g.dtype) for k, g in zip(Vs_keys, precond_updates_in) ] + Vs = with_sharding_constraint(Vs, params_sharding_) # damp based on machine precision (f32 probably enough) damp_eps = jnp.sqrt(jnp.finfo(jnp.float32).eps) @@ -436,6 +451,7 @@ def _balance_Q(Q: List[jax.Array]): map_fn(s, _conjB, Q, g, v) for s, Q, g, v in zip(scanned_layers_, Qs, precond_updates_in, Vs) ] + conjBs = with_sharding_constraint(conjBs, params_sharding_) # update Qs new_Qs = [ @@ -452,6 +468,7 @@ def _balance_Q(Q: List[jax.Array]): scanned_layers_, expressions, Qs, precond_updates_in, conjBs ) ] + new_Qs = with_sharding_constraint(new_Qs, qs_sharding_) new_Qs = otu.tree_cast(new_Qs, precond_dtype) return new_Qs @@ -462,6 +479,7 @@ def _balance_Q(Q: List[jax.Array]): update_counter_inc = jnp.where(do_update, 0, update_counter_inc) key, subkey = jax.random.split(key) Qs = jax.lax.cond(do_update, update_preconditioner, lambda _, qs: qs, subkey, Qs) + Qs = with_sharding_constraint(Qs, qs_sharding_) # precondition gradients with jax.default_matmul_precision(precond_grads_precision): @@ -471,18 +489,10 @@ def _balance_Q(Q: List[jax.Array]): scanned_layers_, expressions, Qs, momentum_updates ) ] - - # box preconditioned grads - # if flax_partitioned: - # precond_gs = [ - # u.replace_boxed(pg) for u, pg in zip(boxed_updates, precond_gs) - # ] + precond_gs = with_sharding_constraint(precond_gs, params_sharding_) # unflatten pytrees - updates = grads_structure.unflatten(precond_gs) - Qs = grads_structure.unflatten(Qs) - - updates = updates_struct.unflatten(updates) + precond_gs = grads_structure.unflatten(precond_gs) # dtypes and new state mu = otu.tree_cast(mu, mu_dtype) @@ -494,7 +504,7 @@ def _balance_Q(Q: List[jax.Array]): update_counter=update_counter_inc, ) - return updates, state + return precond_gs, state return base.GradientTransformation(init_fn, update_fn) @@ -518,7 +528,6 @@ def kron( precond_dtype: Optional[Union[str, jnp.dtype]] = None, precond_update_precision: Optional[str] = "tensorfloat32", precond_grads_precision: Optional[str] = None, - scanned_layers: Optional[base.Params] = None, lax_map_scanned_layers: bool = False, lax_map_batch_size: int = 8, ) -> base.GradientTransformation: @@ -552,8 +561,6 @@ def kron( 'bfloat16', 'tensorfloat32', 'float32'. precond_grads_precision: str, precision for matmul during preconditioning grads, 'bfloat16', 'tensorfloat32', 'float32'. - scanned_layers: optional base.Params, tree of bool same structure as params - indicating scanned layers. PSGD will vmap over the first dim. lax_map_scanned_layers: bool, whether to use lax.map for scanned layers instead of vmap. Useful to save memory with large models. lax_map_batch_size: int, batch size for lax.map, see JAX docs for more info. 
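The counter logic above ("update preconditioner deterministically") turns the update probability into a fixed cadence: with probability p, the preconditioner is refreshed once every ceil(1/p) steps rather than by coin flip. A plain-Python sketch of that cadence, using the same comparison as the do_update test above (step counts here are hypothetical):

    def update_steps(prob, n_steps):
        counter, steps = 0, []
        for step in range(n_steps):
            counter += 1
            if counter >= 1 / prob:  # same test as do_update above
                counter = 0
                steps.append(step)
        return steps

    print(update_steps(1.0, 6))     # every step: [0, 1, 2, 3, 4, 5]
    print(update_steps(0.05, 100))  # every 20th step: [19, 39, 59, 79, 99]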
@@ -562,7 +569,7 @@ def kron( optax.GradientTransformation """ optimizer = [ - scale_by_kron( + scale_by_kron_for_levanter( b1=b1, normalize_grads=normalize_grads, preconditioner_update_probability=preconditioner_update_probability, @@ -576,7 +583,6 @@ def kron( precond_dtype=precond_dtype, precond_update_precision=precond_update_precision, precond_grads_precision=precond_grads_precision, - scanned_layers=scanned_layers, lax_map_scanned_layers=lax_map_scanned_layers, lax_map_batch_size=lax_map_batch_size, ) @@ -602,9 +608,7 @@ def _norm_lower_bound(A: jax.Array): def calc(A): A = A / max_abs - A_conj = A.conj() - - aa = jnp.real(A * A_conj) + aa = A * A aa_sum0 = jnp.sum(aa, axis=0) aa_sum1 = jnp.sum(aa, axis=1) @@ -615,13 +619,13 @@ def calc(A): def gt_branch(): x = jax.lax.dynamic_index_in_dim(A, i, 1, keepdims=False) - x = x.conj() @ A - return max_abs * jnp.linalg.norm((x / jnp.linalg.norm(x)) @ A_conj.T) + x = x @ A + return max_abs * jnp.linalg.norm((x / jnp.linalg.norm(x)) @ A.T) def le_branch(): x = jax.lax.dynamic_index_in_dim(A, j, 0, keepdims=False) - x = A @ x.conj() - return max_abs * jnp.linalg.norm(A_conj.T @ (x / jnp.linalg.norm(x))) + x = A @ x + return max_abs * jnp.linalg.norm(A.T @ (x / jnp.linalg.norm(x))) return jax.lax.cond(value0 > value1, gt_branch, le_branch) @@ -705,7 +709,17 @@ def _init_Q_exprs( else: # use triangular matrix as preconditioner for this dim if existing_Q is None: - Q.append(scale * jnp.eye(size, dtype=dtype)) + def fsdp_size(): + mesh = hax.partitioning._get_mesh() + fsdp_axis_name = hax.partitioning.ResourceAxis.DATA + fsdp_axis = mesh.axis_names.index(fsdp_axis_name) + fsdp_size = mesh.devices.shape[fsdp_axis] + return fsdp_size + + new_q = scale * jnp.eye(size, dtype=dtype) + if new_q.shape[0] % fsdp_size() == 0: + new_q = with_sharding_constraint(new_q, PartitionSpec('data')) + Q.append(new_q) piece1A.append(letters[i] + letters[i + 13]) piece2A = piece2A + letters[i + 13] @@ -772,7 +786,7 @@ def _conjB(Q, G, V): """Compute conjB.""" order = G.ndim p = list(range(order)) - conjB = jnp.transpose(V.conj(), p[1:] + p[:1]) + conjB = jnp.transpose(V, p[1:] + p[:1]) for i, q in enumerate(Q): conjB = conjB / q if q.ndim < 2 else _solve_triangular_right(conjB, q) if i < order - 1: @@ -786,24 +800,29 @@ def _update_precond(Q, G, conjB, exprs, precond_lr): A = jnp.einsum(exprA, *Q, G) - A_conj = A.conj() - conjB_conj = conjB.conj() - def _update_single_q(i, q): - term1 = jnp.einsum(exprGs[i], A, A_conj) - term2 = jnp.einsum(exprGs[i], conjB_conj, conjB) + term1 = jnp.einsum(exprGs[i], A, A) + term2 = jnp.einsum(exprGs[i], conjB, conjB) - tmp = term1 - term2 - tmp *= precond_lr if q.ndim < 2: - tmp *= q - tmp /= _add_tiny(jnp.max(jnp.abs(term1 + term2))) - q -= tmp + q -= ( + precond_lr + / _add_tiny(jnp.max(jnp.abs(term1 + term2))) + * (term1 - term2) + * q + ) else: - tmp = jnp.triu(tmp) - tmp /= _add_tiny(_norm_lower_bound(term1 + term2)) - tmp @= q - q -= tmp + # main place I've found so far that needs specific sharding constraint is + # here on terms with transposed q sharding + term1 = with_sharding_constraint(term1, PartitionSpec(None, 'data')) + term2 = with_sharding_constraint(term2, PartitionSpec(None, 'data')) + + q -= ( + precond_lr + / _add_tiny(_norm_lower_bound(term1 + term2)) + * jnp.triu(term1 - term2) + @ q + ) return q return [_update_single_q(i, q) for i, q in enumerate(Q)] @@ -812,1596 +831,4 @@ def _update_single_q(i, q): def _precond_grad(Q, G, exprs): """Precondition gradient G with preconditioner Q.""" exprP = exprs[-1] - return 
jnp.einsum(exprP, *[q.conj() for q in Q], *Q, G) - - -# """PSGD Kron""" -# from typing import Any, List, Optional, Union, Callable, Tuple -# from collections import defaultdict -# from functools import partial -# import string -# import numpy as np - -# import chex -# import jax -# from jax import numpy as jnp, vmap -# from jax.sharding import PartitionSpec -# from jax.lax import with_sharding_constraint -# from optax import tree_utils as otu -# from optax._src import base, transform -# from optax._src.numerics import safe_int32_increment -# from optax._src.utils import canonicalize_dtype -# from optax._src.combine import chain - -# try: -# import flax.linen as nn - -# have_flax = True -# except ImportError: -# have_flax = False -# try: -# import haliax as hax - -# have_hax = True -# except ImportError: -# have_hax = False - - -# def precond_update_prob_schedule( -# max_prob=1.0, min_prob=0.03, decay=0.001, flat_start=500 -# ): -# """Anneal preconditioner update probability during beginning of training. - -# PSGD benefits from more preconditioner updates at the beginning of training, -# but once the preconditioner is learned the update probability can drop low. - -# This schedule is an exponential anneal with a flat start. Default settings keep -# update probability at 1.0 for 500 steps then exponentially anneal down to -# `min_prob` by 4000 steps. Default settings work well for most models and -# training regimes. -# """ - -# def _schedule(n): -# """Exponential anneal with flat start.""" -# return jnp.clip(max_prob * jnp.exp(-decay * (n - flat_start)), min_prob, max_prob) - -# return _schedule - - -# def scale_by_kron( -# b1: float = 0.9, -# normalize_grads: bool = False, -# preconditioner_update_probability: Union[ -# float, Callable[[int], float] -# ] = precond_update_prob_schedule(), -# max_size_triangular: int = 8192, -# min_ndim_triangular: int = 2, -# memory_save_mode: Optional[str] = None, -# preconditioner_lr: float = 0.1, -# preconditioner_init_scale: float = 1.0, -# mu_dtype: Optional[Union[str, jnp.dtype]] = None, -# precond_dtype: Optional[Union[str, jnp.dtype]] = None, -# precond_update_precision: Optional[str] = "tensorfloat32", -# precond_grads_precision: Optional[str] = None, -# scanned_layers: Optional[base.Params] = None, -# lax_map_scanned_layers: bool = False, -# lax_map_batch_size: int = 8, -# merge_small_dims: bool = False, -# target_merged_dim_size: int = 2048, -# partition_grads_into_blocks: bool = False, -# block_size: int = 256, -# params_sharding: Optional[Any] = None, -# preconditioner_sharding: Optional[PartitionSpec[str, str]] = None, -# **kwargs, -# ) -> base.GradientTransformation: -# """ -# Implements PSGD Kron from https://github.com/lixilinx/psgd_torch. - -# Args: -# b1: float, momentum parameter. 0.9 or 0.95 are common values. -# normalize_grads: bool, whether to normalize the incoming gradients to unit -# norm layer-wise. Can help with stability. -# preconditioner_update_probability: float, probability of updating the -# preconditioner. Default anneals from 1.0 to 0.03 by 4000 steps. -# max_size_triangular: int, max size for dim's preconditioner to be triangular. -# min_ndim_triangular: int, minimum number of dimensions a layer needs to have -# triangular preconditioners. -# memory_save_mode: optional str, None, 'one_diag', or 'all_diag', None is default -# to set all preconditioners to be triangular, 'one_diag' sets the largest -# or last dim to be diagonal per layer, and 'all_diag' sets all preconditioners -# to be diagonal. 
-# preconditioner_lr: float, learning rate for preconditioner. -# preconditioner_init_scale: float, scale for preconditioner initialization. -# mu_dtype: optional str or jnp.dtype, dtype of the momentum buffer. Defaults to -# same dtype as the parameters. -# precond_dtype: optional str or jnp.dtype, dtype of the preconditioners. Defaults -# to 'float32'. -# precond_update_precision: str, precision for matmul during preconditioner update, -# 'bfloat16', 'tensorfloat32', 'float32'. -# precond_grads_precision: str, precision for matmul during preconditioning grads, -# 'bfloat16', 'tensorfloat32', 'float32'. -# scanned_layers: optional base.Params, tree of booleans same structure as -# params indicating scanned dimensions for each layer. PSGD will vmap over -# leading dimension. -# lax_map_scanned_layers: bool, whether to use lax.map for scanned layers -# instead of vmap. Useful to save memory with large models. -# lax_map_batch_size: int, batch size for lax.map, see JAX docs for more info. -# merge_small_dims: bool, whether to merge small dimensions to improve -# preconditioner efficiency. -# target_merged_dim_size: int, target size of merged dimensions. -# partition_grads_into_blocks: bool, whether to partition grads into chunks of -# size `block_size` for efficiency. -# block_size: int, block size to use for partitioning grads. -# params_sharding: pytree same structure as params of jax.sharding.PartitionSpec. -# preconditioner_sharding: `None` or `PartitionSpec(str | None, str | None)`, -# PartitionSpec for preconditioner matrices. `None` infers a strategy -# from params_sharding that matches first preconditioner axis to -# corresponding axis in params. Best practice, though, is to shard the first -# dimension across fsdp-like mesh axis, or the largest, most common axis in -# params. For example, PartitionSpec('fsdp') or PartitionSpec('fsdp', 'tp'). 
- -# Returns: -# optax.GradientTransformation -# """ -# mu_dtype = canonicalize_dtype(mu_dtype) -# precond_dtype = canonicalize_dtype(precond_dtype or jnp.float32) -# lax_map = lax_map_scanned_layers -# bs = lax_map_batch_size - -# def init_fn(params, return_partition_specs_only=False): -# # unbox if haliax style partitioned -# scanned_layers_ = scanned_layers -# params_sharding_ = params_sharding -# if have_hax: -# if any( -# isinstance(x, hax.NamedArray) -# for x in jax.tree.leaves( -# params, is_leaf=lambda x: isinstance(x, hax.NamedArray) -# ) -# ): -# # if in haliax, we can grab scanned_layers and params_sharding from params -# # this does not support nested stacks -# if scanned_layers_ is None: -# scanned_layers_ = jax.tree.map( -# lambda x: ( -# jax.tree.map(lambda _: True, x) -# if isinstance(x, hax.nn.Stacked) -# else False -# ), -# params, -# is_leaf=lambda x: isinstance(x, hax.nn.Stacked), -# ) -# if params_sharding_ is None: -# try: -# params_sharding_ = hax.partitioning.infer_resource_partitions(params) -# params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) -# except: -# params_sharding_ = None -# params, params_struct = jax.tree.flatten(params) -# scanned_layers_ = jax.tree.leaves(scanned_layers_) -# print(f"kron scanned_layers_: {scanned_layers_}") -# if params_sharding_ is not None: -# params_sharding_ = jax.tree.leaves(params_sharding_) -# print(f"kron params_sharding_: {params_sharding_}") - -# have_params_sharding = params_sharding_ is not None -# have_qs_sharding = have_params_sharding or preconditioner_sharding is not None - -# # unbox if flax style partitioned -# if have_flax: -# params = jax.tree.map( -# lambda x: x.unbox() if isinstance(x, nn.Partitioned) else x, -# params, -# is_leaf=lambda x: isinstance(x, nn.Partitioned), -# ) - -# # check that there is a PartitionSpec for every param -# if params_sharding_ is not None: -# assert len(jax.tree.leaves(params_sharding_)) == len( -# jax.tree.leaves(params) -# ), "There must be a PartitionSpec for every parameter in PSGD Kron." -# # check that preconditioner sharding length is at least 1 -# if preconditioner_sharding is not None: -# assert len(preconditioner_sharding) > 0, ( -# "preconditioner_sharding must have length > 0. For example, " -# "PartitionSpec(None) or PartitionSpec('fsdp', None) are valid." 
-# ) - -# # extend partition specs -# if have_params_sharding: -# params_sharding_ = jax.tree.map( -# lambda p, sh: PartitionSpec(*(sh + (None,) * (len(p.shape) - len(sh)))), -# params, -# params_sharding_, -# ) -# preconditioner_sharding_ = preconditioner_sharding -# if preconditioner_sharding is not None: -# if len(preconditioner_sharding) < 2: -# preconditioner_sharding_ = PartitionSpec(preconditioner_sharding[0], None) - -# # reshape params shaped () to (1,) to make things simpler -# params = jax.tree.map(lambda p: p[None] if len(p.shape) == 0 else p, params) -# if have_params_sharding: -# params_sharding_ = jax.tree.map( -# lambda sh: PartitionSpec(None) if sh == PartitionSpec() else sh, -# params_sharding_, -# ) - -# # scanned layers -# if scanned_layers_ is None: -# scanned_layers_ = jax.tree.map(lambda _: False, params) -# scanned_sizes = jax.tree.map( -# lambda p, s: p.shape[0] if s else 0, params, scanned_layers_ -# ) - -# # momentum -# mu = None -# mu_sharding = params_sharding_ -# if b1 > 0 and not return_partition_specs_only: -# mu = jax.tree.map(lambda x: jnp.zeros_like(x, dtype=mu_dtype), params) -# # apply params sharding to momentum buffer -# if have_params_sharding: -# mu = _safe_sharding_constraint(mu, params_sharding_) - -# # which preconditioners will be diagonal -# dim_diag = jax.tree.map( -# lambda p, s: _get_preconditioner_types( -# p.shape[int(s) :], -# max_size_triangular, -# min_ndim_triangular, -# memory_save_mode, -# ), -# params, -# scanned_layers_, -# ) - -# # split sharding specs -# scanned_dim_sharding = None -# sharding_without_scan = None -# if have_params_sharding: -# scanned_dim_sharding = jax.tree.map( -# lambda sh, s: PartitionSpec(sh[0]) if s else None, -# params_sharding_, -# scanned_layers_, -# ) -# sharding_without_scan = jax.tree.map( -# lambda sh, s: PartitionSpec(*(sh[int(s) :])), -# params_sharding_, -# scanned_layers_, -# ) - -# # merge small dimensions -# nones = jax.tree.map(lambda _: None, params) -# merged_shapes = jax.tree.map( -# lambda p, s: p.shape[int(s) :], params, scanned_layers_ -# ) -# if merge_small_dims: -# output = jax.tree.map( -# lambda p, s, dd, sh: _merge_small_dims( -# p.shape[int(s) :], target_merged_dim_size, dd, sh -# ), -# params, -# scanned_layers_, -# dim_diag, -# sharding_without_scan if have_params_sharding else nones, -# ) -# merged_shapes, dim_diag, sharding_without_scan = [ -# jax.tree.map(lambda _, x: x[i], params, output) for i in range(3) -# ] - -# # partition grads into blocks -# partitioned_shapes = merged_shapes -# if partition_grads_into_blocks: -# partitioners = jax.tree.map( -# lambda _, ps, dd: BlockPartitioner(ps, block_size, dd), -# params, -# merged_shapes, -# dim_diag, -# ) -# # we can grab resulting shapes from partitioners -# partitioned_shapes = jax.tree.map( -# lambda _, p_cls: p_cls._padded_stacked_shape, params, partitioners -# ) - -# # initialize preconditioners -# output = jax.tree.map( -# lambda _, ps, dd, sh: list( -# _init_Q_exprs( -# ps[1:] if partition_grads_into_blocks else ps, -# preconditioner_init_scale, -# dd, -# precond_dtype, -# existing_Q=True if return_partition_specs_only else None, -# precond_sharding=preconditioner_sharding_, -# param_sharding=sh, -# ) -# ), -# params, -# partitioned_shapes, -# dim_diag, -# sharding_without_scan if have_params_sharding else nones, -# ) -# if return_partition_specs_only: -# exprs, Qs_sharding_no_leading_dims = [ -# jax.tree.map(lambda _, x: x[i], params, output) for i in range(2) -# ] -# else: -# Qs, exprs, Qs_sharding_no_leading_dims = [ 
-# jax.tree.map(lambda _, x: x[i], params, output) for i in range(3) -# ] -# Qs_sharding = None -# if have_qs_sharding: -# # add scan and stack dims to Qs sharding -# def add_dims_to_spec(_, qss, sds): -# if partition_grads_into_blocks: -# qss = jax.tree.map(lambda qs: PartitionSpec(*((None,) + qs)), qss) -# if sds is not None: -# qss = jax.tree.map(lambda qs: PartitionSpec(*(sds + qs)), qss) -# return qss - -# Qs_sharding = jax.tree.map( -# add_dims_to_spec, -# params, -# Qs_sharding_no_leading_dims, -# scanned_dim_sharding, -# ) - -# if not return_partition_specs_only: -# # broadcast Qs for stacks and scans -# def broadcast_qs(_, ps, q, s): -# stack_n = ps[0] -# if partition_grads_into_blocks: -# # add leading dim for stacked partitions -# q = jax.tree.map( -# lambda x: jnp.repeat(jnp.expand_dims(x, 0), stack_n, axis=0), q -# ) -# if s > 0: -# # add leading dim if we're scanning this layer -# q = jax.tree.map( -# lambda d: jnp.repeat(jnp.expand_dims(d, 0), s, axis=0), q -# ) -# return q - -# Qs = jax.tree.map(broadcast_qs, params, partitioned_shapes, Qs, scanned_sizes) -# if have_qs_sharding: -# Qs = _safe_sharding_constraint(Qs, Qs_sharding) - -# # Calculate and print sizes for preconditioners and momentum -# Qs_n_elements = sum([q.size for q in jax.tree.leaves(Qs)]) -# Qs_size_MB = sum( -# [q.size * q.dtype.itemsize / (2**20) for q in jax.tree.leaves(Qs)] -# ) -# if jax.process_index() == 0: -# print( -# f"PSGD Preconditioners size: {Qs_n_elements} elements, " -# f"{Qs_size_MB:.2f} MB" -# ) -# if mu is not None: -# mu_n_elements = sum([p.size for p in jax.tree.leaves(mu)]) -# mu_size_MB = sum( -# [p.size * p.dtype.itemsize / (2**20) for p in jax.tree.leaves(mu)] -# ) -# if jax.process_index() == 0: -# print( -# f"PSGD Momentum size: {mu_n_elements} elements, {mu_size_MB:.2f} MB" -# ) - -# if return_partition_specs_only: -# return dict( -# key=PartitionSpec(), -# count=PartitionSpec(), -# mu=mu_sharding, -# Qs_preconditioners=Qs_sharding, -# update_counter=PartitionSpec(), -# balance_counter=PartitionSpec(), -# ) - -# return dict( -# key=jax.random.PRNGKey(0), -# count=jnp.zeros([], jnp.int32), -# mu=mu, -# Qs_preconditioners=Qs, -# update_counter=jnp.zeros([], jnp.int32), -# balance_counter=jnp.zeros([], jnp.int32), -# ) - -# def update_fn(updates: base.Updates, state: dict, params: base.Params = None): -# del params -# count_inc = safe_int32_increment(state["count"]) -# key, subkey = jax.random.split(state["key"]) - -# # unbox if haliax style partitioned -# scanned_layers_ = scanned_layers -# params_sharding_ = params_sharding -# hax_partitioned = False -# if have_hax: -# if any( -# isinstance(x, hax.NamedArray) -# for x in jax.tree.leaves( -# updates, is_leaf=lambda x: isinstance(x, hax.NamedArray) -# ) -# ): -# hax_partitioned = True -# # if in haliax, we can grab scanned_layers and params_sharding from params -# # this does not support nested stacks -# if scanned_layers_ is None: -# scanned_layers_ = jax.tree.map( -# lambda x: ( -# jax.tree.map(lambda _: True, x) -# if isinstance(x, hax.nn.Stacked) -# else False -# ), -# updates, -# is_leaf=lambda x: isinstance(x, hax.nn.Stacked), -# ) -# if params_sharding_ is None: -# try: -# params_sharding_ = hax.partitioning.infer_resource_partitions(updates) -# params_sharding_ = jax.tree.map(lambda x: x.spec, params_sharding_) -# except: -# params_sharding_ = None -# updates, updates_struct = jax.tree.flatten(updates) -# scanned_layers_ = jax.tree.leaves(scanned_layers_) -# print(f"kron scanned_layers_: {scanned_layers_}") -# if 
params_sharding_ is not None: -# params_sharding_ = jax.tree.leaves(params_sharding_) -# print(f"kron params_sharding_: {params_sharding_}") - -# have_params_sharding = params_sharding_ is not None -# if have_params_sharding: -# original_params_sharding_ = params_sharding_ -# have_qs_sharding = have_params_sharding or preconditioner_sharding is not None - -# # unbox if flax style partitioned -# flax_partitioned = False -# if have_flax: -# boxed_updates, grads_structure = jax.tree.flatten( -# updates, -# is_leaf=lambda g: isinstance( -# g, (chex.Array, nn.Partitioned, jax.ShapeDtypeStruct) -# ), -# ) -# if any(isinstance(g, nn.Partitioned) for g in boxed_updates): -# flax_partitioned = True -# updates = [g.unbox() for g in boxed_updates] -# updates = grads_structure.unflatten(updates) - -# # extend partition specs -# if have_params_sharding: -# params_sharding_ = jax.tree.map( -# lambda g, sh: PartitionSpec(*(sh + (None,) * (len(g.shape) - len(sh)))), -# updates, -# params_sharding_, -# ) -# preconditioner_sharding_ = preconditioner_sharding -# if preconditioner_sharding is not None: -# if len(preconditioner_sharding) < 2: -# preconditioner_sharding_ = PartitionSpec(preconditioner_sharding[0], None) - -# # reshape params shaped () to (1,) to make things simpler -# input_shapes = jax.tree.map(lambda g: g.shape, updates) -# updates = jax.tree.map(lambda g: g[None] if len(g.shape) == 0 else g, updates) -# if have_params_sharding: -# params_sharding_ = jax.tree.map( -# lambda sh: PartitionSpec(None) if sh == PartitionSpec() else sh, -# params_sharding_, -# ) - -# # scanned layers -# if scanned_layers_ is None: -# scanned_layers_ = jax.tree.map(lambda _: False, updates) - -# # update probability can be scheduled -# update_prob_in = preconditioner_update_probability -# if isinstance(preconditioner_update_probability, Callable): -# update_prob_in = preconditioner_update_probability(count_inc) - -# # normalize grads -# def norm_grads(g): -# return g / (jnp.linalg.norm(g) + 1e-16) - -# if normalize_grads: -# updates = jax.tree.map(norm_grads, updates) - -# # momentum -# mu = None -# momentum_updates = updates -# if state["mu"] is not None: -# mu = otu.tree_update_moment(updates, state["mu"], b1, 1) -# if have_params_sharding: -# mu = _safe_sharding_constraint(mu, params_sharding_) -# momentum_updates = otu.tree_bias_correction(mu, b1, count_inc) - -# # which preconditioners will be diagonal -# dim_diag = jax.tree.map( -# lambda g, s: _get_preconditioner_types( -# g.shape[int(s) :], -# max_size_triangular, -# min_ndim_triangular, -# memory_save_mode, -# ), -# momentum_updates, -# scanned_layers_, -# ) - -# # split sharding specs -# scanned_dim_sharding = None -# sharding_without_scan = None -# if have_params_sharding: -# scanned_dim_sharding = jax.tree.map( -# lambda sh, s: PartitionSpec(sh[0]) if s else None, -# params_sharding_, -# scanned_layers_, -# ) -# sharding_without_scan = jax.tree.map( -# lambda sh, s: PartitionSpec(*(sh[int(s) :])), -# params_sharding_, -# scanned_layers_, -# ) - -# # merge small dimensions -# nones = jax.tree.map(lambda _: None, momentum_updates) -# merged_params_sharding = params_sharding_ -# original_shapes = None -# if merge_small_dims: -# original_shapes = jax.tree.map( -# lambda g, s: g.shape[int(s) :], momentum_updates, scanned_layers_ -# ) -# output = jax.tree.map( -# lambda g, dd, s, sh: _merge_small_dims( -# g.shape[int(s) :], target_merged_dim_size, dd, sh -# ), -# momentum_updates, -# dim_diag, -# scanned_layers_, -# sharding_without_scan if 
have_params_sharding else nones, -# ) -# merged_shapes, dim_diag, sharding_without_scan = [ -# jax.tree.map(lambda _, x: x[i], momentum_updates, output) -# for i in range(3) -# ] -# # reshape -# momentum_updates = jax.tree.map( -# lambda g, s, ns: _map_fn( -# False, 0, int(s), lambda x, shape=ns: jnp.reshape(x, shape), g -# ), -# momentum_updates, -# scanned_layers_, -# merged_shapes, -# ) -# if have_params_sharding: -# # scanned dim sharding + new merged sharding -# merged_params_sharding = jax.tree.map( -# lambda sws, sds: PartitionSpec( -# *(sds + sws if sds is not None else sws) -# ), -# sharding_without_scan, -# scanned_dim_sharding, -# ) -# # constrain sharding -# if have_params_sharding: -# momentum_updates = _safe_sharding_constraint( -# momentum_updates, merged_params_sharding -# ) - -# # partition grads into blocks -# dummy_updates_tree = jax.tree.map(lambda _: jnp.zeros([]), updates) -# n_dims_to_map = jax.tree.map(lambda s: int(s), scanned_layers_) -# partitioned_sharding = merged_params_sharding -# partitioners = None -# partitioned_shapes = None -# if partition_grads_into_blocks: -# partitioners = jax.tree.map( -# lambda g, dd, s: BlockPartitioner(g.shape[int(s) :], block_size, dd), -# momentum_updates, -# dim_diag, -# scanned_layers_, -# ) -# # layers become tuples each containing layer's partitions -# momentum_updates = jax.tree.map( -# lambda g, p_cls, s: _map_fn(False, 0, int(s), p_cls.partition, g), -# momentum_updates, -# partitioners, -# scanned_layers_, -# ) -# partitioned_shapes = jax.tree.map( -# lambda _, g, s: jax.tree.map(lambda x: x.shape[int(s) :], g), -# dummy_updates_tree, -# momentum_updates, -# scanned_layers_, -# ) -# if have_params_sharding: -# # constrain partitions to same sharding as entire layer -# momentum_updates = jax.tree.map( -# lambda _, g, mps: jax.tree.map( -# lambda x: _safe_sharding_constraint(x, mps), g -# ), -# dummy_updates_tree, -# momentum_updates, -# merged_params_sharding, -# ) -# # pad and stack partitions, tuples become arrays with new leading dim -# momentum_updates = jax.tree.map( -# lambda _, g, s: _map_fn( -# False, -# 0, -# int(s), -# lambda x, bs=block_size: _pad_and_stack_matrices(x, bs), -# g, -# ), -# dummy_updates_tree, -# momentum_updates, -# scanned_layers_, -# ) -# if have_params_sharding: -# # add dim to sharding specs for new stacked dim -# partitioned_sharding = jax.tree.map( -# lambda mps, s: PartitionSpec(*(mps[: int(s)] + (None,) + mps[1:])), -# merged_params_sharding, -# scanned_layers_, -# ) -# n_dims_to_map = jax.tree.map(lambda x: x + 1, n_dims_to_map) -# # constrain sharding -# if have_params_sharding: -# momentum_updates = _safe_sharding_constraint( -# momentum_updates, partitioned_sharding -# ) - -# # get einsum expressions and Qs sharding -# Qs = state["Qs_preconditioners"] -# Qs_sharding = None -# exprs_and_sharding = jax.tree.map( -# lambda g, dd, sh, nm: _init_Q_exprs( -# g.shape[nm:], -# preconditioner_init_scale, -# dd, -# precond_dtype, -# existing_Q=True, -# precond_sharding=preconditioner_sharding_, -# param_sharding=sh, -# ), -# momentum_updates, -# dim_diag, -# sharding_without_scan if have_params_sharding else nones, -# n_dims_to_map, -# ) -# exprs, Qs_sharding_no_leading_dims = [ -# jax.tree.map(lambda _, x: x[i], dummy_updates_tree, exprs_and_sharding) -# for i in range(2) -# ] -# Qs_sharding = None -# if have_qs_sharding: -# # add scan and stack dims to Qs sharding -# def add_dims_to_spec(_, qss, sds): -# if partition_grads_into_blocks: -# qss = jax.tree.map(lambda qs: PartitionSpec(*((None,) 
+ qs)), qss) -# if sds is not None: -# qss = jax.tree.map(lambda qs: PartitionSpec(*(sds + qs)), qss) -# return qss - -# Qs_sharding = jax.tree.map( -# add_dims_to_spec, -# dummy_updates_tree, -# Qs_sharding_no_leading_dims, -# scanned_dim_sharding, -# ) - -# # maybe update preconditioner -# def update_preconditioner_fn(rngkey, Qs, grads_in, bal_counter): -# with jax.default_matmul_precision(precond_update_precision): -# # balance preconditioners about every 100 updates -# def balance_Qs(Qs_to_bal): -# def _balance_Q(Q): -# norms = jnp.array( -# [jnp.max(jnp.abs(q)) for q in Q], dtype=jnp.float32 -# ) -# gmean = jnp.exp(jnp.mean(jnp.log(norms))) -# to_mul = gmean / norms -# return [q * x.astype(q.dtype) for q, x in zip(Q, to_mul)] - -# return jax.tree.map( -# lambda _, Q, nm: _map_fn(False, 0, nm, _balance_Q, Q), -# dummy_updates_tree, -# Qs_to_bal, -# n_dims_to_map, -# ) - -# balance_counter_inc = safe_int32_increment(bal_counter) -# do_balances = balance_counter_inc >= 100 -# balance_counter_inc = jnp.where(do_balances, 0, balance_counter_inc) -# Qs = jax.lax.cond(do_balances, balance_Qs, lambda qs: qs, Qs) -# if have_qs_sharding: -# Qs = _safe_sharding_constraint(Qs, Qs_sharding) - -# # create random vectors -# Vs = _tree_random_like(rngkey, grads_in) -# # apply params sharding to random vectors -# if have_params_sharding: -# Vs = _safe_sharding_constraint(Vs, partitioned_sharding) - -# # damp based on machine precision -# damp_eps = jnp.sqrt(jnp.finfo(jnp.float32).eps) # bf16 eps too large -# grads_in = jax.tree.map( -# lambda g, v: g + damp_eps.astype(g.dtype) * jnp.mean(jnp.abs(g)) * v, -# grads_in, -# Vs, -# ) - -# # form conjB -# conjBs = jax.tree.map( -# lambda g, Q, v, nm: _map_fn(lax_map, bs, nm, _conjB, Q, g, v), -# grads_in, -# Qs, -# Vs, -# n_dims_to_map, -# ) -# if have_params_sharding: -# conjBs = _safe_sharding_constraint(conjBs, partitioned_sharding) - -# # update Qs and constrain sharding -# new_Qs = jax.tree.map( -# lambda g, Q, conjb, expr, nm, qss, sh: _map_fn( -# lax_map, -# bs, -# nm, -# partial( -# _update_precond, -# exprs=expr, -# precond_lr=preconditioner_lr, -# qs_sharding=qss, -# params_sharding=sh, -# ), -# Q, -# g, -# conjb, -# ), -# grads_in, -# Qs, -# conjBs, -# exprs, -# n_dims_to_map, -# Qs_sharding_no_leading_dims if have_qs_sharding else nones, -# sharding_without_scan if have_params_sharding else nones, -# ) -# if have_qs_sharding: -# new_Qs = _safe_sharding_constraint(new_Qs, Qs_sharding) - -# new_Qs = otu.tree_cast(new_Qs, precond_dtype) -# return new_Qs, balance_counter_inc - -# def pass_through_fn(rngkey, qs, grads_in, bal_counter): -# if have_qs_sharding: -# qs = _safe_sharding_constraint(qs, Qs_sharding) -# return qs, bal_counter - -# # update preconditioner deterministically -# update_counter_inc = safe_int32_increment(state["update_counter"]) -# do_update = update_counter_inc >= 1 / update_prob_in -# update_counter_inc = jnp.where(do_update, 0, update_counter_inc) -# # Qs, balance_counter_inc = jax.lax.cond( -# # do_update, -# # update_preconditioner_fn, -# # pass_through_fn, -# # subkey, -# # Qs, -# # momentum_updates, -# # state["balance_counter"], -# # ) - -# def cond_fn(state): -# return state[-1] - -# def iter_fn(state): -# rngkey, qs, grads_in, bal_counter, _ = state -# qs, bal_counter = update_preconditioner_fn(rngkey, qs, grads_in, bal_counter) -# return rngkey, qs, grads_in, bal_counter, False - -# while_out = jax.lax.while_loop( -# cond_fn, -# iter_fn, -# (subkey, Qs, momentum_updates, state["balance_counter"], do_update), -# ) -# _, 
Qs, _, balance_counter_inc, _ = while_out -# if have_qs_sharding: -# Qs = _safe_sharding_constraint(Qs, Qs_sharding) - -# # precondition gradients -# with jax.default_matmul_precision(precond_grads_precision): -# precond_gs = jax.tree.map( -# lambda g, Q, expr, nm: _map_fn( -# lax_map, bs, nm, partial(_precond_grad, exprs=expr), Q, g -# ), -# momentum_updates, -# Qs, -# exprs, -# n_dims_to_map, -# ) -# if have_params_sharding: -# precond_gs = _safe_sharding_constraint(precond_gs, partitioned_sharding) - -# # unpartition grads -# if partition_grads_into_blocks: -# precond_gs = jax.tree.map( -# lambda g, s, ps: _map_fn( -# False, -# 0, -# int(s), -# lambda p, shapes=ps: _unstack_and_unpad_matrices(p, shapes), -# g, -# ), -# precond_gs, -# scanned_layers_, -# partitioned_shapes, -# ) -# if have_params_sharding: -# precond_gs = _safe_sharding_constraint(precond_gs, merged_params_sharding) -# precond_gs = jax.tree.map( -# lambda _, g, s, p_cls: _map_fn( -# False, 0, int(s), p_cls.merge_partitions, g -# ), -# dummy_updates_tree, -# precond_gs, -# scanned_layers_, -# partitioners, -# ) -# if have_params_sharding: -# precond_gs = _safe_sharding_constraint(precond_gs, merged_params_sharding) - -# # un-merge dimensions -# if merge_small_dims: -# precond_gs = jax.tree.map( -# lambda g, s, os: _map_fn( -# False, 0, int(s), lambda p, shape=os: jnp.reshape(p, shape), g -# ), -# precond_gs, -# scanned_layers_, -# original_shapes, -# ) -# if have_params_sharding: -# precond_gs = _safe_sharding_constraint(precond_gs, params_sharding_) - -# # return scalars to original shape -# precond_gs = jax.tree.map( -# lambda g, s: jnp.reshape(g, s), precond_gs, input_shapes -# ) - -# # final constraint for good measure -# if have_params_sharding: -# precond_gs = _safe_sharding_constraint(precond_gs, original_params_sharding_) - -# # box preconditioned grads -# if flax_partitioned: -# flat_precond_gs, _ = jax.tree.flatten(precond_gs) -# precond_gs = [ -# bu.replace_boxed(g) for bu, g in zip(boxed_updates, flat_precond_gs) -# ] -# precond_gs = grads_structure.unflatten(precond_gs) -# if hax_partitioned: -# precond_gs = updates_struct.unflatten(precond_gs) - -# # dtypes and new state -# mu = otu.tree_cast(mu, mu_dtype) -# Qs = otu.tree_cast(Qs, precond_dtype) -# state = dict( -# key=key, -# count=count_inc, -# mu=mu, -# Qs_preconditioners=Qs, -# update_counter=update_counter_inc, -# balance_counter=balance_counter_inc, -# ) - -# return precond_gs, state - -# return base.GradientTransformation(init_fn, update_fn) - - -# def kron( -# learning_rate: Union[float, Callable[[int], float]] = 0.001, -# b1: float = 0.9, -# weight_decay: float = 0.0, -# weight_decay_mask: Optional[Union[Any, Callable[[base.Params], Any]]] = None, -# normalize_grads: bool = False, -# preconditioner_update_probability: Union[ -# float, Callable[[int], float] -# ] = precond_update_prob_schedule(), -# max_size_triangular: int = 8192, -# min_ndim_triangular: int = 2, -# memory_save_mode: Optional[str] = None, -# preconditioner_lr: float = 0.1, -# preconditioner_init_scale: float = 1.0, -# mu_dtype: Optional[Union[str, jnp.dtype]] = None, -# precond_dtype: Optional[Union[str, jnp.dtype]] = None, -# precond_update_precision: Optional[str] = "tensorfloat32", -# precond_grads_precision: Optional[str] = None, -# scanned_layers: Optional[base.Params] = None, -# lax_map_scanned_layers: bool = False, -# lax_map_batch_size: int = 8, -# merge_small_dims: bool = False, -# target_merged_dim_size: int = 2048, -# partition_grads_into_blocks: bool = False, -# 
block_size: int = 256, -# params_sharding: Optional[Any] = None, -# preconditioner_sharding: Optional[PartitionSpec[str, str]] = None, -# ) -> base.GradientTransformation: -# """ -# Implements PSGD Kron from https://github.com/lixilinx/psgd_torch. - -# Args: -# learning_rate: float or callable, learning rate schedule. -# b1: float, momentum parameter. 0.9 or 0.95 are common values. -# weight_decay: float, weight decay coefficient. -# weight_decay_mask: optional pytree same structure as params, or callable -# returning a pytree, that masks weight decay. Weight decay is applied to -# leaves that are True. -# normalize_grads: bool, whether to normalize the incoming gradients to unit -# norm layer-wise. Can help with stability. -# preconditioner_update_probability: float, probability of updating the -# preconditioner. Default anneals from 1.0 to 0.03 by 4000 steps. -# max_size_triangular: int, max size for dim's preconditioner to be triangular. -# min_ndim_triangular: int, minimum number of dimensions a layer needs to have -# triangular preconditioners. -# memory_save_mode: optional str, None, 'one_diag', or 'all_diag', None is default -# to set all preconditioners to be triangular, 'one_diag' sets the largest -# or last dim to be diagonal per layer, and 'all_diag' sets all preconditioners -# to be diagonal. -# preconditioner_lr: float, learning rate for preconditioner. -# preconditioner_init_scale: float, scale for preconditioner initialization. -# mu_dtype: optional str or jnp.dtype, dtype of the momentum buffer. Defaults to -# same dtype as the parameters. -# precond_dtype: optional str or jnp.dtype, dtype of the preconditioners. Defaults -# to 'float32'. -# precond_update_precision: str, precision for matmul during preconditioner update, -# 'bfloat16', 'tensorfloat32', 'float32'. -# precond_grads_precision: str, precision for matmul during preconditioning grads, -# 'bfloat16', 'tensorfloat32', 'float32'. -# scanned_layers: optional base.Params, tree of booleans same structure as -# params indicating scanned dimensions for each layer. PSGD will vmap over -# leading dimension. -# lax_map_scanned_layers: bool, whether to use lax.map for scanned layers -# instead of vmap. Useful to save memory with large models. -# lax_map_batch_size: int, batch size for lax.map, see JAX docs for more info. -# merge_small_dims: bool, whether to merge small dimensions to improve -# preconditioner efficiency. -# target_merged_dim_size: int, target size of merged dimensions. -# partition_grads_into_blocks: bool, whether to partition grads into chunks of -# size `block_size` for efficiency. -# block_size: int, block size to use for partitioning grads. -# params_sharding: pytree same structure as params of jax.sharding.PartitionSpec. -# preconditioner_sharding: `None` or `PartitionSpec(str | None, str | None)`, -# PartitionSpec for preconditioner matrices. `None` infers a strategy -# from params_sharding that matches first preconditioner axis to -# corresponding axis in params. Best practice, though, is to shard the first -# dimension across fsdp-like mesh axis, or the largest, most common axis in -# params. For example, PartitionSpec('fsdp') or PartitionSpec('fsdp', 'tp'). 
- -# Returns: -# optax.GradientTransformation -# """ -# optimizer = [ -# scale_by_kron( -# b1=b1, -# normalize_grads=normalize_grads, -# preconditioner_update_probability=preconditioner_update_probability, -# max_size_triangular=max_size_triangular, -# min_ndim_triangular=min_ndim_triangular, -# memory_save_mode=memory_save_mode, -# preconditioner_lr=preconditioner_lr, -# preconditioner_init_scale=preconditioner_init_scale, -# mu_dtype=mu_dtype, -# precond_dtype=precond_dtype, -# precond_update_precision=precond_update_precision, -# precond_grads_precision=precond_grads_precision, -# scanned_layers=scanned_layers, -# lax_map_scanned_layers=lax_map_scanned_layers, -# lax_map_batch_size=lax_map_batch_size, -# merge_small_dims=merge_small_dims, -# target_merged_dim_size=target_merged_dim_size, -# partition_grads_into_blocks=partition_grads_into_blocks, -# block_size=block_size, -# params_sharding=params_sharding, -# preconditioner_sharding=preconditioner_sharding, -# ) -# ] -# if weight_decay > 0.0: -# optimizer.append(transform.add_decayed_weights(weight_decay, weight_decay_mask)) -# optimizer.append(transform.scale_by_learning_rate(learning_rate)) -# return chain(*optimizer) - - -# def get_opt_state_partition_specs( -# params: base.Params, scale_by_kron_only: bool = False, **kwargs -# ): -# """Get tree of PartitionSpecs for kron optimizer state. - -# params converted to jax.ShapeDtypeStructs, no arrays are used. - -# Args: -# params: pytree of Arrays, nn.Partitioned, or jax.ShapeDtypeStruct. -# scale_by_kron_only: bool, If True, only returns partition specs for the -# `scale_by_kron` function, otherwise the `kron` function. -# kwargs: kwargs for kron (or scale_by_kron). - -# Returns: -# tree of PartitionSpecs for optimizer state. -# """ -# params_flat, params_struct = jax.tree.flatten(params) -# if have_flax: -# if isinstance(params_flat[0], nn.Partitioned): -# params_flat = [p.unbox(p) for p in params_flat] -# if not isinstance(params_flat[0], jax.ShapeDtypeStruct): -# params_flat = [jax.ShapeDtypeStruct(p.shape, p.dtype) for p in params_flat] -# params = params_struct.unflatten(params_flat) - -# specs = scale_by_kron(**kwargs).init(params, return_partition_specs_only=True) - -# if not scale_by_kron_only: -# specs = (specs,) -# if kwargs.get("weight_decay", 0.0) > 0.0: -# specs += (None,) -# specs += (None,) - -# return specs - - -# def _get_preconditioner_types( -# shape: Tuple[int, ...], max_size: int, min_ndim: int, mem_save_mode: Optional[str] -# ) -> List[bool]: -# if len(shape) == 0: -# return True - -# if mem_save_mode is None: -# dim_diag = [False for _ in shape] -# elif mem_save_mode == "one_diag": -# rev_sorted_dims = np.argsort(shape)[::-1] -# dim_diag = [False for _ in shape] -# dim_diag[rev_sorted_dims[0]] = True -# elif mem_save_mode == "all_diag": -# dim_diag = [True for _ in shape] -# else: -# raise ValueError( -# f"Invalid mem_save_mode: {mem_save_mode}, must be one of " -# "[None, 'one_diag', 'all_diag']" -# ) - -# for i, size in enumerate(shape): -# if size == 1 or size > max_size or len(shape) < min_ndim: -# dim_diag[i] = True - -# return dim_diag - - -# def _init_Q_exprs( -# t_shape, -# scale, -# dim_diag, -# dtype, -# existing_Q=None, -# precond_sharding=None, -# param_sharding=None, -# ): -# have_qs_sharding = precond_sharding is not None or param_sharding is not None -# letters = string.ascii_lowercase + string.ascii_uppercase -# if len(t_shape) == 0: # scalar -# Q = [scale * jnp.ones(t_shape, dtype=dtype)] if existing_Q is None else existing_Q -# exprA = ",->" -# 
exprGs = [",->"] -# exprP = ",,->" - -# sharding_out = [None] -# if have_qs_sharding: -# sharding_out = [PartitionSpec()] -# else: # tensor -# if len(t_shape) > 13: -# raise ValueError( -# f"Got tensor with dim {len(t_shape.shape)}; Einstein runs out of letters!" -# ) -# scale = scale ** (1 / len(t_shape)) -# Q = [] if existing_Q is None else existing_Q -# piece1A, piece2A, piece3A = ([], "", "") -# exprGs = [] -# piece1P, piece2P, piece3P, piece4P = ([], [], "", "") - -# params_specs = param_sharding -# if param_sharding is None: -# params_specs = PartitionSpec(*((None,) * len(t_shape))) -# sharding_out = [None] * len(t_shape) -# if have_qs_sharding: -# sharding_out = [PartitionSpec(None)] * len(t_shape) - -# for i, (size, dim_d, dim_sh) in enumerate(zip(t_shape, dim_diag, params_specs)): -# if dim_d: -# # use diagonal matrix as preconditioner for this dim -# if existing_Q is None: -# q = scale * jnp.ones(size, dtype=dtype) -# Q.append(q) - -# piece1A.append(letters[i]) -# piece2A = piece2A + letters[i] -# piece3A = piece3A + letters[i] - -# piece1 = "".join( -# [ -# (letters[i + 13] if j == i else letters[j]) -# for j in range(len(t_shape)) -# ] -# ) -# exprGs.append(piece1 + "," + piece1 + "->" + letters[i + 13]) - -# piece1P.append(letters[i + 13]) -# piece2P.append(letters[i + 13]) -# piece3P = piece3P + letters[i + 13] -# piece4P = piece4P + letters[i + 13] -# else: -# # use triangular matrix as preconditioner for this dim -# q_sharding = None -# if have_qs_sharding: -# if have_hax: -# # if we're in haliax we can grab fsdp axis and shard accordingly -# # get current mesh -# mesh = hax.partitioning._get_mesh() -# if mesh.devices.shape == (): -# mesh = None -# # get fsdp mesh axis -# if mesh is not None: -# fsdp_axis_name = hax.partitioning.ResourceAxis.DATA -# fsdp_axis = mesh.axis_names.index(fsdp_axis_name) -# fsdp_size = mesh.devices.shape[fsdp_axis] -# if size % fsdp_size == 0: -# q_sharding = PartitionSpec(fsdp_axis_name, None) -# else: -# q_sharding = PartitionSpec(None, None) -# else: -# q_sharding = PartitionSpec(None, None) -# else: -# # infer a so-so sharding scheme from params if nothing specified -# # (first dim of q will match corresponding dim in params) -# q_sharding = ( -# precond_sharding -# if precond_sharding is not None -# else PartitionSpec(dim_sh, None) -# ) -# # TODO ensure array axis is divisible by mesh axis -# sharding_out[i] = q_sharding - -# if existing_Q is None: -# q = scale * jnp.eye(size, dtype=dtype) -# if have_qs_sharding: -# q = _safe_sharding_constraint(q, q_sharding) -# Q.append(q) - -# piece1A.append(letters[i] + letters[i + 13]) -# piece2A = piece2A + letters[i + 13] -# piece3A = piece3A + letters[i] - -# piece1 = "".join( -# [ -# (letters[i + 13] if j == i else letters[j]) -# for j in range(len(t_shape)) -# ] -# ) -# piece2 = "".join( -# [ -# (letters[i + 26] if j == i else letters[j]) -# for j in range(len(t_shape)) -# ] -# ) -# exprGs.append( -# piece1 + "," + piece2 + "->" + letters[i + 13] + letters[i + 26] -# ) - -# a, b, c = (letters[i], letters[i + 13], letters[i + 26]) -# piece1P.append(a + b) -# piece2P.append(a + c) -# piece3P = piece3P + c -# piece4P = piece4P + b - -# exprA = ",".join(piece1A) + "," + piece2A + "->" + piece3A -# exprP = ( -# ",".join(piece1P) + "," + ",".join(piece2P) + "," + piece3P + "->" + piece4P -# ) - -# exprGs = tuple(exprGs) -# if existing_Q is not None: -# return (exprA, exprGs, exprP), sharding_out -# return Q, (exprA, exprGs, exprP), sharding_out - - -# def _norm_lower_bound(A: jax.Array): -# """Returns a 
cheap lower bound for the spectral norm of A. - -# Numerical results on random matrices with a wide range of distributions and -# sizes suggest, norm(A) <= sqrt(2) * norm_lower_bound(A). Looks to be a very -# tight lower bound. - -# A is hermitian so we can always use dim 0 and not have to compare to dim 1. -# """ -# max_abs = jnp.max(jnp.abs(A)) - -# def calc(A): -# A = A / max_abs -# aa = A * A -# aa_sum0 = jnp.sum(aa, axis=0) -# i = jnp.argmax(aa_sum0, 0) -# x = jax.lax.dynamic_index_in_dim(A, i, 1, keepdims=False) -# x = x @ A -# return max_abs * jnp.linalg.norm((x / jnp.linalg.norm(x)) @ A.T) - -# return jnp.where(max_abs > 0, calc(A), max_abs) - - -# def _solve_triangular_right(X, A): -# """Compute X @ inv(A). - -# A triangular solve has roughly the same complexity as a matmul. -# """ -# X_ndim = X.ndim -# if X_ndim < 2: -# X = X[None, :] - -# dtype_in = jnp.promote_types(A.dtype, X.dtype) -# A, X = A.astype(dtype_in), X.astype(dtype_in) -# leading_dims = 0 -# if X.ndim > 2: -# leading_dims = X.ndim - 2 -# solve_fn = partial(jax.lax.linalg.triangular_solve, left_side=False, lower=False) -# for _ in range(leading_dims): -# solve_fn = vmap(solve_fn, in_axes=(None, 0)) -# solution = solve_fn(A, X) - -# if X_ndim < 2: -# return solution[0] -# return solution - - -# def _conjB(Q, G, V): -# """Compute conjB.""" -# order = G.ndim -# p = list(range(order)) -# conjB = jnp.transpose(V, p[1:] + p[:1]) -# for i, q in enumerate(Q): -# conjB = conjB / q if q.ndim < 2 else _solve_triangular_right(conjB, q) -# if i < order - 1: -# conjB = jnp.swapaxes(conjB, i, order - 1) -# return conjB - - -# def _update_precond(Q, G, conjB, exprs, precond_lr, qs_sharding, params_sharding): -# """Compute A and update Q.""" -# exprA, exprGs, _ = exprs - -# A = jnp.einsum(exprA, *Q, G) - -# def _update_single_q(i, q): -# term1 = jnp.einsum(exprGs[i], A, A) -# term2 = jnp.einsum(exprGs[i], conjB, conjB) - -# if q.ndim < 2: -# q -= ( -# precond_lr -# / _add_tiny(jnp.max(jnp.abs(term1 + term2))) -# * (term1 - term2) -# * q -# ) -# else: -# if qs_sharding is not None: -# sharding = qs_sharding[i] -# # transpose q sharding for terms -# if len(sharding) < 2: -# sharding = PartitionSpec(*((None,) + sharding)) -# else: -# assert len(sharding) == 2 -# sharding = PartitionSpec(*(sharding[1:] + sharding[:1])) -# term1 = _safe_sharding_constraint(term1, sharding) -# term2 = _safe_sharding_constraint(term2, sharding) -# q -= ( -# precond_lr -# / _add_tiny(_norm_lower_bound(term1 + term2)) -# * jnp.triu(term1 - term2) -# @ q -# ) -# return q - -# return [_update_single_q(i, q) for i, q in enumerate(Q)] - - -# def _precond_grad(Q, G, exprs): -# """Precondition gradient G with preconditioner Q.""" -# exprP = exprs[-1] -# return jnp.einsum(exprP, *Q, *Q, G) - - -# def _safe_sharding_constraint(x, sharding): -# if sharding is None: -# return x -# else: -# return with_sharding_constraint(x, sharding) - - -# def _add_tiny(x): -# return x + jnp.finfo(x.dtype).tiny - - -# def _map_fn(lax_map, bs, n_maps, fn, *args): -# """Maybe map a fn along multiple leading axes.""" -# if n_maps <= 0: -# return fn(*args) - -# if lax_map: -# mapped_fn = lambda xs: _map_fn(lax_map, bs, n_maps - 1, fn, *xs) -# return jax.lax.map(mapped_fn, xs=args, batch_size=bs if bs > 1 else None) -# else: -# mapped_fn = lambda *xs: _map_fn(lax_map, bs, n_maps - 1, fn, *xs) -# return vmap(mapped_fn)(*args) - - -# def _tree_random_like( -# rng_key: chex.PRNGKey, target_tree: chex.ArrayTree, dtype=None -# ) -> chex.ArrayTree: -# # adopted from optax -# tree_def = 
jax.tree.structure(target_tree) -# keys = jax.random.split(rng_key, tree_def.num_leaves) -# keys_tree = jax.tree.unflatten(tree_def, keys) -# return jax.tree.map( -# lambda l, k: jax.random.normal( -# k, l.shape, dtype if dtype is not None else l.dtype -# ), -# target_tree, -# keys_tree, -# ) - - -# class BlockPartitioner: -# """Partitions a tensor into smaller tensors. - -# Modified from distributed_shampoo. -# https://github.com/google-research/google-research/blob/master/scalable_shampoo/optax/distributed_shampoo.py -# Scalable Second Order Optimization for Deep Learning, -# Rohan Anil, Vineet Gupta, Tomer Koren, Kevin Regan, Yoram Singer -# https://arxiv.org/abs/2002.09018 -# """ - -# def __init__(self, param_shape, block_size, dim_diag): -# assert len(dim_diag) == len( -# param_shape -# ), "dim_diag must have same length as param_shape" -# self._shape = param_shape -# self._splits = [] -# split_sizes = [] -# # We split params into smaller blocks. Here we store the metadata to make -# # that split. -# for i, d in enumerate(param_shape): -# if 0 < block_size < d and not dim_diag[i]: -# # d-1, otherwise split appends a 0-size array. -# nsplit = (d - 1) // block_size -# indices = (np.arange(nsplit, dtype=np.int32) + 1) * block_size -# sizes = np.ones(nsplit + 1, dtype=np.int32) * block_size -# sizes[-1] = d - indices[-1] -# self._splits.append((i, indices)) -# split_sizes.append(sizes) -# else: -# split_sizes.append(np.array([d], dtype=np.int32)) -# self._split_sizes = split_sizes - -# # TODO (evanatyourservice) -# # this might fail with scalar params but for now we're reshaping those -# single_shape = [a[0] for a in split_sizes] -# padded_single_shape = [-(-dim // block_size) * block_size for dim in single_shape] -# stack_size = max(1, np.prod([max(1, len(s)) for s in split_sizes])) -# self._padded_stacked_shape = tuple([stack_size] + padded_single_shape) - -# def split_sizes(self): -# return self._split_sizes - -# def partition(self, tensor): -# """Partition tensor into blocks.""" - -# assert tensor.shape == self._shape -# tensors = [tensor] -# for i, indices in self._splits: -# tensors_local = [] -# for t in tensors: -# tensors_local.extend(jnp.split(t, indices_or_sections=indices, axis=i)) -# tensors = tensors_local -# return tuple(tensors) - -# def merge_partitions(self, partitions): -# """Merge partitions back to original shape.""" - -# for i, indices in reversed(self._splits): -# n = len(indices) + 1 -# partial_merged_tensors = [] -# ind = 0 -# while ind < len(partitions): -# partial_merged_tensors.append( -# jnp.concatenate(partitions[ind : ind + n], axis=i) -# ) -# ind += n -# partitions = partial_merged_tensors -# assert len(partitions) == 1 -# return partitions[0] - - -# def _partitions(lst): -# """Generate all partitions of a list.""" -# if not lst: -# yield [[]] -# else: -# for i in range(len(lst)): -# for part in _partitions(lst[i + 1 :]): -# yield [lst[: i + 1]] + part - -# """ -# 128, 4, 4, 8 -# (128, 512) -# """ - -# def _merge_small_dims( -# shape_to_merge, max_dim, dim_diag, sharding_to_merge=None -# ) -> Tuple[List[int], List[bool], Optional[Tuple]]: -# if not shape_to_merge: # handles scalar shape () -# return [], [True], PartitionSpec() if sharding_to_merge is not None else None -# if np.all(np.array(shape_to_merge) == 1): # handles shape (1,) -# return ( -# [1], -# [True], -# PartitionSpec(None) if sharding_to_merge is not None else None, -# ) - -# def dim2loss(d, dim0=max_dim): -# """A heuristic map from dim to loss with the least loss occurs at dim0.""" -# loss = 
0 -# if d < dim0: -# loss += np.log2(dim0 / d) -# too_small = dim0 / 8 -# if d < too_small: -# loss += 100 * np.log2(too_small / d) -# else: -# loss += 10 * np.log2(d / dim0) -# too_large = 8 * dim0 -# if d > too_large: -# loss += 1000 * np.log2(d / too_large) -# return loss - -# best_loss = float("inf") -# best_partition = None - -# for p in _partitions(list(range(len(shape_to_merge)))): -# loss = 0 -# merged = [] -# for group in p: -# if not group: -# continue -# d = np.prod([shape_to_merge[i] for i in group]) -# loss += dim2loss(d) -# merged.append(group) - -# if loss < best_loss: -# best_loss = loss -# best_partition = merged - -# merged_shape = [] -# merged_diag = [] -# merged_sharding = [] - -# for group in best_partition: -# merged_shape.append(np.prod([shape_to_merge[i] for i in group])) -# merged_diag.append(all(dim_diag[i] for i in group)) -# if sharding_to_merge: -# group_shardings = [sharding_to_merge[i] for i in group] -# valid_shardings = [s for s in group_shardings if s is not None] - -# if len(valid_shardings) > 1: -# merged_sharding.append(tuple(valid_shardings)) -# elif len(valid_shardings) == 1: -# merged_sharding.append(valid_shardings[0]) -# else: -# merged_sharding.append(None) - -# return ( -# merged_shape, -# merged_diag, -# PartitionSpec(*merged_sharding) if sharding_to_merge else None, -# ) - - -# def _pad_and_stack_matrices(array_list, block_size): -# # Handle scalar arrays by adding a dummy dimension -# is_scalar = len(array_list[0].shape) == 0 -# if is_scalar: -# array_list = [arr[None] for arr in array_list] - -# shapes = [arr.shape for arr in array_list] -# max_dims = [max(shape[i] for shape in shapes) for i in range(len(shapes[0]))] -# padded_shape = [-(-dim // block_size) * block_size for dim in max_dims] -# padded_arrays = [] -# for arr in array_list: -# pad_width = [(0, padded_shape[i] - arr.shape[i]) for i in range(arr.ndim)] -# padded = jnp.pad(arr, pad_width) -# padded_arrays.append(padded) - -# stacked = jnp.stack(padded_arrays) -# return stacked - - -# def _unstack_and_unpad_matrices(stacked_array, original_shapes): -# # Handle scalar arrays -# is_scalar = len(original_shapes[0]) == 0 - -# unstacked = jnp.split(stacked_array, stacked_array.shape[0], axis=0) -# unpadded = [] -# for arr, orig_shape in zip(unstacked, original_shapes): -# arr = jnp.squeeze(arr, axis=0) -# if is_scalar: -# # For scalars, just take the first element -# arr = arr[0] -# else: -# # For non-scalars, slice to original shape -# slices = tuple(slice(0, dim) for dim in orig_shape) -# arr = arr[slices] -# unpadded.append(arr) -# return tuple(unpadded) - - -# # unused fns (can be used for stacking partitions without padding): -# def _sort_and_group_matrices(matrix_shapes: List[Tuple[int, ...]]): -# indexed_list = list(enumerate(matrix_shapes)) -# sorted_indexed = sorted(indexed_list, key=lambda x: x[1]) -# sorted_shapes = [shape for _, shape in sorted_indexed] -# change_indices = [original_index for original_index, _ in sorted_indexed] -# revert_indices = [0] * len(matrix_shapes) -# for new_pos, (original_index, _) in enumerate(sorted_indexed): -# revert_indices[original_index] = new_pos -# shape_groups = defaultdict(list) -# for i, shape in enumerate(sorted_shapes): -# shape_groups[shape].append(i) -# unique_sorted_shapes = list(shape_groups.keys()) -# return unique_sorted_shapes, dict(shape_groups), change_indices, revert_indices - - -# def _stack_matrices(array_list): -# in_tuple = isinstance(array_list, tuple) -# shapes = [arr.shape for arr in array_list] -# unique_shapes, 
shape_groups, change_indices, _ = _sort_and_group_matrices(shapes)
-#     sorted_arrays = [array_list[i] for i in change_indices]
-#     stacked_arrays = []
-#     for shape in unique_shapes:
-#         indices = shape_groups[shape]
-#         stacked = jnp.stack([sorted_arrays[i] for i in indices])
-#         stacked_arrays.append(stacked)
-#     if in_tuple:
-#         return tuple(stacked_arrays)
-#     return stacked_arrays
-
-
-# def _unstack_matrices(stacked_arrays, revert_indices):
-#     in_tuple = isinstance(stacked_arrays, tuple)
-#     unstacked = []
-#     for arr in stacked_arrays:
-#         unstacked.extend(jnp.split(arr, arr.shape[0]))
-#     array_list = [jnp.squeeze(unstacked[i], axis=0) for i in revert_indices]
-#     if in_tuple:
-#         return tuple(array_list)
-#     return array_list
-
-
-# if __name__ == "__main__":
-#     import jax_sourceror
-
-#     axis_a = hax.Axis("d", 128)
-#     axis_b = hax.Axis("b", 8)
-
-#     params = {
-#         "w": hax.NamedArray(jnp.ones((128, 8)), (axis_a, axis_b)),
-#         "b": hax.NamedArray(jnp.ones((128,)), (axis_a,)),
-#     }
-#     grads = {
-#         "w": hax.NamedArray(jnp.ones((128, 8)), (axis_a, axis_b)),
-#         "b": hax.NamedArray(jnp.ones((128,)), (axis_a,)),
-#     }
-
-#     optimizer = kron()
-#     opt_state = optimizer.init(params)
-#     source_code = jax_sourceror.sourcerize(optimizer.update)(grads, opt_state, params)
-
-#     print(source_code)
+    return jnp.einsum(exprP, *Q, *Q, G)

From 88d49ed113b7eaa84b4fb2406854d2760ce9a004 Mon Sep 17 00:00:00 2001
From: Evan Walters
Date: Sun, 22 Dec 2024 18:27:50 -0700
Subject: [PATCH 55/56] Update kron.py

---
 src/levanter/optim/kron.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py
index 51f57f2cc..bce3a1f70 100644
--- a/src/levanter/optim/kron.py
+++ b/src/levanter/optim/kron.py
@@ -49,7 +49,7 @@ class KronConfig(OptimizerConfig):
     # some of these are changed from kron defaults to better suit levanter
     beta1: float = 0.9
     weight_decay: float = 0.1
-    max_grad_norm: Optional[float] = 0.0
+    max_grad_norm: Optional[float] = None
     normalize_grads: bool = False
     preconditioner_update_probability: float = 0.05
     update_prob_flat_start: int = 1000

From 4d630d87db0747eb516ede226e2cbb72d7c47b6a Mon Sep 17 00:00:00 2001
From: Evan Walters
Date: Tue, 31 Dec 2024 15:44:46 -0700
Subject: [PATCH 56/56] get rid of norming and clipping in lieu of rms clip,
 retouches

---
 src/levanter/optim/kron.py | 28 ++++++----------------------
 1 file changed, 6 insertions(+), 22 deletions(-)

diff --git a/src/levanter/optim/kron.py b/src/levanter/optim/kron.py
index bce3a1f70..57006059b 100644
--- a/src/levanter/optim/kron.py
+++ b/src/levanter/optim/kron.py
@@ -15,11 +15,9 @@ class KronConfig(OptimizerConfig):
     Attributes:
         beta1: Momentum parameter. 0.9 or 0.95 are common values.
         weight_decay: Weight decay coefficient.
-        max_grad_norm: Optional gradient norm clipping value.
-        normalize_grads: Whether to normalize the incoming gradients to unit norm layer-wise.
-            Can help with stability but likely not necessary in this scenario.
+        max_grad_norm: Unused.
         preconditioner_update_probability: Final probability of updating the preconditioner. Default
-            is 0.05 (update every 20 steps). The `precond_update_prob_schedule` holds probability at
+            is 0.03 (update every 33 steps). The `precond_update_prob_schedule` holds probability at
             1.0 for `update_prob_flat_start` steps before annealing exponentially down to this value
             within ~3000 steps. Training is slower while updates are done every step, but training
             speeds up after update probability decays.
@@ -50,10 +48,9 @@ class KronConfig(OptimizerConfig):
     beta1: float = 0.9
     weight_decay: float = 0.1
     max_grad_norm: Optional[float] = None
-    normalize_grads: bool = False
-    preconditioner_update_probability: float = 0.05
+    preconditioner_update_probability: float = 0.03
     update_prob_flat_start: int = 1000
-    max_size_triangular: int = 25000
+    max_size_triangular: int = 16384
     min_ndim_triangular: int = 2
     memory_save_mode: Optional[str] = None
     preconditioner_lr: float = 0.1
@@ -67,15 +64,14 @@ class KronConfig(OptimizerConfig):
 
     def build(self, num_train_steps):
         """Creates the optimizer."""
+        if self.max_grad_norm is not None and jax.process_index() == 0:
+            print("WARNING: max_grad_norm is unused in PSGD Kron optimizer")
 
         def _optimizer(learning_rate) -> optax.GradientTransformation:
             components = []
-            if self.max_grad_norm and not self.normalize_grads:
-                components.append(optax.clip_by_global_norm(self.max_grad_norm))
             components.append(
                 scale_by_kron_for_levanter(
                     b1=self.beta1,
-                    normalize_grads=self.normalize_grads,
                     preconditioner_update_probability=precond_update_prob_schedule(
                         min_prob=self.preconditioner_update_probability,
                         flat_start=self.update_prob_flat_start,
@@ -157,7 +153,6 @@ def _schedule(n):
 
 def scale_by_kron_for_levanter(
     b1: float = 0.9,
-    normalize_grads: bool = False,
     preconditioner_update_probability: Union[
         float, Callable[[int], float]
     ] = precond_update_prob_schedule(),
@@ -182,7 +177,6 @@ def scale_by_kron_for_levanter(
 
     Args:
         b1: float, momentum parameter.
-        normalize_grads: bool, whether to normalize gradients to unit norm layer-wise.
         preconditioner_update_probability: float, probability of updating the
             preconditioner. Default anneals from 1.0 to 0.03 by 4000 steps.
         max_size_triangular: int, max size for dim's preconditioner to be triangular.
@@ -361,13 +355,6 @@ def fsdp_size():
         if isinstance(preconditioner_update_probability, Callable):
             update_prob_in = preconditioner_update_probability(count_inc)
 
-        # normalize grads
-        if normalize_grads:
-            updates = jax.tree.map(
-                lambda g: g / (jnp.linalg.norm(g) + 1e-16),
-                updates,
-            )
-
         # momentum
         mu = None
         momentum_updates = updates
@@ -512,7 +499,6 @@ def _balance_Q(Q: List[jax.Array]):
 def kron(
     learning_rate: Union[float, Callable[[int], float]] = 0.001,
     b1: float = 0.9,
-    normalize_grads: bool = False,
     weight_decay: float = 0.0,
     weight_decay_mask: Optional[Union[Any, Callable[[base.Params], Any]]] = None,
     preconditioner_update_probability: Union[
@@ -537,7 +523,6 @@ def kron(
     Args:
         learning_rate: float or callable, learning rate.
        b1: float, momentum parameter.
-        normalize_grads: bool, whether to normalize gradients to unit norm layer-wise.
        weight_decay: float, weight decay.
        weight_decay_mask: optional Any or callable, pytree of bool same structure as
            params with weight decay applied to True elements.
@@ -571,7 +556,6 @@ def kron(
     optimizer = [
         scale_by_kron_for_levanter(
             b1=b1,
-            normalize_grads=normalize_grads,
             preconditioner_update_probability=preconditioner_update_probability,
             max_size_triangular=max_size_triangular,
             min_ndim_triangular=min_ndim_triangular,
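
The `precond_update_prob_schedule` used above is described by these patches only in prose: hold the update probability at its maximum for `flat_start` steps, then anneal exponentially down to `min_prob` over roughly 3000 steps. Below is a minimal Python sketch consistent with that description; the parameter names `min_prob` and `flat_start` appear in the patches, while `max_prob` and the decay constant `decay=0.001` are assumptions chosen to match the "~3000 steps" figure, not necessarily the exact implementation in src/levanter/optim/kron.py.

    import jax.numpy as jnp

    def precond_update_prob_schedule(max_prob=1.0, min_prob=0.03, decay=0.001, flat_start=1000):
        """Anneal the preconditioner update probability early in training."""

        def _schedule(n):
            # Hold at max_prob for `flat_start` steps, then decay exponentially.
            # Clipping keeps the probability inside [min_prob, max_prob].
            prob = max_prob * jnp.exp(-decay * (n - flat_start))
            return jnp.clip(prob, min_prob, max_prob)

        return _schedule

At the annealed value min_prob=0.03, the preconditioner is updated on average once every 1 / 0.03 ≈ 33 steps, which is where the "(update every 33 steps)" note in the KronConfig docstring comes from.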