From 8736b4130d1f73e46df6c8dbafe822acd8505a28 Mon Sep 17 00:00:00 2001 From: Julia Sloan Date: Tue, 18 Jun 2024 17:11:19 -0700 Subject: [PATCH] actually run on 2 gpus --- .buildkite/pipeline.yml | 131 ++++++++++++++++++++-------------------- 1 file changed, 66 insertions(+), 65 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 54cb790922..41ba21f1d4 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -457,44 +457,41 @@ steps: slurm_cpus_per_task: 4 slurm_ntasks: 2 slurm_mem: 16GB - - label: "2 GPU AMIP with diagnostic EDMF" - key: "gpu_2_ngpus_amip_diagedmf" - command: "srun julia --threads=3 --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $BENCHMARK_CONFIG_PATH/amip_diagedmf.yml --job_id gpu_2_ngpus_amip_diagedmf" - artifact_paths: "experiments/ClimaEarth/output/amip/gpu_2_ngpus_amip_diagedmf_artifacts/*" + + # GPU RUNS: slabplanet + - label: "GPU Slabplanet: albedo from function" + key: "gpu_slabplanet_albedo_function" + command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_albedo_function.yml --job_id gpu_slabplanet_albedo_function" + artifact_paths: "experiments/ClimaEarth/output/slabplanet/gpu_slabplanet_albedo_function_artifacts/*" agents: slurm_mem: 20GB - slurm_gpus: 2 - - # # GPU RUNS: slabplanet - # - label: "GPU Slabplanet: albedo from function" - # key: "gpu_slabplanet_albedo_function" - # command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_albedo_function.yml --job_id gpu_slabplanet_albedo_function" - # artifact_paths: "experiments/ClimaEarth/output/slabplanet/gpu_slabplanet_albedo_function_artifacts/*" - # agents: - # slurm_mem: 20GB - # slurm_gpus: 1 - # - label: "2 GPU Slabplanet: albedo from function" - # key: "gpu_2_slabplanet_albedo_function" - # command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_albedo_function.yml --job_id gpu_2_slabplanet_albedo_function" - # artifact_paths: "experiments/ClimaEarth/output/slabplanet/gpu_2_slabplanet_albedo_function_artifacts/*" - # agents: - # slurm_mem: 20GB - # slurm_gpus: 2 - - # - label: "GPU Slabplanet: albedo from static map" - # key: "gpu_slabplanet_albedo_static_map" - # command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_albedo_static_map.yml --job_id gpu_slabplanet_albedo_static_map" - # artifact_paths: "experiments/ClimaEarth/output/slabplanet/gpu_slabplanet_albedo_static_map_artifacts/*" - # agents: - # slurm_mem: 20GB - # slurm_gpus: 1 - # - label: "2 GPU Slabplanet: albedo from static map" - # key: "gpu_2_slabplanet_albedo_static_map" - # command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_albedo_static_map.yml --job_id gpu_2_slabplanet_albedo_static_map" - # artifact_paths: "experiments/ClimaEarth/output/slabplanet/gpu_2_slabplanet_albedo_static_map_artifacts/*" - # agents: - # slurm_mem: 20GB - # slurm_gpus: 2 + slurm_gpus: 1 + - label: "2 GPU Slabplanet: albedo from function" + key: "gpu_2_slabplanet_albedo_function" + command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_albedo_function.yml --job_id gpu_2_slabplanet_albedo_function" + artifact_paths: "experiments/ClimaEarth/output/slabplanet/gpu_2_slabplanet_albedo_function_artifacts/*" + agents: + slurm_gpus_per_task: 1 + slurm_cpus_per_task: 4 + slurm_ntasks: 2 + slurm_mem: 16GB + + - label: "GPU Slabplanet: albedo from static map" + key: "gpu_slabplanet_albedo_static_map" + command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_albedo_static_map.yml --job_id gpu_slabplanet_albedo_static_map" + artifact_paths: "experiments/ClimaEarth/output/slabplanet/gpu_slabplanet_albedo_static_map_artifacts/*" + agents: + slurm_mem: 20GB + slurm_gpus: 1 + - label: "2 GPU Slabplanet: albedo from static map" + key: "gpu_2_slabplanet_albedo_static_map" + command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_albedo_static_map.yml --job_id gpu_2_slabplanet_albedo_static_map" + artifact_paths: "experiments/ClimaEarth/output/slabplanet/gpu_2_slabplanet_albedo_static_map_artifacts/*" + agents: + slurm_gpus_per_task: 1 + slurm_cpus_per_task: 4 + slurm_ntasks: 2 + slurm_mem: 16GB # # - label: "GPU Slabplanet: albedo from temporal map" # # key: "gpu_slabplanet_albedo_temporal_map" @@ -512,21 +509,23 @@ steps: # # slurm_mem: 20GB # # slurm_gpus: 1 - # # GPU RUNS: AMIP - # - label: "GPU AMIP test: albedo from function" - # key: "gpu_amip_albedo_function" - # command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/gpu_amip_albedo_function.yml --job_id gpu_amip_albedo_function" - # artifact_paths: "experiments/ClimaEarth/output/amip/gpu_amip_albedo_function_artifacts/*" - # agents: - # slurm_mem: 20GB - # slurm_gpus: 1 - # - label: "2 GPU AMIP test: albedo from function" - # key: "gpu_2_amip_albedo_function" - # command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/gpu_amip_albedo_function.yml --job_id gpu_2_amip_albedo_function" - # artifact_paths: "experiments/ClimaEarth/output/amip/gpu_2_amip_albedo_function_artifacts/*" - # agents: - # slurm_mem: 20GB - # slurm_gpus: 2 + # GPU RUNS: AMIP + - label: "GPU AMIP test: albedo from function" + key: "gpu_amip_albedo_function" + command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/gpu_amip_albedo_function.yml --job_id gpu_amip_albedo_function" + artifact_paths: "experiments/ClimaEarth/output/amip/gpu_amip_albedo_function_artifacts/*" + agents: + slurm_mem: 20GB + slurm_gpus: 1 + - label: "2 GPU AMIP test: albedo from function" + key: "gpu_2_amip_albedo_function" + command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/gpu_amip_albedo_function.yml --job_id gpu_2_amip_albedo_function" + artifact_paths: "experiments/ClimaEarth/output/amip/gpu_2_amip_albedo_function_artifacts/*" + agents: + slurm_gpus_per_task: 1 + slurm_cpus_per_task: 4 + slurm_ntasks: 2 + slurm_mem: 16GB # # - label: "GPU AMIP target: topography and diagnostic EDMF" # # key: "gpu_amip_target_topo_diagedmf_shortrun" @@ -536,20 +535,22 @@ steps: # # slurm_mem: 20GB # # slurm_gpus: 1 - # - label: "GPU AMIP: albedo from static map" - # key: "gpu_amip_albedo_static_map" - # command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/gpu_amip_albedo_static_map.yml --job_id gpu_amip_albedo_static_map" - # artifact_paths: "experiments/ClimaEarth/output/amip/gpu_amip_albedo_static_map_artifacts/*" - # agents: - # slurm_mem: 20GB - # slurm_gpus: 1 - # - label: "2 GPU AMIP: albedo from static map" - # key: "gpu_2_amip_albedo_static_map" - # command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/gpu_amip_albedo_static_map.yml --job_id gpu_2_amip_albedo_static_map" - # artifact_paths: "experiments/ClimaEarth/output/amip/gpu_2_amip_albedo_static_map_artifacts/*" - # agents: - # slurm_mem: 20GB - # slurm_gpus: 2 + - label: "GPU AMIP: albedo from static map" + key: "gpu_amip_albedo_static_map" + command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/gpu_amip_albedo_static_map.yml --job_id gpu_amip_albedo_static_map" + artifact_paths: "experiments/ClimaEarth/output/amip/gpu_amip_albedo_static_map_artifacts/*" + agents: + slurm_mem: 20GB + slurm_gpus: 1 + - label: "2 GPU AMIP: albedo from static map" + key: "gpu_2_amip_albedo_static_map" + command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/gpu_amip_albedo_static_map.yml --job_id gpu_2_amip_albedo_static_map" + artifact_paths: "experiments/ClimaEarth/output/amip/gpu_2_amip_albedo_static_map_artifacts/*" + agents: + slurm_gpus_per_task: 1 + slurm_cpus_per_task: 4 + slurm_ntasks: 2 + slurm_mem: 16GB # - label: "GPU AMIP: albedo from temporal map" # key: "gpu_amip_albedo_temporal_map"