From be1f53935f8cf5007cd9715de9fdb35644236dab Mon Sep 17 00:00:00 2001 From: Alexis Renchon Date: Tue, 21 Jan 2025 13:20:02 -0800 Subject: [PATCH] tweaks --- .buildkite/Project.toml | 3 +- Artifacts.toml | 3 + .../bucket_target_script.jl | 130 ++++++++++-------- .../climacalibrate_bucket/calibrate.jl | 45 ++++-- .../climacalibrate_bucket/forward_model.jl | 10 +- .../climacalibrate_bucket/observation_map.jl | 14 +- run_calibration.sh | 14 ++ src/Artifacts.jl | 2 +- 8 files changed, 136 insertions(+), 85 deletions(-) create mode 100644 run_calibration.sh diff --git a/.buildkite/Project.toml b/.buildkite/Project.toml index 3cf384bbb5..412485f0ac 100644 --- a/.buildkite/Project.toml +++ b/.buildkite/Project.toml @@ -6,6 +6,7 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" ClimaAnalysis = "29b5916a-a76c-4e73-9657-3c8fd22e65e6" +ClimaCalibrate = "4347a170-ebd6-470c-89d3-5c705c0cacc2" ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884" ClimaCoreMakie = "908f55d8-4145-4867-9c14-5dad1a479e4d" @@ -46,5 +47,5 @@ cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" [compat] ClimaAnalysis = "0.5.12" ClimaTimeSteppers = "0.7" -Statistics = "1" Flux = "0.15" +Statistics = "1" diff --git a/Artifacts.toml b/Artifacts.toml index 153a017eac..efa4bb7e47 100644 --- a/Artifacts.toml +++ b/Artifacts.toml @@ -93,3 +93,6 @@ git-tree-sha1 = "839224a62b59d73073bdb9a5c55d3dc75e30fe33" [[ilamb_data.download]] sha256 = "64a9a344ebfbb0113014178a1f93a655c401431565907c07fd33aff8860b62d6" url = "https://caltech.box.com/shared/static/eii2bfwfp47axfeuysgxlgzbczz27u5g.gz" + +[era5_surface_fluxes] +git-tree-sha1 = "f2cb69e41cba84991054162e794292f98bf0ae34" diff --git a/experiments/calibration/global_bucket/climacalibrate_bucket/bucket_target_script.jl b/experiments/calibration/global_bucket/climacalibrate_bucket/bucket_target_script.jl index 8d4c16e13f..c7050d916f 100644 --- a/experiments/calibration/global_bucket/climacalibrate_bucket/bucket_target_script.jl +++ b/experiments/calibration/global_bucket/climacalibrate_bucket/bucket_target_script.jl @@ -57,7 +57,7 @@ const FT = Float64; context = ClimaComms.context() device = ClimaComms.device() device_suffix = device isa ClimaComms.CPUSingleThreaded ? "cpu" : "gpu" -root_path = "bucket_longrun_$(device_suffix)" +root_path = "bucket_longrun" # Returns a 2 SVectors of (lat, lon) tuples for n random locations on the land # surface. The two sets of locations are designed to be used as a training and @@ -126,6 +126,10 @@ function setup_prob(t0, tf, Δt, params, outdir; nelements = (101, 7)) ) # Set up parameters + p_names = collect(keys(params)) + p_values = [params[name]["value"] for name in p_names] + params = (; zip(Symbol.(p_names), p_values)...) + (; κ_soil, ρc_soil, f_bucket, W_f, p, z_0m) = params z_0b = z_0m τc = FT(Δt) @@ -194,66 +198,70 @@ function setup_prob(t0, tf, Δt, params, outdir; nelements = (101, 7)) return prob, SciMLBase.CallbackSet(driver_cb, diag_cb) end -# Define the locations longitudes and latitudes -# Needs to be defined once, both for g(θ) and ERA5 target -t0 = 0.0 -tf = 60 * 60.0 * 24 * 366 -Δt = 900.0 -nelements = (101, 7) -regridder_type = :InterpolationsRegridder -radius = FT(6378.1e3) -depth = FT(3.5) -domain = ClimaLand.Domains.SphericalShell(; - radius = radius, - depth = depth, - nelements = nelements, - npolynomial = 1, - dz_tuple = FT.((1.0, 0.05)), - ) -surface_space = domain.space.surface -training_locations, validation_locations = -rand_locations(surface_space, regridder_type, 25) - -# (; κ_soil, ρc_soil, f_bucket, W_f, p, z_0m) = params -# The truth params = (;κ_soil = FT(1.5), ρc_soil = FT(2e6), f_bucket = FT(0.75), W_f = FT(0.2), p = FT(1), z_0m = FT(1e-2)) -# Read in the era5 datafile -era5_ds = Dataset(joinpath(ClimaLand.Artifacts.era5_surface_data2008_path(), - "era5_monthly_surface_fluxes_200801-200812.nc")) - -# Make the ERA5 target -ERA5_target = [] -close = (x, y) -> abs(x - y) < 5e-1 -for (lon, lat) in training_locations - # Fetch slices of lhf and shf era5 data from the era5 dataset - lat_ind, lon_ind = findall((x) -> close(x, lat), era5_ds["latitude"][:])[1], - findall((x) -> close(x, lon + 180), era5_ds["longitude"][:])[1] - lhf_loc = vec(era5_ds["mslhf"][lon_ind, lat_ind, :][:, end, :]) - shf_loc = vec(era5_ds["msshf"][lon_ind, lat_ind, :][:, end, :]) - - # Create Observation objects for lhf and shf - lhf_ERA5 = EKP.Observation( - Dict( - "samples" => lhf_loc, - "covariances" => cov(lhf_loc) * EKP.I, - "names" => "lhf_$(lon)_$(lat)", - ), - ) - - shf_ERA5 = EKP.Observation( - Dict( - "samples" => shf_loc, - "covariances" => cov(shf_loc) * EKP.I, - "names" => "shf_$(lon)_$(lat)", - ), - ) - - # Add the observations to the target - push!(ERA5_target, lhf_ERA5) - push!(ERA5_target, shf_ERA5) - -end - -full_obs_era5 = EKP.combine_observations(ERA5_target) +# function target_and_locations() + # Define the locations longitudes and latitudes + # Needs to be defined once, both for g(θ) and ERA5 target + t0 = 0.0 + tf = 60 * 60.0 * 24 * 366 + Δt = 900.0 + nelements = (101, 7) + regridder_type = :InterpolationsRegridder + radius = FT(6378.1e3) + depth = FT(3.5) + domain = ClimaLand.Domains.SphericalShell(; + radius = radius, + depth = depth, + nelements = nelements, + npolynomial = 1, + dz_tuple = FT.((1.0, 0.05)), + ) + surface_space = domain.space.surface + locations, validation_locations = + rand_locations(surface_space, regridder_type, 25) + + # (; κ_soil, ρc_soil, f_bucket, W_f, p, z_0m) = params + # The truth params = (;κ_soil = FT(1.5), ρc_soil = FT(2e6), f_bucket = FT(0.75), W_f = FT(0.2), p = FT(1), z_0m = FT(1e-2)) + # Read in the era5 datafile + era5_ds = Dataset(joinpath(ClimaLand.Artifacts.era5_surface_data2008_path(), + "era5_monthly_surface_fluxes_200801-200812.nc")) + + # Make the ERA5 target + ERA5_target = [] + close = (x, y) -> abs(x - y) < 5e-1 + for (lon, lat) in locations + # Fetch slices of lhf and shf era5 data from the era5 dataset + lat_ind, lon_ind = findall((x) -> close(x, lat), era5_ds["latitude"][:])[1], + findall((x) -> close(x, lon + 180), era5_ds["longitude"][:])[1] + lhf_loc = vec(era5_ds["mslhf"][lon_ind, lat_ind, :][:, end, :]) + shf_loc = vec(era5_ds["msshf"][lon_ind, lat_ind, :][:, end, :]) + + # Create Observation objects for lhf and shf + lhf_ERA5 = EKP.Observation( + Dict( + "samples" => lhf_loc, + "covariances" => cov(lhf_loc) * EKP.I, + "names" => "lhf_$(lon)_$(lat)", + ), + ) + + shf_ERA5 = EKP.Observation( + Dict( + "samples" => shf_loc, + "covariances" => cov(shf_loc) * EKP.I, + "names" => "shf_$(lon)_$(lat)", + ), + ) + + # Add the observations to the target + push!(ERA5_target, lhf_ERA5) + push!(ERA5_target, shf_ERA5) + + end + + full_obs_era5 = EKP.combine_observations(ERA5_target) + observations = EKP.get_obs(full_obs_era5) +# return training_locations, observations +# end # Things to consider: # - masking out the ocean when comparing to data diff --git a/experiments/calibration/global_bucket/climacalibrate_bucket/calibrate.jl b/experiments/calibration/global_bucket/climacalibrate_bucket/calibrate.jl index 3d79d27b70..cf53938d3b 100644 --- a/experiments/calibration/global_bucket/climacalibrate_bucket/calibrate.jl +++ b/experiments/calibration/global_bucket/climacalibrate_bucket/calibrate.jl @@ -1,26 +1,44 @@ # The script included below contains the following functions: # rand_locations - returns n random locations on land (lon, lat) # setup_prob - config for the bucket model -# it also returns the target used for calibration, full_obs_era5, which is -# latent and sensible heat at the rand_locations stacked as a vector -include("bucket_target_script.jl") -observations = full_obs_era5 +# target_and_locations - returns locations on land and ERA5 LHF SHF obs at those locations +using ClimaLand +dir = pkgdir(ClimaLand) +# locations, observations = target_and_locations() +# Should observations be a vector or EKP.Observation type? # Now, we need a forward_model for ClimaCalibrate. # forward_model runs the model and generates a ClimaDiagnostic output directory # that will be used to generate the observation_map. # note that forward_model needs the setup_prob function defined above. -import ClimaCalibrate: forward_model, parameter_path +# IMPORTANT: forward_model needs to be defined as CAL.forward_model, as it adds a method +import ClimaCalibrate: forward_model, parameter_path, path_to_ensemble_member import ClimaCalibrate as CAL -include("forward_model.jl") +using Distributed +ntasks = 4 +addprocs(CAL.SlurmManager(ntasks)) + +@everywhere begin + import ClimaCalibrate: forward_model, parameter_path, path_to_ensemble_member + import ClimaCalibrate as CAL + + using ClimaLand + caldir = "calibration_output" + dir = pkgdir(ClimaLand) + + include(joinpath(dir,"experiments/calibration/global_bucket/climacalibrate_bucket/bucket_target_script.jl")) + include(joinpath(dir,"experiments/calibration/global_bucket/climacalibrate_bucket/forward_model.jl")) +end # Now we include the script observation_map.jl, which contains two functions: -# process_member_data - makes the loss function we want from the forward_model output -# observation_map - makes our G_ensemble, the loss for all n_ensemble parameters -include("observation_map.jl") +# process_member_data - makes the parameter to data map we want from the forward_model output for one ensemble member +# observation_map - makes our G_ensemble, the parameter to data map for all n_ensemble parameters +# IMPORTANT: observation_map needs to be defined as CAL.observation_map, as it adds a method +include(joinpath(dir,"experiments/calibration/global_bucket/climacalibrate_bucket/observation_map.jl")) # Now that our functions are defined, we still need some to specify some arguments they require. # Parameters prior +# Note: this could be provided in a .toml file. Not sure if it has to. prior_κ_soil = EKP.constrained_gaussian("κ_soil", 2, 1, 0, Inf); prior_ρc_soil = EKP.constrained_gaussian("ρc_soil", 4e6, 2e6, 0, Inf); prior_f_bucket = EKP.constrained_gaussian("f_bucket", 0.5, 0.3, 0, 1); @@ -38,12 +56,15 @@ prior = EKP.combine_distributions([ ensemble_size = 10 n_iterations = 5 -noise = EKP.I # not sure what to do here, I had noise embeded in my observation_map function +noise = 1.0*EKP.I # Should work, but this should be covariance of each month from observation (ERA5) -output_dir = "calibration_output" # Should I create this folder? +# This folder will contains ClimaCalibrate outputs - parameters ensemble at each iterations, etc. +caldir = "calibration_output" +# Note: what is the best way to test this? +# Should this script be run as a slurm job? (I expect it takes ~ 5 hours or so to complete) CAL.calibrate( - CAL.WorkerBackend, # not sure what to do here + CAL.WorkerBackend, ensemble_size, n_iterations, observations, diff --git a/experiments/calibration/global_bucket/climacalibrate_bucket/forward_model.jl b/experiments/calibration/global_bucket/climacalibrate_bucket/forward_model.jl index 975ad99068..2966e8c1ce 100644 --- a/experiments/calibration/global_bucket/climacalibrate_bucket/forward_model.jl +++ b/experiments/calibration/global_bucket/climacalibrate_bucket/forward_model.jl @@ -1,12 +1,15 @@ +import TOML as toml + # Explain things here - tutorial style -# (Nat: why is it CAL.forward_model, and not CAL.parameter_path? """ """ function CAL.forward_model(iteration, member) - ensemble_member_path = parameter_path(caldir, iteration, member) - params = toml.parsefile(ensemble_member_path) # should load a Dict, that needs to be converted to namedtuple + ensemble_member_path = path_to_ensemble_member(caldir, iteration, member) + params_path = parameter_path(caldir, iteration, member) + params = toml.parsefile(params_path) # should load a Dict, that needs to be converted to namedtuple + @info ensemble_member_path diagnostics_dir = joinpath(ensemble_member_path, "global_diagnostics") diagdir = ClimaUtilities.OutputPathGenerator.generate_output_path( diagnostics_dir, @@ -17,4 +20,5 @@ function CAL.forward_model(iteration, member) timestepper = CTS.RK4() ode_algo = CTS.ExplicitAlgorithm(timestepper) SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb, adaptive = false) + return nothing end diff --git a/experiments/calibration/global_bucket/climacalibrate_bucket/observation_map.jl b/experiments/calibration/global_bucket/climacalibrate_bucket/observation_map.jl index e7baab4031..61f744d7ce 100644 --- a/experiments/calibration/global_bucket/climacalibrate_bucket/observation_map.jl +++ b/experiments/calibration/global_bucket/climacalibrate_bucket/observation_map.jl @@ -1,16 +1,16 @@ +using ClimaAnalysis +import EnsembleKalmanProcesses as EKP + # explain things here - tutorial style -function process_member_data(root_path) +function process_member_data(simdir) - simdir = ClimaAnalysis.SimDir( - joinpath(root_path, "global_diagnostics", "output_active"), - ) lhf = get(simdir; short_name = "lhf") shf = get(simdir; short_name = "shf") # Initialize an empty list to store observations obs_list = [] # Loop over each location - for (lon, lat) in training_locations + for (lon, lat) in locations # Slice lhf and shf at the given longitude and latitude lhf_loc = ClimaAnalysis.slice(lhf, lon = lon, lat = lat) shf_loc = ClimaAnalysis.slice(shf, lon = lon, lat = lat) @@ -42,12 +42,12 @@ function process_member_data(root_path) return obs end -function observation_map(iteration) +function CAL.observation_map(iteration) single_member_dims = (1,) G_ensemble = Array{Float64}(undef, single_member_dims..., ensemble_size) for m in 1:ensemble_size - member_path = CAL.path_to_ensemble_member(caldir, iteration, m) + member_path = path_to_ensemble_member(caldir, iteration, m) simdir_path = joinpath(member_path, "global_diagnostics", "output_active") if isdir(simdir_path) simdir = SimDir(simdir_path) diff --git a/run_calibration.sh b/run_calibration.sh new file mode 100644 index 0000000000..9f33a8141a --- /dev/null +++ b/run_calibration.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --job-name=calibrate_bucket +#SBATCH --output=output_%j.txt # Output file (%j expands to job ID) +#SBATCH --error=error_%j.txt # Error file +#SBATCH --nodes=1 # Number of nodes +#SBATCH --ntasks=4 # Number of tasks (processes) +#SBATCH --time=10:00:00 # Time limit hrs:min:sec +#SBATCH --gpus-per-task=1 + +# Run your command +export CLIMACOMMS_DEVICE="CUDA" +export CLIMACOMMS_CONTEXT="SINGLETON" + +julia --project=.buildkite/ experiments/calibration/global_bucket/climacalibrate_bucket/calibrate.jl diff --git a/src/Artifacts.jl b/src/Artifacts.jl index 49e3854a49..e01d99b347 100644 --- a/src/Artifacts.jl +++ b/src/Artifacts.jl @@ -41,7 +41,7 @@ NOTE: We should be able to use @clima_artifact here, but the folders on central seem to be named differently than the artifact name """ function era5_surface_data2008_path(; context = nothing) - return "/groups/esm/ClimaArtifacts/artifacts/era5_surface_fluxes_2008" + return @clima_artifact("era5_surface_fluxes", context) end """