diff --git a/Artifacts.toml b/Artifacts.toml index 153a017eac..efa4bb7e47 100644 --- a/Artifacts.toml +++ b/Artifacts.toml @@ -93,3 +93,6 @@ git-tree-sha1 = "839224a62b59d73073bdb9a5c55d3dc75e30fe33" [[ilamb_data.download]] sha256 = "64a9a344ebfbb0113014178a1f93a655c401431565907c07fd33aff8860b62d6" url = "https://caltech.box.com/shared/static/eii2bfwfp47axfeuysgxlgzbczz27u5g.gz" + +[era5_surface_fluxes] +git-tree-sha1 = "f2cb69e41cba84991054162e794292f98bf0ae34" diff --git a/experiments/calibration/global_bucket/climacalibrate_bucket/bucket_target_script.jl b/experiments/calibration/global_bucket/climacalibrate_bucket/bucket_target_script.jl index 8d4c16e13f..24abd8932b 100644 --- a/experiments/calibration/global_bucket/climacalibrate_bucket/bucket_target_script.jl +++ b/experiments/calibration/global_bucket/climacalibrate_bucket/bucket_target_script.jl @@ -126,6 +126,10 @@ function setup_prob(t0, tf, Δt, params, outdir; nelements = (101, 7)) ) # Set up parameters + p_names = collect(keys(params)) + p_values = [params[name]["value"] for name in p_names] + params = (; zip(Symbol.(p_names), p_values)...) + (; κ_soil, ρc_soil, f_bucket, W_f, p, z_0m) = params z_0b = z_0m τc = FT(Δt) @@ -194,66 +198,70 @@ function setup_prob(t0, tf, Δt, params, outdir; nelements = (101, 7)) return prob, SciMLBase.CallbackSet(driver_cb, diag_cb) end -# Define the locations longitudes and latitudes -# Needs to be defined once, both for g(θ) and ERA5 target -t0 = 0.0 -tf = 60 * 60.0 * 24 * 366 -Δt = 900.0 -nelements = (101, 7) -regridder_type = :InterpolationsRegridder -radius = FT(6378.1e3) -depth = FT(3.5) -domain = ClimaLand.Domains.SphericalShell(; - radius = radius, - depth = depth, - nelements = nelements, - npolynomial = 1, - dz_tuple = FT.((1.0, 0.05)), - ) -surface_space = domain.space.surface -training_locations, validation_locations = -rand_locations(surface_space, regridder_type, 25) - -# (; κ_soil, ρc_soil, f_bucket, W_f, p, z_0m) = params -# The truth params = (;κ_soil = FT(1.5), ρc_soil = FT(2e6), f_bucket = FT(0.75), W_f = FT(0.2), p = FT(1), z_0m = FT(1e-2)) -# Read in the era5 datafile -era5_ds = Dataset(joinpath(ClimaLand.Artifacts.era5_surface_data2008_path(), - "era5_monthly_surface_fluxes_200801-200812.nc")) - -# Make the ERA5 target -ERA5_target = [] -close = (x, y) -> abs(x - y) < 5e-1 -for (lon, lat) in training_locations - # Fetch slices of lhf and shf era5 data from the era5 dataset - lat_ind, lon_ind = findall((x) -> close(x, lat), era5_ds["latitude"][:])[1], - findall((x) -> close(x, lon + 180), era5_ds["longitude"][:])[1] - lhf_loc = vec(era5_ds["mslhf"][lon_ind, lat_ind, :][:, end, :]) - shf_loc = vec(era5_ds["msshf"][lon_ind, lat_ind, :][:, end, :]) - - # Create Observation objects for lhf and shf - lhf_ERA5 = EKP.Observation( - Dict( - "samples" => lhf_loc, - "covariances" => cov(lhf_loc) * EKP.I, - "names" => "lhf_$(lon)_$(lat)", - ), - ) - - shf_ERA5 = EKP.Observation( - Dict( - "samples" => shf_loc, - "covariances" => cov(shf_loc) * EKP.I, - "names" => "shf_$(lon)_$(lat)", - ), - ) - - # Add the observations to the target - push!(ERA5_target, lhf_ERA5) - push!(ERA5_target, shf_ERA5) - -end - -full_obs_era5 = EKP.combine_observations(ERA5_target) +# function target_and_locations() + # Define the locations longitudes and latitudes + # Needs to be defined once, both for g(θ) and ERA5 target + t0 = 0.0 + tf = 60 * 60.0 * 24 * 366 + Δt = 900.0 + nelements = (101, 7) + regridder_type = :InterpolationsRegridder + radius = FT(6378.1e3) + depth = FT(3.5) + domain = ClimaLand.Domains.SphericalShell(; + radius = radius, + depth = depth, + nelements = nelements, + npolynomial = 1, + dz_tuple = FT.((1.0, 0.05)), + ) + surface_space = domain.space.surface + locations, validation_locations = + rand_locations(surface_space, regridder_type, 25) + + # (; κ_soil, ρc_soil, f_bucket, W_f, p, z_0m) = params + # The truth params = (;κ_soil = FT(1.5), ρc_soil = FT(2e6), f_bucket = FT(0.75), W_f = FT(0.2), p = FT(1), z_0m = FT(1e-2)) + # Read in the era5 datafile + era5_ds = Dataset(joinpath(ClimaLand.Artifacts.era5_surface_data2008_path(), + "era5_monthly_surface_fluxes_200801-200812.nc")) + + # Make the ERA5 target + ERA5_target = [] + close = (x, y) -> abs(x - y) < 5e-1 + for (lon, lat) in locations + # Fetch slices of lhf and shf era5 data from the era5 dataset + lat_ind, lon_ind = findall((x) -> close(x, lat), era5_ds["latitude"][:])[1], + findall((x) -> close(x, lon + 180), era5_ds["longitude"][:])[1] + lhf_loc = vec(era5_ds["mslhf"][lon_ind, lat_ind, :][:, end, :]) + shf_loc = vec(era5_ds["msshf"][lon_ind, lat_ind, :][:, end, :]) + + # Create Observation objects for lhf and shf + lhf_ERA5 = EKP.Observation( + Dict( + "samples" => lhf_loc, + "covariances" => cov(lhf_loc) * EKP.I, + "names" => "lhf_$(lon)_$(lat)", + ), + ) + + shf_ERA5 = EKP.Observation( + Dict( + "samples" => shf_loc, + "covariances" => cov(shf_loc) * EKP.I, + "names" => "shf_$(lon)_$(lat)", + ), + ) + + # Add the observations to the target + push!(ERA5_target, lhf_ERA5) + push!(ERA5_target, shf_ERA5) + + end + + full_obs_era5 = EKP.combine_observations(ERA5_target) + observations = EKP.get_obs(full_obs_era5) +# return training_locations, observations +# end # Things to consider: # - masking out the ocean when comparing to data diff --git a/experiments/calibration/global_bucket/climacalibrate_bucket/calibrate.jl b/experiments/calibration/global_bucket/climacalibrate_bucket/calibrate.jl index 3d79d27b70..13399498e2 100644 --- a/experiments/calibration/global_bucket/climacalibrate_bucket/calibrate.jl +++ b/experiments/calibration/global_bucket/climacalibrate_bucket/calibrate.jl @@ -1,26 +1,34 @@ # The script included below contains the following functions: # rand_locations - returns n random locations on land (lon, lat) # setup_prob - config for the bucket model -# it also returns the target used for calibration, full_obs_era5, which is -# latent and sensible heat at the rand_locations stacked as a vector -include("bucket_target_script.jl") -observations = full_obs_era5 +# target_and_locations - returns locations on land and ERA5 LHF SHF obs at those locations +using ClimaLand +dir = pkgdir(ClimaLand) +include(joinpath(dir,"experiments/calibration/global_bucket/climacalibrate_bucket/bucket_target_script.jl")) +# locations, observations = target_and_locations() +# Should observations be a vector or EKP.Observation type? # Now, we need a forward_model for ClimaCalibrate. # forward_model runs the model and generates a ClimaDiagnostic output directory # that will be used to generate the observation_map. # note that forward_model needs the setup_prob function defined above. -import ClimaCalibrate: forward_model, parameter_path +# IMPORTANT: forward_model needs to be defined as CAL.forward_model, as it adds a method +import ClimaCalibrate: forward_model, parameter_path, path_to_ensemble_member import ClimaCalibrate as CAL -include("forward_model.jl") +using Distributed +ntasks = 4 +addprocs(CAL.SlurmManager(ntasks), gpus_per_task=1, time="10:00:00") +@everywhere include(joinpath(dir,"experiments/calibration/global_bucket/climacalibrate_bucket/forward_model.jl")) # Now we include the script observation_map.jl, which contains two functions: -# process_member_data - makes the loss function we want from the forward_model output -# observation_map - makes our G_ensemble, the loss for all n_ensemble parameters -include("observation_map.jl") +# process_member_data - makes the parameter to data map we want from the forward_model output for one ensemble member +# observation_map - makes our G_ensemble, the parameter to data map for all n_ensemble parameters +# IMPORTANT: observation_map needs to be defined as CAL.observation_map, as it adds a method +include(joinpath(dir,"experiments/calibration/global_bucket/climacalibrate_bucket/observation_map.jl")) # Now that our functions are defined, we still need some to specify some arguments they require. # Parameters prior +# Note: this could be provided in a .toml file. Not sure if it has to. prior_κ_soil = EKP.constrained_gaussian("κ_soil", 2, 1, 0, Inf); prior_ρc_soil = EKP.constrained_gaussian("ρc_soil", 4e6, 2e6, 0, Inf); prior_f_bucket = EKP.constrained_gaussian("f_bucket", 0.5, 0.3, 0, 1); @@ -38,12 +46,15 @@ prior = EKP.combine_distributions([ ensemble_size = 10 n_iterations = 5 -noise = EKP.I # not sure what to do here, I had noise embeded in my observation_map function +noise = 1.0*EKP.I # Should work, but this should be covariance of each month from observation (ERA5) -output_dir = "calibration_output" # Should I create this folder? +# This folder will contains ClimaCalibrate outputs - parameters ensemble at each iterations, etc. +caldir = "calibration_output" +# Note: what is the best way to test this? +# Should this script be run as a slurm job? (I expect it takes ~ 5 hours or so to complete) CAL.calibrate( - CAL.WorkerBackend, # not sure what to do here + CAL.WorkerBackend, ensemble_size, n_iterations, observations, diff --git a/experiments/calibration/global_bucket/climacalibrate_bucket/forward_model.jl b/experiments/calibration/global_bucket/climacalibrate_bucket/forward_model.jl index 975ad99068..2966e8c1ce 100644 --- a/experiments/calibration/global_bucket/climacalibrate_bucket/forward_model.jl +++ b/experiments/calibration/global_bucket/climacalibrate_bucket/forward_model.jl @@ -1,12 +1,15 @@ +import TOML as toml + # Explain things here - tutorial style -# (Nat: why is it CAL.forward_model, and not CAL.parameter_path? """ """ function CAL.forward_model(iteration, member) - ensemble_member_path = parameter_path(caldir, iteration, member) - params = toml.parsefile(ensemble_member_path) # should load a Dict, that needs to be converted to namedtuple + ensemble_member_path = path_to_ensemble_member(caldir, iteration, member) + params_path = parameter_path(caldir, iteration, member) + params = toml.parsefile(params_path) # should load a Dict, that needs to be converted to namedtuple + @info ensemble_member_path diagnostics_dir = joinpath(ensemble_member_path, "global_diagnostics") diagdir = ClimaUtilities.OutputPathGenerator.generate_output_path( diagnostics_dir, @@ -17,4 +20,5 @@ function CAL.forward_model(iteration, member) timestepper = CTS.RK4() ode_algo = CTS.ExplicitAlgorithm(timestepper) SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb, adaptive = false) + return nothing end diff --git a/experiments/calibration/global_bucket/climacalibrate_bucket/observation_map.jl b/experiments/calibration/global_bucket/climacalibrate_bucket/observation_map.jl index e7baab4031..c929866680 100644 --- a/experiments/calibration/global_bucket/climacalibrate_bucket/observation_map.jl +++ b/experiments/calibration/global_bucket/climacalibrate_bucket/observation_map.jl @@ -10,7 +10,7 @@ function process_member_data(root_path) # Initialize an empty list to store observations obs_list = [] # Loop over each location - for (lon, lat) in training_locations + for (lon, lat) in locations # Slice lhf and shf at the given longitude and latitude lhf_loc = ClimaAnalysis.slice(lhf, lon = lon, lat = lat) shf_loc = ClimaAnalysis.slice(shf, lon = lon, lat = lat) @@ -42,12 +42,12 @@ function process_member_data(root_path) return obs end -function observation_map(iteration) +function CAL.observation_map(iteration) single_member_dims = (1,) G_ensemble = Array{Float64}(undef, single_member_dims..., ensemble_size) for m in 1:ensemble_size - member_path = CAL.path_to_ensemble_member(caldir, iteration, m) + member_path = path_to_ensemble_member(caldir, iteration, m) simdir_path = joinpath(member_path, "global_diagnostics", "output_active") if isdir(simdir_path) simdir = SimDir(simdir_path) diff --git a/src/Artifacts.jl b/src/Artifacts.jl index 49e3854a49..e01d99b347 100644 --- a/src/Artifacts.jl +++ b/src/Artifacts.jl @@ -41,7 +41,7 @@ NOTE: We should be able to use @clima_artifact here, but the folders on central seem to be named differently than the artifact name """ function era5_surface_data2008_path(; context = nothing) - return "/groups/esm/ClimaArtifacts/artifacts/era5_surface_fluxes_2008" + return @clima_artifact("era5_surface_fluxes", context) end """