Skip to content

Commit

Permalink
tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexisRenchon committed Jan 29, 2025
1 parent e0a305d commit abaa6ef
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 79 deletions.
3 changes: 3 additions & 0 deletions Artifacts.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,6 @@ git-tree-sha1 = "839224a62b59d73073bdb9a5c55d3dc75e30fe33"
[[ilamb_data.download]]
sha256 = "64a9a344ebfbb0113014178a1f93a655c401431565907c07fd33aff8860b62d6"
url = "https://caltech.box.com/shared/static/eii2bfwfp47axfeuysgxlgzbczz27u5g.gz"

[era5_surface_fluxes]
git-tree-sha1 = "f2cb69e41cba84991054162e794292f98bf0ae34"
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ function setup_prob(t0, tf, Δt, params, outdir; nelements = (101, 7))
)

# Set up parameters
p_names = collect(keys(params))
p_values = [params[name]["value"] for name in p_names]
params = (; zip(Symbol.(p_names), p_values)...)

(; κ_soil, ρc_soil, f_bucket, W_f, p, z_0m) = params
z_0b = z_0m
τc = FT(Δt)
Expand Down Expand Up @@ -194,66 +198,70 @@ function setup_prob(t0, tf, Δt, params, outdir; nelements = (101, 7))
return prob, SciMLBase.CallbackSet(driver_cb, diag_cb)
end

# Define the locations longitudes and latitudes
# Needs to be defined once, both for g(θ) and ERA5 target
t0 = 0.0
tf = 60 * 60.0 * 24 * 366
Δt = 900.0
nelements = (101, 7)
regridder_type = :InterpolationsRegridder
radius = FT(6378.1e3)
depth = FT(3.5)
domain = ClimaLand.Domains.SphericalShell(;
radius = radius,
depth = depth,
nelements = nelements,
npolynomial = 1,
dz_tuple = FT.((1.0, 0.05)),
)
surface_space = domain.space.surface
training_locations, validation_locations =
rand_locations(surface_space, regridder_type, 25)

# (; κ_soil, ρc_soil, f_bucket, W_f, p, z_0m) = params
# The truth params = (;κ_soil = FT(1.5), ρc_soil = FT(2e6), f_bucket = FT(0.75), W_f = FT(0.2), p = FT(1), z_0m = FT(1e-2))
# Read in the era5 datafile
era5_ds = Dataset(joinpath(ClimaLand.Artifacts.era5_surface_data2008_path(),
"era5_monthly_surface_fluxes_200801-200812.nc"))

# Make the ERA5 target
ERA5_target = []
close = (x, y) -> abs(x - y) < 5e-1
for (lon, lat) in training_locations
# Fetch slices of lhf and shf era5 data from the era5 dataset
lat_ind, lon_ind = findall((x) -> close(x, lat), era5_ds["latitude"][:])[1],
findall((x) -> close(x, lon + 180), era5_ds["longitude"][:])[1]
lhf_loc = vec(era5_ds["mslhf"][lon_ind, lat_ind, :][:, end, :])
shf_loc = vec(era5_ds["msshf"][lon_ind, lat_ind, :][:, end, :])

# Create Observation objects for lhf and shf
lhf_ERA5 = EKP.Observation(
Dict(
"samples" => lhf_loc,
"covariances" => cov(lhf_loc) * EKP.I,
"names" => "lhf_$(lon)_$(lat)",
),
)

shf_ERA5 = EKP.Observation(
Dict(
"samples" => shf_loc,
"covariances" => cov(shf_loc) * EKP.I,
"names" => "shf_$(lon)_$(lat)",
),
)

# Add the observations to the target
push!(ERA5_target, lhf_ERA5)
push!(ERA5_target, shf_ERA5)

end

full_obs_era5 = EKP.combine_observations(ERA5_target)
# function target_and_locations()
# Define the locations longitudes and latitudes
# Needs to be defined once, both for g(θ) and ERA5 target
t0 = 0.0
tf = 60 * 60.0 * 24 * 366
Δt = 900.0
nelements = (101, 7)
regridder_type = :InterpolationsRegridder
radius = FT(6378.1e3)
depth = FT(3.5)
domain = ClimaLand.Domains.SphericalShell(;
radius = radius,
depth = depth,
nelements = nelements,
npolynomial = 1,
dz_tuple = FT.((1.0, 0.05)),
)
surface_space = domain.space.surface
locations, validation_locations =
rand_locations(surface_space, regridder_type, 25)

# (; κ_soil, ρc_soil, f_bucket, W_f, p, z_0m) = params
# The truth params = (;κ_soil = FT(1.5), ρc_soil = FT(2e6), f_bucket = FT(0.75), W_f = FT(0.2), p = FT(1), z_0m = FT(1e-2))
# Read in the era5 datafile
era5_ds = Dataset(joinpath(ClimaLand.Artifacts.era5_surface_data2008_path(),
"era5_monthly_surface_fluxes_200801-200812.nc"))

# Make the ERA5 target
ERA5_target = []
close = (x, y) -> abs(x - y) < 5e-1
for (lon, lat) in locations
# Fetch slices of lhf and shf era5 data from the era5 dataset
lat_ind, lon_ind = findall((x) -> close(x, lat), era5_ds["latitude"][:])[1],
findall((x) -> close(x, lon + 180), era5_ds["longitude"][:])[1]
lhf_loc = vec(era5_ds["mslhf"][lon_ind, lat_ind, :][:, end, :])
shf_loc = vec(era5_ds["msshf"][lon_ind, lat_ind, :][:, end, :])

# Create Observation objects for lhf and shf
lhf_ERA5 = EKP.Observation(
Dict(
"samples" => lhf_loc,
"covariances" => cov(lhf_loc) * EKP.I,
"names" => "lhf_$(lon)_$(lat)",
),
)

shf_ERA5 = EKP.Observation(
Dict(
"samples" => shf_loc,
"covariances" => cov(shf_loc) * EKP.I,
"names" => "shf_$(lon)_$(lat)",
),
)

# Add the observations to the target
push!(ERA5_target, lhf_ERA5)
push!(ERA5_target, shf_ERA5)

end

full_obs_era5 = EKP.combine_observations(ERA5_target)
observations = EKP.get_obs(full_obs_era5)
# return training_locations, observations
# end

# Things to consider:
# - masking out the ocean when comparing to data
Expand Down
Original file line number Diff line number Diff line change
@@ -1,26 +1,34 @@
# The script included below contains the following functions:
# rand_locations - returns n random locations on land (lon, lat)
# setup_prob - config for the bucket model
# it also returns the target used for calibration, full_obs_era5, which is
# latent and sensible heat at the rand_locations stacked as a vector
include("bucket_target_script.jl")
observations = full_obs_era5
# target_and_locations - returns locations on land and ERA5 LHF SHF obs at those locations
using ClimaLand
dir = pkgdir(ClimaLand)
include(joinpath(dir,"experiments/calibration/global_bucket/climacalibrate_bucket/bucket_target_script.jl"))
# locations, observations = target_and_locations()
# Should observations be a vector or EKP.Observation type?

# Now, we need a forward_model for ClimaCalibrate.
# forward_model runs the model and generates a ClimaDiagnostic output directory
# that will be used to generate the observation_map.
# note that forward_model needs the setup_prob function defined above.
import ClimaCalibrate: forward_model, parameter_path
# IMPORTANT: forward_model needs to be defined as CAL.forward_model, as it adds a method
import ClimaCalibrate: forward_model, parameter_path, path_to_ensemble_member
import ClimaCalibrate as CAL
include("forward_model.jl")
using Distributed
ntasks = 4
addprocs(CAL.SlurmManager(ntasks), gpus_per_task=1, time="10:00:00")
@everywhere include(joinpath(dir,"experiments/calibration/global_bucket/climacalibrate_bucket/forward_model.jl"))

# Now we include the script observation_map.jl, which contains two functions:
# process_member_data - makes the loss function we want from the forward_model output
# observation_map - makes our G_ensemble, the loss for all n_ensemble parameters
include("observation_map.jl")
# process_member_data - makes the parameter to data map we want from the forward_model output for one ensemble member
# observation_map - makes our G_ensemble, the parameter to data map for all n_ensemble parameters
# IMPORTANT: observation_map needs to be defined as CAL.observation_map, as it adds a method
include(joinpath(dir,"experiments/calibration/global_bucket/climacalibrate_bucket/observation_map.jl"))

# Now that our functions are defined, we still need some to specify some arguments they require.
# Parameters prior
# Note: this could be provided in a .toml file. Not sure if it has to.
prior_κ_soil = EKP.constrained_gaussian("κ_soil", 2, 1, 0, Inf);
prior_ρc_soil = EKP.constrained_gaussian("ρc_soil", 4e6, 2e6, 0, Inf);
prior_f_bucket = EKP.constrained_gaussian("f_bucket", 0.5, 0.3, 0, 1);
Expand All @@ -38,12 +46,15 @@ prior = EKP.combine_distributions([

ensemble_size = 10
n_iterations = 5
noise = EKP.I # not sure what to do here, I had noise embeded in my observation_map function
noise = 1.0*EKP.I # Should work, but this should be covariance of each month from observation (ERA5)

output_dir = "calibration_output" # Should I create this folder?
# This folder will contains ClimaCalibrate outputs - parameters ensemble at each iterations, etc.
caldir = "calibration_output"

# Note: what is the best way to test this?
# Should this script be run as a slurm job? (I expect it takes ~ 5 hours or so to complete)
CAL.calibrate(
CAL.WorkerBackend, # not sure what to do here
CAL.WorkerBackend,
ensemble_size,
n_iterations,
observations,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import TOML as toml

# Explain things here - tutorial style
# (Nat: why is it CAL.forward_model, and not CAL.parameter_path?
"""
"""
function CAL.forward_model(iteration, member)
ensemble_member_path = parameter_path(caldir, iteration, member)
params = toml.parsefile(ensemble_member_path) # should load a Dict, that needs to be converted to namedtuple
ensemble_member_path = path_to_ensemble_member(caldir, iteration, member)
params_path = parameter_path(caldir, iteration, member)
params = toml.parsefile(params_path) # should load a Dict, that needs to be converted to namedtuple

@info ensemble_member_path
diagnostics_dir = joinpath(ensemble_member_path, "global_diagnostics")
diagdir = ClimaUtilities.OutputPathGenerator.generate_output_path(
diagnostics_dir,
Expand All @@ -17,4 +20,5 @@ function CAL.forward_model(iteration, member)
timestepper = CTS.RK4()
ode_algo = CTS.ExplicitAlgorithm(timestepper)
SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb, adaptive = false)
return nothing
end
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ function process_member_data(root_path)
# Initialize an empty list to store observations
obs_list = []
# Loop over each location
for (lon, lat) in training_locations
for (lon, lat) in locations
# Slice lhf and shf at the given longitude and latitude
lhf_loc = ClimaAnalysis.slice(lhf, lon = lon, lat = lat)
shf_loc = ClimaAnalysis.slice(shf, lon = lon, lat = lat)
Expand Down Expand Up @@ -42,12 +42,12 @@ function process_member_data(root_path)
return obs
end

function observation_map(iteration)
function CAL.observation_map(iteration)
single_member_dims = (1,)
G_ensemble = Array{Float64}(undef, single_member_dims..., ensemble_size)

for m in 1:ensemble_size
member_path = CAL.path_to_ensemble_member(caldir, iteration, m)
member_path = path_to_ensemble_member(caldir, iteration, m)
simdir_path = joinpath(member_path, "global_diagnostics", "output_active")
if isdir(simdir_path)
simdir = SimDir(simdir_path)
Expand Down
2 changes: 1 addition & 1 deletion src/Artifacts.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ NOTE: We should be able to use @clima_artifact here, but the folders on central
seem to be named differently than the artifact name
"""
function era5_surface_data2008_path(; context = nothing)
return "/groups/esm/ClimaArtifacts/artifacts/era5_surface_fluxes_2008"
return @clima_artifact("era5_surface_fluxes", context)
end

"""
Expand Down

0 comments on commit abaa6ef

Please sign in to comment.