Skip to content

Commit

Permalink
tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexisRenchon committed Jan 31, 2025
1 parent e0a305d commit be1f539
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 85 deletions.
3 changes: 2 additions & 1 deletion .buildkite/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
ClimaAnalysis = "29b5916a-a76c-4e73-9657-3c8fd22e65e6"
ClimaCalibrate = "4347a170-ebd6-470c-89d3-5c705c0cacc2"
ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d"
ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884"
ClimaCoreMakie = "908f55d8-4145-4867-9c14-5dad1a479e4d"
Expand Down Expand Up @@ -46,5 +47,5 @@ cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
[compat]
ClimaAnalysis = "0.5.12"
ClimaTimeSteppers = "0.7"
Statistics = "1"
Flux = "0.15"
Statistics = "1"
3 changes: 3 additions & 0 deletions Artifacts.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,6 @@ git-tree-sha1 = "839224a62b59d73073bdb9a5c55d3dc75e30fe33"
[[ilamb_data.download]]
sha256 = "64a9a344ebfbb0113014178a1f93a655c401431565907c07fd33aff8860b62d6"
url = "https://caltech.box.com/shared/static/eii2bfwfp47axfeuysgxlgzbczz27u5g.gz"

[era5_surface_fluxes]
git-tree-sha1 = "f2cb69e41cba84991054162e794292f98bf0ae34"
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ const FT = Float64;
context = ClimaComms.context()
device = ClimaComms.device()
device_suffix = device isa ClimaComms.CPUSingleThreaded ? "cpu" : "gpu"
root_path = "bucket_longrun_$(device_suffix)"
root_path = "bucket_longrun"

# Returns a 2 SVectors of (lat, lon) tuples for n random locations on the land
# surface. The two sets of locations are designed to be used as a training and
Expand Down Expand Up @@ -126,6 +126,10 @@ function setup_prob(t0, tf, Δt, params, outdir; nelements = (101, 7))
)

# Set up parameters
p_names = collect(keys(params))
p_values = [params[name]["value"] for name in p_names]
params = (; zip(Symbol.(p_names), p_values)...)

(; κ_soil, ρc_soil, f_bucket, W_f, p, z_0m) = params
z_0b = z_0m
τc = FT(Δt)
Expand Down Expand Up @@ -194,66 +198,70 @@ function setup_prob(t0, tf, Δt, params, outdir; nelements = (101, 7))
return prob, SciMLBase.CallbackSet(driver_cb, diag_cb)
end

# Define the locations longitudes and latitudes
# Needs to be defined once, both for g(θ) and ERA5 target
t0 = 0.0
tf = 60 * 60.0 * 24 * 366
Δt = 900.0
nelements = (101, 7)
regridder_type = :InterpolationsRegridder
radius = FT(6378.1e3)
depth = FT(3.5)
domain = ClimaLand.Domains.SphericalShell(;
radius = radius,
depth = depth,
nelements = nelements,
npolynomial = 1,
dz_tuple = FT.((1.0, 0.05)),
)
surface_space = domain.space.surface
training_locations, validation_locations =
rand_locations(surface_space, regridder_type, 25)

# (; κ_soil, ρc_soil, f_bucket, W_f, p, z_0m) = params
# The truth params = (;κ_soil = FT(1.5), ρc_soil = FT(2e6), f_bucket = FT(0.75), W_f = FT(0.2), p = FT(1), z_0m = FT(1e-2))
# Read in the era5 datafile
era5_ds = Dataset(joinpath(ClimaLand.Artifacts.era5_surface_data2008_path(),
"era5_monthly_surface_fluxes_200801-200812.nc"))

# Make the ERA5 target
ERA5_target = []
close = (x, y) -> abs(x - y) < 5e-1
for (lon, lat) in training_locations
# Fetch slices of lhf and shf era5 data from the era5 dataset
lat_ind, lon_ind = findall((x) -> close(x, lat), era5_ds["latitude"][:])[1],
findall((x) -> close(x, lon + 180), era5_ds["longitude"][:])[1]
lhf_loc = vec(era5_ds["mslhf"][lon_ind, lat_ind, :][:, end, :])
shf_loc = vec(era5_ds["msshf"][lon_ind, lat_ind, :][:, end, :])

# Create Observation objects for lhf and shf
lhf_ERA5 = EKP.Observation(
Dict(
"samples" => lhf_loc,
"covariances" => cov(lhf_loc) * EKP.I,
"names" => "lhf_$(lon)_$(lat)",
),
)

shf_ERA5 = EKP.Observation(
Dict(
"samples" => shf_loc,
"covariances" => cov(shf_loc) * EKP.I,
"names" => "shf_$(lon)_$(lat)",
),
)

# Add the observations to the target
push!(ERA5_target, lhf_ERA5)
push!(ERA5_target, shf_ERA5)

end

full_obs_era5 = EKP.combine_observations(ERA5_target)
# function target_and_locations()
# Define the locations longitudes and latitudes
# Needs to be defined once, both for g(θ) and ERA5 target
t0 = 0.0
tf = 60 * 60.0 * 24 * 366
Δt = 900.0
nelements = (101, 7)
regridder_type = :InterpolationsRegridder
radius = FT(6378.1e3)
depth = FT(3.5)
domain = ClimaLand.Domains.SphericalShell(;
radius = radius,
depth = depth,
nelements = nelements,
npolynomial = 1,
dz_tuple = FT.((1.0, 0.05)),
)
surface_space = domain.space.surface
locations, validation_locations =
rand_locations(surface_space, regridder_type, 25)

# (; κ_soil, ρc_soil, f_bucket, W_f, p, z_0m) = params
# The truth params = (;κ_soil = FT(1.5), ρc_soil = FT(2e6), f_bucket = FT(0.75), W_f = FT(0.2), p = FT(1), z_0m = FT(1e-2))
# Read in the era5 datafile
era5_ds = Dataset(joinpath(ClimaLand.Artifacts.era5_surface_data2008_path(),
"era5_monthly_surface_fluxes_200801-200812.nc"))

# Make the ERA5 target
ERA5_target = []
close = (x, y) -> abs(x - y) < 5e-1
for (lon, lat) in locations
# Fetch slices of lhf and shf era5 data from the era5 dataset
lat_ind, lon_ind = findall((x) -> close(x, lat), era5_ds["latitude"][:])[1],
findall((x) -> close(x, lon + 180), era5_ds["longitude"][:])[1]
lhf_loc = vec(era5_ds["mslhf"][lon_ind, lat_ind, :][:, end, :])
shf_loc = vec(era5_ds["msshf"][lon_ind, lat_ind, :][:, end, :])

# Create Observation objects for lhf and shf
lhf_ERA5 = EKP.Observation(
Dict(
"samples" => lhf_loc,
"covariances" => cov(lhf_loc) * EKP.I,
"names" => "lhf_$(lon)_$(lat)",
),
)

shf_ERA5 = EKP.Observation(
Dict(
"samples" => shf_loc,
"covariances" => cov(shf_loc) * EKP.I,
"names" => "shf_$(lon)_$(lat)",
),
)

# Add the observations to the target
push!(ERA5_target, lhf_ERA5)
push!(ERA5_target, shf_ERA5)

end

full_obs_era5 = EKP.combine_observations(ERA5_target)
observations = EKP.get_obs(full_obs_era5)
# return training_locations, observations
# end

# Things to consider:
# - masking out the ocean when comparing to data
Expand Down
Original file line number Diff line number Diff line change
@@ -1,26 +1,44 @@
# The script included below contains the following functions:
# rand_locations - returns n random locations on land (lon, lat)
# setup_prob - config for the bucket model
# it also returns the target used for calibration, full_obs_era5, which is
# latent and sensible heat at the rand_locations stacked as a vector
include("bucket_target_script.jl")
observations = full_obs_era5
# target_and_locations - returns locations on land and ERA5 LHF SHF obs at those locations
using ClimaLand
dir = pkgdir(ClimaLand)
# locations, observations = target_and_locations()
# Should observations be a vector or EKP.Observation type?

# Now, we need a forward_model for ClimaCalibrate.
# forward_model runs the model and generates a ClimaDiagnostic output directory
# that will be used to generate the observation_map.
# note that forward_model needs the setup_prob function defined above.
import ClimaCalibrate: forward_model, parameter_path
# IMPORTANT: forward_model needs to be defined as CAL.forward_model, as it adds a method
import ClimaCalibrate: forward_model, parameter_path, path_to_ensemble_member
import ClimaCalibrate as CAL
include("forward_model.jl")
using Distributed
ntasks = 4
addprocs(CAL.SlurmManager(ntasks))

@everywhere begin
import ClimaCalibrate: forward_model, parameter_path, path_to_ensemble_member
import ClimaCalibrate as CAL

using ClimaLand
caldir = "calibration_output"
dir = pkgdir(ClimaLand)

include(joinpath(dir,"experiments/calibration/global_bucket/climacalibrate_bucket/bucket_target_script.jl"))
include(joinpath(dir,"experiments/calibration/global_bucket/climacalibrate_bucket/forward_model.jl"))
end

# Now we include the script observation_map.jl, which contains two functions:
# process_member_data - makes the loss function we want from the forward_model output
# observation_map - makes our G_ensemble, the loss for all n_ensemble parameters
include("observation_map.jl")
# process_member_data - makes the parameter to data map we want from the forward_model output for one ensemble member
# observation_map - makes our G_ensemble, the parameter to data map for all n_ensemble parameters
# IMPORTANT: observation_map needs to be defined as CAL.observation_map, as it adds a method
include(joinpath(dir,"experiments/calibration/global_bucket/climacalibrate_bucket/observation_map.jl"))

# Now that our functions are defined, we still need some to specify some arguments they require.
# Parameters prior
# Note: this could be provided in a .toml file. Not sure if it has to.
prior_κ_soil = EKP.constrained_gaussian("κ_soil", 2, 1, 0, Inf);
prior_ρc_soil = EKP.constrained_gaussian("ρc_soil", 4e6, 2e6, 0, Inf);
prior_f_bucket = EKP.constrained_gaussian("f_bucket", 0.5, 0.3, 0, 1);
Expand All @@ -38,12 +56,15 @@ prior = EKP.combine_distributions([

ensemble_size = 10
n_iterations = 5
noise = EKP.I # not sure what to do here, I had noise embeded in my observation_map function
noise = 1.0*EKP.I # Should work, but this should be covariance of each month from observation (ERA5)

output_dir = "calibration_output" # Should I create this folder?
# This folder will contains ClimaCalibrate outputs - parameters ensemble at each iterations, etc.
caldir = "calibration_output"

# Note: what is the best way to test this?
# Should this script be run as a slurm job? (I expect it takes ~ 5 hours or so to complete)
CAL.calibrate(
CAL.WorkerBackend, # not sure what to do here
CAL.WorkerBackend,
ensemble_size,
n_iterations,
observations,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import TOML as toml

# Explain things here - tutorial style
# (Nat: why is it CAL.forward_model, and not CAL.parameter_path?
"""
"""
function CAL.forward_model(iteration, member)
ensemble_member_path = parameter_path(caldir, iteration, member)
params = toml.parsefile(ensemble_member_path) # should load a Dict, that needs to be converted to namedtuple
ensemble_member_path = path_to_ensemble_member(caldir, iteration, member)
params_path = parameter_path(caldir, iteration, member)
params = toml.parsefile(params_path) # should load a Dict, that needs to be converted to namedtuple

@info ensemble_member_path
diagnostics_dir = joinpath(ensemble_member_path, "global_diagnostics")
diagdir = ClimaUtilities.OutputPathGenerator.generate_output_path(
diagnostics_dir,
Expand All @@ -17,4 +20,5 @@ function CAL.forward_model(iteration, member)
timestepper = CTS.RK4()
ode_algo = CTS.ExplicitAlgorithm(timestepper)
SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb, adaptive = false)
return nothing
end
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
using ClimaAnalysis
import EnsembleKalmanProcesses as EKP

# explain things here - tutorial style
function process_member_data(root_path)
function process_member_data(simdir)

simdir = ClimaAnalysis.SimDir(
joinpath(root_path, "global_diagnostics", "output_active"),
)
lhf = get(simdir; short_name = "lhf")
shf = get(simdir; short_name = "shf")

# Initialize an empty list to store observations
obs_list = []
# Loop over each location
for (lon, lat) in training_locations
for (lon, lat) in locations
# Slice lhf and shf at the given longitude and latitude
lhf_loc = ClimaAnalysis.slice(lhf, lon = lon, lat = lat)
shf_loc = ClimaAnalysis.slice(shf, lon = lon, lat = lat)
Expand Down Expand Up @@ -42,12 +42,12 @@ function process_member_data(root_path)
return obs
end

function observation_map(iteration)
function CAL.observation_map(iteration)
single_member_dims = (1,)
G_ensemble = Array{Float64}(undef, single_member_dims..., ensemble_size)

for m in 1:ensemble_size
member_path = CAL.path_to_ensemble_member(caldir, iteration, m)
member_path = path_to_ensemble_member(caldir, iteration, m)
simdir_path = joinpath(member_path, "global_diagnostics", "output_active")
if isdir(simdir_path)
simdir = SimDir(simdir_path)
Expand Down
14 changes: 14 additions & 0 deletions run_calibration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
#SBATCH --job-name=calibrate_bucket
#SBATCH --output=output_%j.txt # Output file (%j expands to job ID)
#SBATCH --error=error_%j.txt # Error file
#SBATCH --nodes=1 # Number of nodes
#SBATCH --ntasks=4 # Number of tasks (processes)
#SBATCH --time=10:00:00 # Time limit hrs:min:sec
#SBATCH --gpus-per-task=1

# Run your command
export CLIMACOMMS_DEVICE="CUDA"
export CLIMACOMMS_CONTEXT="SINGLETON"

julia --project=.buildkite/ experiments/calibration/global_bucket/climacalibrate_bucket/calibrate.jl
2 changes: 1 addition & 1 deletion src/Artifacts.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ NOTE: We should be able to use @clima_artifact here, but the folders on central
seem to be named differently than the artifact name
"""
function era5_surface_data2008_path(; context = nothing)
return "/groups/esm/ClimaArtifacts/artifacts/era5_surface_fluxes_2008"
return @clima_artifact("era5_surface_fluxes", context)
end

"""
Expand Down

0 comments on commit be1f539

Please sign in to comment.