diff --git a/docs/src/leaderboard.md b/docs/src/leaderboard.md index d2f8265563..215998533b 100644 --- a/docs/src/leaderboard.md +++ b/docs/src/leaderboard.md @@ -48,3 +48,6 @@ must be initialized for each variable of interest. The CliMA model is added with the `RMSEVariable`. It is assumed that the `RMSEVariable` contains only the columns "DJF", "MAM", "JJA", "SON", and "ANN" in that order. The file `leaderboard.jl` will load the appropriate data into the `RMSEVariable`. + +### Add a new variable to compare against observations in pressure coordinates +To add a new variable, you only need to modify `sim_var_pfull_dict` in `get_sim_var_in_pfull_dict`, `obs_var_dict` in `get_obs_var_in_pfull_dict`, and `compare_vars_biases_plot_extrema` in `get_compare_vars_biases_plot_extrema_pfull`. It is expected that the dimensions of the variables are time, latitude, longitude, and pressure in no particular order and the units for the pressure dimension is expected to be `hPa`. The dimensions should be in increasing order so that an interpolant can be generated. diff --git a/experiments/ClimaEarth/leaderboard/data_sources.jl b/experiments/ClimaEarth/leaderboard/data_sources.jl index 24c42c9d24..98886e6b22 100644 --- a/experiments/ClimaEarth/leaderboard/data_sources.jl +++ b/experiments/ClimaEarth/leaderboard/data_sources.jl @@ -1,5 +1,6 @@ import ClimaAnalysis import ClimaUtilities.ClimaArtifacts: @clima_artifact +import OrderedCollections: OrderedDict # Tuple of short names for loading simulation and observational data sim_obs_short_names_no_pr = [ @@ -27,7 +28,8 @@ compare_vars_biases_groups = [ """ get_compare_vars_biases_plot_extrema() -Return a dictionary mapping short names to ranges for the bias plots. +Return a dictionary mapping short names to ranges for the bias plots. This is +used by the function `compute_leaderboard`. To add a variable to the leaderboard, add a key-value pair to the dictionary `compare_vars_biases_plot_extrema` whose key is a short name key is the same @@ -57,7 +59,7 @@ end get_sim_var_dict(diagnostics_folder_path) Return a dictionary mapping short names to `OutputVar` containing preprocessed -simulation data. +simulation data. This is used by the function `compute_leaderboard`. To add a variable for the leaderboard, add a key-value pair to the dictionary `sim_var_dict` whose key is the short name of the variable and the value is a @@ -96,7 +98,7 @@ end get_obs_var_dict() Return a dictionary mapping short names to `OutputVar` containing preprocessed -observational data. +observational data. This is used by the function `compute_leaderboard`. To add a variable for the leaderboard, add a key-value pair to the dictionary `obs_var_dict` whose key is the short name of the variable and the value is a @@ -144,11 +146,11 @@ end get_rmse_var_dict() Return a dictionary mapping short names to `RMSEVariable` containing information about -RMSEs of models for the short name of the variable. +RMSEs of models for the short name of the variable. This is used by the function +`compute_leaderboard`. """ function get_rmse_var_dict() # Load data into RMSEVariables - rmse_var_names = ["pr", "rsut", "rlut"] rmse_var_pr = ClimaAnalysis.read_rmses( joinpath(@clima_artifact("cmip_model_rmse"), "pr_rmse_amip_pr_amip_5yr.csv"), "pr", @@ -176,6 +178,88 @@ function get_rmse_var_dict() ClimaAnalysis.add_unit!(rmse_var_rlut, "CliMA", "W m^-2") # Store the rmse vars in a dictionary - rmse_var_dict = Dict("pr" => rmse_var_pr, "rsut" => rmse_var_rsut, "rlut" => rmse_var_rlut) + rmse_var_dict = OrderedDict("pr" => rmse_var_pr, "rsut" => rmse_var_rsut, "rlut" => rmse_var_rlut) return rmse_var_dict end + +""" + get_sim_var_in_pfull_dict(diagnostics_folder_path) + +Return a dictionary mapping short names to `OutputVar` containing preprocessed +simulation data in pressure space. This is used by `compute_pfull_leaderboard`. +""" +function get_sim_var_in_pfull_dict(diagnostics_folder_path) + simdir = ClimaAnalysis.SimDir(diagnostics_folder_path) + sim_var_pfull_dict = Dict{String, Any}( + "ta" => + () -> begin + sim_var = get(simdir, short_name = "ta") + pfull_var = get(simdir, short_name = "pfull") + sim_in_pfull_var = ClimaAnalysis.Atmos.to_pressure_coordinates(sim_var, pfull_var) + sim_in_pfull_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_in_pfull_var) + sim_in_pfull_var = ClimaAnalysis.convert_dim_units( + sim_in_pfull_var, + "pfull", + "hPa"; + conversion_function = x -> 0.01 * x, + ) + return sim_in_pfull_var + end, + ) + return sim_var_pfull_dict +end + +""" + get_obs_var_in_pfull_dict() + +Return a dictionary mapping short names to `OutputVar` containing preprocessed +observational data in pressure coordinates. This is used by +`compute_pfull_leaderboard`. + +To add a variable for the leaderboard, add a key-value pair to the dictionary +`obs_var_dict` whose key is the short name of the variable and the value is a +function that returns a `OutputVar`. The function must takes in a start date +which is used to align the times in the observational data to match the +simulation data. The short name must be the same as in `sim_var_pfull_dict` in +the function `get_sim_var_in_pfull_dict`. Any preprocessing is done in the +function which includes unit conversion and shifting the dates. +""" +function get_obs_var_in_pfull_dict() + artifact_path = joinpath(@clima_artifact("era5_monthly_averages_pressure_levels_1979_2024"), "era5_monthly_averages_pressure_levels_197901-202410.nc") + obs_var_dict = Dict{String, Any}( + "ta" => + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + artifact_path, + "t", + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + obs_var = ClimaAnalysis.Var.convert_dim_units( + obs_var, + "pressure_level", + "hPa"; + conversion_function = x -> 0.01 * x, + ) + return obs_var + end, + ) + return obs_var_dict +end + +""" + get_compare_vars_biases_plot_extrema_pfull() + +Return a dictionary mapping short names to ranges for the bias plots. This is +used by the function `compute_pfull_leaderboard`. + +To add a variable to the leaderboard, add a key-value pair to the dictionary +`compare_vars_biases_plot_extrema` whose key is a short name key is the same +short name in `sim_var_pfull_dict` in the function `get_sim_var_pfull_dict` and +the value is a tuple, where the first element is the lower bound and the last +element is the upper bound for the bias plots. +""" +function get_compare_vars_biases_plot_extrema_pfull() + compare_vars_biases_plot_extrema = Dict("ta" => (-5.0, 5.0)) + return compare_vars_biases_plot_extrema +end diff --git a/experiments/ClimaEarth/leaderboard/leaderboard.jl b/experiments/ClimaEarth/leaderboard/leaderboard.jl index b73fdecda3..ec9574b29d 100644 --- a/experiments/ClimaEarth/leaderboard/leaderboard.jl +++ b/experiments/ClimaEarth/leaderboard/leaderboard.jl @@ -1,5 +1,4 @@ import ClimaAnalysis -import ClimaUtilities.ClimaArtifacts: @clima_artifact import GeoMakie import CairoMakie import Dates @@ -130,6 +129,7 @@ function compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) end # Add RMSE for the CliMA model and for each season + rmse_var_names = keys(rmse_var_dict) for short_name in rmse_var_names for season in seasons !ClimaAnalysis.isempty(sim_obs_comparsion_dict[short_name][season][1]) && ( @@ -160,6 +160,134 @@ function compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) CairoMakie.save(joinpath(leaderboard_base_path, "bias_leaderboard.png"), fig_leaderboard) end +""" + compute_pfull_leaderboard(leaderboard_base_path, diagnostics_folder_path) + +Plot the biases and a leaderboard of various variables in pressure coordinates. + +The paramter `leaderboard_base_path` is the path to save the leaderboards and bias plots and +`diagnostics_folder_path` is the path to the simulation data. +""" +function compute_pfull_leaderboard(leaderboard_base_path, diagnostics_folder_path) + @info "Error against observations for variables in pressure coordinates" + + sim_var_pfull_dict = get_sim_var_in_pfull_dict(diagnostics_folder_path) + obs_var_pfull_dict = get_obs_var_in_pfull_dict() + + # Print dates for debugging + _, get_var = first(sim_var_pfull_dict) + var = get_var() + output_dates = Dates.DateTime(var.attributes["start_date"]) .+ Dates.Second.(ClimaAnalysis.times(var)) + @info "Working with dates:" + @info output_dates + + # Set up dict for storing simulation and observational data after processing + sim_obs_comparsion_dict = Dict() + + for short_name in keys(sim_var_pfull_dict) + sim_var = sim_var_pfull_dict[short_name]() + obs_var = obs_var_pfull_dict[short_name](sim_var.attributes["start_date"]) + # Remove first spin_up_months from simulation + spin_up_months = 6 + spinup_cutoff = spin_up_months * 31 * 86400.0 + ClimaAnalysis.times(sim_var)[end] >= spinup_cutoff && + (sim_var = ClimaAnalysis.window(sim_var, "time", left = spinup_cutoff)) + + # Restrain the pressure levels so we can resample + min_pfull = ClimaAnalysis.pressures(obs_var)[1] + sim_pressures = ClimaAnalysis.pressures(sim_var) + greater_than_min_pfull_idx = findfirst(x -> >(x, min_pfull), sim_pressures) + sim_var = ClimaAnalysis.window(sim_var, "pfull", left = sim_pressures[greater_than_min_pfull_idx]) + + # Resample over lat, lon, time, and pressures + obs_var = ClimaAnalysis.resampled_as(obs_var, sim_var) + + # Take time average + sim_var = ClimaAnalysis.average_time(sim_var) + obs_var = ClimaAnalysis.average_time(obs_var) + + # Save observation and simulation data + sim_obs_comparsion_dict[short_name] = (; sim = sim_var, obs = obs_var) + end + + # Define figure whose columns are pressure levels and rows are variables + + # Slicing only uses the nearest value, so we also need to keep track of the + # actual pressure levels that we get when slicing + target_p_lvls = [850.0, 500.0, 250.0] + real_p_lvls = [] + + # Get units for pressure for plotting = + p_units = ClimaAnalysis.dim_units(first(sim_obs_comparsion_dict)[2].sim, "pfull") + + # Initialize ranges for colorbar and figure + compare_vars_biases_plot_extrema_pfull = get_compare_vars_biases_plot_extrema_pfull() + fig_bias_pfull_vars = CairoMakie.Figure(size = (650 * length(target_p_lvls), 450 * length(sim_obs_comparsion_dict))) + + # Plot bias at the pressure levels in p_lvls + for (row_idx, short_name) in enumerate(keys(sim_obs_comparsion_dict)) + # Plot label for variable name + CairoMakie.Label(fig_bias_pfull_vars[row_idx, 0], short_name, tellheight = false, fontsize = 30) + for (col_idx, p_lvl) in enumerate(target_p_lvls) + layout = fig_bias_pfull_vars[row_idx, col_idx] = CairoMakie.GridLayout() + sim_var = sim_obs_comparsion_dict[short_name].sim + obs_var = sim_obs_comparsion_dict[short_name].obs + + # Slice at pressure level using nearest value rather interpolating + sim_var = ClimaAnalysis.slice(sim_var, pfull = p_lvl) + obs_var = ClimaAnalysis.slice(obs_var, pressure_level = p_lvl) + + # Get the actual pressure level that we slice at + push!(real_p_lvls, parse(Float64, sim_var.attributes["slice_pfull"])) + + # Add so that it show up on in the title + sim_var.attributes["short_name"] = "mean $(ClimaAnalysis.short_name(sim_var))" + + # Plot bias + ClimaAnalysis.Visualize.plot_bias_on_globe!( + layout, + sim_var, + obs_var, + cmap_extrema = compare_vars_biases_plot_extrema_pfull[short_name], + ) + end + end + + # Plot label for the pressure levels + for (col_idx, p_lvl) in enumerate(real_p_lvls) + CairoMakie.Label(fig_bias_pfull_vars[0, col_idx], "$p_lvl $p_units", tellwidth = false, fontsize = 30) + end + + # Define figure with at most four columns and the necessary number of rows for + # all the variables + num_vars = length(compare_vars_biases_plot_extrema_pfull) + num_cols = min(4, num_vars) + num_rows = ceil(Int, num_vars / 4) + fig_lat_pres = CairoMakie.Figure(size = (650 * num_cols, 450 * num_rows)) + + # Take zonal mean and plot lat - pressure heatmap + for (idx, short_name) in enumerate(keys(sim_obs_comparsion_dict)) + layout = fig_lat_pres[1, idx] = CairoMakie.GridLayout() + sim_var = sim_obs_comparsion_dict[short_name].sim + obs_var = sim_obs_comparsion_dict[short_name].obs + + # Take zonal mean (average along lon arithmetically) + sim_var = ClimaAnalysis.average_lon(sim_var) + obs_var = ClimaAnalysis.average_lon(obs_var) + + # Compute bias and set short name, long name, and units + bias_var = sim_var - obs_var + bias_var = ClimaAnalysis.set_units(bias_var, ClimaAnalysis.units(sim_var)) + bias_var.attributes["short_name"] = "sim-obs_$(ClimaAnalysis.short_name(sim_var))" + bias_var.attributes["long_name"] = "SIM-OBS_$(ClimaAnalysis.long_name(sim_var))" + ClimaAnalysis.Visualize.heatmap2D!(layout, bias_var, more_kwargs = Dict(:axis => Dict([:yreversed => true]), :plot => Dict(:colormap => :vik, :colorrange => (-10.0, 10.0), :colormap => CairoMakie.cgrad(:vik, 11, categorical = true)))) + end + + # Save figures + CairoMakie.save(joinpath(leaderboard_base_path, "bias_vars_in_pfull.png"), fig_bias_pfull_vars) + CairoMakie.save(joinpath(leaderboard_base_path, "lat_pfull_heatmaps.png"), fig_lat_pres) +end + if abspath(PROGRAM_FILE) == @__FILE__ if length(ARGS) < 2 error("Usage: julia leaderboard.jl ") @@ -167,4 +295,5 @@ if abspath(PROGRAM_FILE) == @__FILE__ leaderboard_base_path = ARGS[begin] diagnostics_folder_path = ARGS[begin + 1] compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) + compute_pfull_leaderboard(leaderboard_base_path, diagnostics_folder_path) end diff --git a/experiments/ClimaEarth/run_amip.jl b/experiments/ClimaEarth/run_amip.jl index 0265ea0a0d..6979b7b69d 100644 --- a/experiments/ClimaEarth/run_amip.jl +++ b/experiments/ClimaEarth/run_amip.jl @@ -959,6 +959,7 @@ if ClimaComms.iamroot(comms_ctx) diagnostics_folder_path = atmos_sim.integrator.p.output_dir leaderboard_base_path = dir_paths.artifacts compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) + compute_pfull_leaderboard(leaderboard_base_path, diagnostics_folder_path) end end ## plot extra atmosphere diagnostics if specified