Making a nice table#

import xarray as xr
xr.set_options(display_style='html')
import intake
import cftime
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import numpy as np
import pandas as pd
import datetime
import seaborn as sns
def global_mean(ds):
    """Return the cosine-of-latitude weighted mean of ``ds`` over latitude.

    Only the latitude dimension is reduced. Any other dimensions (longitude,
    time, ...) are left untouched, so the caller decides how to average them.

    Parameters
    ----------
    ds : xarray.Dataset or xarray.DataArray
        Object with a latitude coordinate or variable named ``lat`` or
        ``latitude``. The values are passed to ``np.deg2rad``, i.e. they are
        treated as degrees.

    Returns
    -------
    Same type as ``ds``
        ``ds`` averaged over the latitude dimension with ``cos(latitude)``
        weights.

    Raises
    ------
    ValueError
        If neither ``lat`` nor ``latitude`` is present.
    """
    lat_name = None
    for candidate in ('lat', 'latitude'):
        # Look in coords and data_vars explicitly: for a Dataset this is the
        # same as ``candidate in ds``, and it also works for a DataArray,
        # where ``in`` would test the array *values* instead of the names.
        in_coords = candidate in getattr(ds, 'coords', ())
        in_data_vars = candidate in getattr(ds, 'data_vars', ())
        if in_coords or in_data_vars:
            lat_name = candidate
            break

    if lat_name is None:
        # Without a latitude there is nothing to weight by; fail with a
        # message that says so instead of an unrelated attribute/name error.
        raise ValueError(
            "global_mean needs a latitude named 'lat' or 'latitude', "
            "but neither was found"
        )

    weights = np.cos(np.deg2rad(ds[lat_name]))
    return ds.weighted(weights).mean(lat_name)
# Alternative catalog locations, kept for reference (only one `cat_url` is active):
#cat_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
#cat_url = "/mnt/craas1-ns9989k-geo4992/data/cmip6.json"
# Path of the CMIP6 catalog description (JSON) that is actually used.
cat_url = '/mnt/craas2-ns9988k/data/catalogs/cmip6.json'
# Open the catalog as an intake-esm datastore; the bare `col` echoes it in the notebook.
col = intake.open_esm_datastore(cat_url)
col
# CMIP6 variable ids to tabulate; reused below when collecting values and units.
varl = ['tas','clt']

# Models (source_id values) to compare; models[0] is later used to look up units.
models = ['CESM2','CanESM5-1','EC-Earth3-Veg-LR','GISS-E2-2-H','E3SM-1-0']
# Narrow the catalog to the chosen models and variables for one experiment,
# one table and one ensemble member.
cat = col.search(source_id = models, 
                 experiment_id=['historical'], 
                 table_id=['Amon',], 
                 variable_id=varl, 
                 member_id=['r1i1p1f1'],
                )
# Peek at the first matching catalog rows.
cat.df.head()
variable_id table_id source_id experiment_id member_id grid_label time_range activity_id institution_id version path dcpp_init_year
0 tas Amon CanESM5-1 historical r1i1p1f1 gn 185001-201412 CMIP CCCma v20190429 /mnt/craas2-ns9988k-dl-ns9560k/ESGF/CMIP6/CMIP... NaN
1 clt Amon E3SM-1-0 historical r1i1p1f1 gr 185001-187412 CMIP E3SM-Project v20190913 /mnt/craas2-ns9988k-dl-ns9560k/ESGF/CMIP6/CMIP... NaN
2 clt Amon E3SM-1-0 historical r1i1p1f1 gr 187501-189912 CMIP E3SM-Project v20190913 /mnt/craas2-ns9988k-dl-ns9560k/ESGF/CMIP6/CMIP... NaN
3 clt Amon E3SM-1-0 historical r1i1p1f1 gr 190001-192412 CMIP E3SM-Project v20190913 /mnt/craas2-ns9988k-dl-ns9560k/ESGF/CMIP6/CMIP... NaN
4 clt Amon E3SM-1-0 historical r1i1p1f1 gr 192501-194912 CMIP E3SM-Project v20190913 /mnt/craas2-ns9988k-dl-ns9560k/ESGF/CMIP6/CMIP... NaN
# List the distinct models present in the search result (compare against `models`).
cat.df['source_id'].unique()
array(['CanESM5-1', 'E3SM-1-0', 'EC-Earth3-Veg-LR', 'GISS-E2-2-H',
       'CESM2'], dtype=object)
# Group catalog entries by these five attributes only. variable_id and member_id
# are deliberately not in the list, so entries that differ only in those columns
# end up in the same group. The attributes also form the dataset keys used below.
cat.esmcat.aggregation_control.groupby_attrs = ['activity_id','experiment_id', 'source_id','table_id','grid_label']
# Echo the setting to confirm it took effect.
cat.esmcat.aggregation_control.groupby_attrs
['activity_id', 'experiment_id', 'source_id', 'table_id', 'grid_label']

Create dictionary from the list of datasets we found#

  • This step may take several minutes, so be patient!

# Load every group of the search result into a dict of datasets, requesting
# cftime-based time decoding.
# NOTE(review): newer intake-esm releases document `xarray_open_kwargs` for this
# purpose and treat `zarr_kwargs` as deprecated — confirm against the installed
# version that `use_cftime` is really forwarded.
dset_dict = cat.to_dataset_dict(zarr_kwargs={'use_cftime':True})
--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.experiment_id.source_id.table_id.grid_label'
100.00% [5/5 00:36<00:00]
/opt/conda/envs/pangeo-notebook/lib/python3.11/site-packages/xarray/conventions.py:286: SerializationWarning: variable 'tas' has multiple fill values {1e+20, 1e+20} defined, decoding all values to NaN.
  var = coder.decode(var, name=name)
/opt/conda/envs/pangeo-notebook/lib/python3.11/site-packages/xarray/conventions.py:286: SerializationWarning: variable 'clt' has multiple fill values {1e+20, 1e+20} defined, decoding all values to NaN.
  var = coder.decode(var, name=name)
# Show the dataset keys (iterating a dict yields its keys).
list(dset_dict)
['CMIP.historical.CanESM5-1.Amon.gn',
 'CMIP.historical.GISS-E2-2-H.Amon.gn',
 'CMIP.historical.CESM2.Amon.gn',
 'CMIP.historical.E3SM-1-0.Amon.gr',
 'CMIP.historical.EC-Earth3-Veg-LR.Amon.gr']

Simplify keys#

# Re-key the datasets by model name only. The original keys are dot-separated
# and the model (source_id) is the third field.
# `mod` intentionally stays bound after the loop: the next cell uses it.
ds_dict = dict()
for k, _dset in dset_dict.items():
    mod = k.split('.')[2]
    ds_dict[mod] = _dset

Compute global mean (replace with whatever you want to calculate)#

# Inspect one dataset: `mod` still holds the model name assigned in the last
# iteration of the most recent loop over models.
ds_dict[mod]
<xarray.Dataset> Size: 811MB
Dimensions:    (time: 1980, bnds: 2, lat: 160, lon: 320, member_id: 1)
Coordinates:
  * time       (time) object 16kB 1850-01-16 12:00:00 ... 2014-12-16 12:00:00
    time_bnds  (time, bnds) object 32kB dask.array<chunksize=(1, 2), meta=np.ndarray>
  * lat        (lat) float64 1kB -89.14 -88.03 -86.91 ... 86.91 88.03 89.14
    lat_bnds   (lat, bnds) float64 3kB dask.array<chunksize=(160, 2), meta=np.ndarray>
  * lon        (lon) float64 3kB 0.0 1.125 2.25 3.375 ... 356.6 357.8 358.9
    lon_bnds   (lon, bnds) float64 5kB dask.array<chunksize=(320, 2), meta=np.ndarray>
  * member_id  (member_id) object 8B 'r1i1p1f1'
    height     float64 8B 2.0
Dimensions without coordinates: bnds
Data variables:
    clt        (member_id, time, lat, lon) float32 406MB dask.array<chunksize=(1, 1, 160, 320), meta=np.ndarray>
    tas        (member_id, time, lat, lon) float32 406MB dask.array<chunksize=(1, 1, 160, 320), meta=np.ndarray>
Attributes: (12/51)
    Conventions:                      CF-1.7 CMIP-6.2
    activity_id:                      CMIP
    branch_method:                    standard
    branch_time_in_child:             0.0
    contact:                          cmip6-data@ec-earth.org
    data_specs_version:               01.00.31
    ...                               ...
    intake_esm_attrs:grid_label:      gr
    intake_esm_attrs:activity_id:     CMIP
    intake_esm_attrs:institution_id:  EC-Earth-Consortium
    intake_esm_attrs:version:         v20200217
    intake_esm_attrs:_data_format_:   netcdf
    intake_esm_dataset_key:           CMIP.historical.EC-Earth3-Veg-LR.Amon.gr
# One scalar per model and variable: the latitude-weighted mean from
# `global_mean`, followed by a plain mean over all remaining dimensions,
# for the time slice '1990'..'2015'. Variables a model lacks become NaN.
ds_vals_mod_var = {}
for mod, _ds in ds_dict.items():
    print(mod)
    _period = _ds.sel(time=slice('1990', '2015'))
    _ds_m = global_mean(_period).mean()
    ds_vals_mod_var[mod] = {
        v: (float(_ds_m[v].values) if v in _ds_m else np.nan)
        for v in varl
    }
            
CanESM5-1
GISS-E2-2-H
CESM2
E3SM-1-0
EC-Earth3-Veg-LR
/opt/conda/envs/pangeo-notebook/lib/python3.11/site-packages/dask/array/numpy_compat.py:53: RuntimeWarning: invalid value encountered in divide
  x = np.divide(x1, x2, out)
# For every model and variable, collect three summary statistics for the
# time slice '1990'..'2015':
#   mean / median / std taken over time of the zonal (longitude) mean,
#   then averaged over latitude with cos(lat) weights by `global_mean`.
# Result layout: ds_vals_mod_var_meas[model][variable][statistic] -> float.
ds_vals_mod_var_meas = {}
for mod, _ds in ds_dict.items():
    # Build the zonal-mean selection once; all three statistics start from it.
    # (No defensive copy is needed: `sel` already returns a new object and
    # nothing below modifies the dataset.)
    _zonal = _ds.sel(time=slice('1990', '2015')).mean('lon')
    meas = {
        'mean': global_mean(_zonal.mean('time')),
        'median': global_mean(_zonal.median('time')),
        'std': global_mean(_zonal.std('time')),
    }

    ds_vals_mod_var_meas[mod] = {}
    for v in varl:
        ds_vals_mod_var_meas[mod][v] = {}
        for m in meas:
            if v not in meas[m]:
                # This model does not provide the variable.
                ds_vals_mod_var_meas[mod][v][m] = np.nan
                continue
            try:
                ds_vals_mod_var_meas[mod][v][m] = float(meas[m][v].squeeze().values)
            except Exception:
                # Best effort: a value that cannot be computed or converted to
                # a scalar becomes NaN. `Exception` (not a bare `except`) keeps
                # Ctrl-C / KeyboardInterrupt working during long computations.
                ds_vals_mod_var_meas[mod][v][m] = np.nan
            

Extract units#

# Read each variable's `units` attribute from the first model in `models`
# and put them in a one-column table indexed by variable name.
_ds = ds_dict[models[0]]
u_dic = {}
for v in varl:
    u_dic[v] = _ds[v].units
units = pd.DataFrame.from_dict(u_dic, orient='index', columns=['units'])

units
units
tas K
clt %
import pandas as pd

# Table with one row per variable: the units column first, then one column
# per model holding the mean values collected in `ds_vals_mod_var`.
_values_by_model = pd.DataFrame.from_dict(ds_vals_mod_var)
df_nice = pd.concat([units, _values_by_model], axis=1)

# Render floats with thousands separators and two decimals.
pd.set_option('display.float_format', '{:20,.2f}'.format)
display(df_nice)
units CanESM5-1 GISS-E2-2-H CESM2 E3SM-1-0 EC-Earth3-Veg-LR
tas K 287.65 NaN 287.94 287.47 287.54
clt % NaN NaN 69.74 67.78 64.10
# Build one frame per model (rows: statistic, columns: variable), tag each
# with its model name, stack them, and index the result by (statistic, model).
ls_meas = [
    pd.DataFrame.from_dict(_stats).assign(model=_model)
    for _model, _stats in ds_vals_mod_var_meas.items()
]
_stacked = pd.concat(ls_meas).reset_index()
_stacked = _stacked.sort_values('index')
df_out = _stacked.set_index(['index', 'model'])
pd.set_option('display.float_format', '{:20,.2f}'.format)

df_out
tas clt
index model
mean CanESM5-1 287.65 NaN
GISS-E2-2-H NaN NaN
CESM2 287.94 69.74
E3SM-1-0 287.47 67.78
EC-Earth3-Veg-LR 287.54 64.10
median CanESM5-1 287.47 NaN
GISS-E2-2-H NaN NaN
CESM2 287.92 70.04
E3SM-1-0 287.35 68.22
EC-Earth3-Veg-LR 287.29 64.48
std CanESM5-1 3.70 NaN
GISS-E2-2-H NaN NaN
CESM2 3.47 5.73
E3SM-1-0 3.61 5.48
EC-Earth3-Veg-LR 3.65 6.35
# Same table turned sideways: variables as rows, (statistic, model) as columns.
df_out.transpose()
index mean median std
model CanESM5-1 GISS-E2-2-H CESM2 E3SM-1-0 EC-Earth3-Veg-LR CanESM5-1 GISS-E2-2-H CESM2 E3SM-1-0 EC-Earth3-Veg-LR CanESM5-1 GISS-E2-2-H CESM2 E3SM-1-0 EC-Earth3-Veg-LR
tas 287.65 NaN 287.94 287.47 287.54 287.47 NaN 287.92 287.35 287.29 3.70 NaN 3.47 3.61 3.65
clt NaN NaN 69.74 67.78 64.10 NaN NaN 70.04 68.22 64.48 NaN NaN 5.73 5.48 6.35