Making a nice table

import xarray as xr
xr.set_options(display_style='html')
import intake
import cftime
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import numpy as np
import pandas as pd
import datetime
import seaborn as sns
def global_mean(ds):
    """Return the cosine-of-latitude weighted mean of ``ds`` over latitude.

    Only the latitude dimension is reduced. Longitude, time and any other
    dimensions are left for the caller to average.

    Parameters
    ----------
    ds : xarray.Dataset or xarray.DataArray
        Object with a latitude coordinate (in degrees) named ``lat`` or
        ``latitude``.

    Returns
    -------
    Same type as ``ds``
        ``ds`` with the latitude dimension averaged out using
        ``cos(latitude)`` weights.

    Raises
    ------
    ValueError
        If ``ds`` has neither a ``lat`` nor a ``latitude`` coordinate.
    """
    # Look the name up in ``ds.coords``: ``name in ds`` on a DataArray tests
    # the data values rather than the coordinate names.
    lat_name = next(
        (name for name in ('lat', 'latitude') if name in ds.coords),
        None,
    )
    if lat_name is None:
        raise ValueError(
            "global_mean needs a 'lat' or 'latitude' coordinate; "
            f"found coordinates: {list(ds.coords)}"
        )

    # cos(latitude) approximates the relative grid-cell area on a regular
    # latitude/longitude grid.
    weights = np.cos(np.deg2rad(ds.coords[lat_name]))
    return ds.weighted(weights).mean(lat_name)
# Catalog to read. Other catalog locations that were used before:
#   "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
#   "/mnt/craas1-ns9989k-geo4992/data/cmip6.json"
cat_url = '/mnt/craas1-ns9989k-geo4992/data/catalogs/cmip6.json'
col = intake.open_esm_datastore(cat_url)
col

# Variables and models that end up in the table.
varl = ['tas', 'clt']
models = ['CESM2', 'CanESM5-1', 'EC-Earth3-Veg-LR', 'GISS-E2-2-H', 'E3SM-1-0']

# Restrict the catalog to monthly historical output of one ensemble member.
search_criteria = dict(
    source_id=models,
    experiment_id=['historical'],
    table_id=['Amon'],
    variable_id=varl,
    member_id=['r1i1p1f1'],
)
cat = col.search(**search_criteria)
cat.df.head()
variable_id table_id source_id experiment_id member_id grid_label time_range activity_id institution_id version path dcpp_init_year
0 tas Amon CanESM5-1 historical r1i1p1f1 gn 185001-201412 CMIP CCCma v20190429 /mnt/craas1-ns9989k-ns9560k/ESGF/CMIP6/CMIP/CC... NaN
1 clt Amon E3SM-1-0 historical r1i1p1f1 gr 185001-187412 CMIP E3SM-Project v20190913 /mnt/craas1-ns9989k-ns9560k/ESGF/CMIP6/CMIP/E3... NaN
2 clt Amon E3SM-1-0 historical r1i1p1f1 gr 187501-189912 CMIP E3SM-Project v20190913 /mnt/craas1-ns9989k-ns9560k/ESGF/CMIP6/CMIP/E3... NaN
3 clt Amon E3SM-1-0 historical r1i1p1f1 gr 190001-192412 CMIP E3SM-Project v20190913 /mnt/craas1-ns9989k-ns9560k/ESGF/CMIP6/CMIP/E3... NaN
4 clt Amon E3SM-1-0 historical r1i1p1f1 gr 192501-194912 CMIP E3SM-Project v20190913 /mnt/craas1-ns9989k-ns9560k/ESGF/CMIP6/CMIP/E3... NaN
# Distinct `source_id` values among the matched catalog rows, i.e. which of
# the requested models the search actually found.
cat.df['source_id'].unique()
array(['CanESM5-1', 'E3SM-1-0', 'EC-Earth3-Veg-LR', 'GISS-E2-2-H',
       'CESM2'], dtype=object)
# Set the attributes used to group catalog rows into datasets; the keys of the
# dictionary returned by `to_dataset_dict` are built from these, joined by '.'.
cat.esmcat.aggregation_control.groupby_attrs = ['activity_id','experiment_id', 'source_id','table_id','grid_label']
cat.esmcat.aggregation_control.groupby_attrs
['activity_id', 'experiment_id', 'source_id', 'table_id', 'grid_label']

Create dictionary from the list of datasets we found

  • This step may take several minutes so be patient!

# Load every matched group as an xarray Dataset with cftime time axes.
# `xarray_open_kwargs` replaces the deprecated `zarr_kwargs`/`cdf_kwargs`, and
# the assets in this catalog are netCDF rather than zarr.
dset_dict = cat.to_dataset_dict(xarray_open_kwargs={'use_cftime': True})
--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.experiment_id.source_id.table_id.grid_label'
100.00% [5/5 00:30<00:00]
/opt/conda/envs/pangeo-notebook/lib/python3.11/site-packages/xarray/conventions.py:286: SerializationWarning: variable 'clt' has multiple fill values {1e+20, 1e+20} defined, decoding all values to NaN.
  var = coder.decode(var, name=name)
/opt/conda/envs/pangeo-notebook/lib/python3.11/site-packages/xarray/conventions.py:286: SerializationWarning: variable 'tas' has multiple fill values {1e+20, 1e+20} defined, decoding all values to NaN.
  var = coder.decode(var, name=name)
# Show the keys under which the loaded datasets are stored.
list(dset_dict)
['CMIP.historical.CanESM5-1.Amon.gn',
 'CMIP.historical.GISS-E2-2-H.Amon.gn',
 'CMIP.historical.CESM2.Amon.gn',
 'CMIP.historical.E3SM-1-0.Amon.gr',
 'CMIP.historical.EC-Earth3-Veg-LR.Amon.gr']

Simplify keys

# Re-key the datasets by model name only: the third dot-separated field of
# each key is the `source_id`.
ds_dict = {}
for k, _dset in dset_dict.items():
    mod = k.split('.')[2]
    ds_dict[mod] = _dset

Compute global mean (replace with whatever you want to calculate)

# Peek at one dataset. `_ds` is bound here explicitly because it is otherwise
# only assigned further down, so running the notebook top to bottom would
# raise a NameError at this point.
_ds = next(iter(ds_dict.values()))
_ds
<xarray.Dataset> Size: 65MB
Dimensions:    (time: 1980, bnds: 2, lat: 64, lon: 128, member_id: 1)
Coordinates:
  * time       (time) object 16kB 1850-01-16 12:00:00 ... 2014-12-16 12:00:00
  * lat        (lat) float64 512B -87.86 -85.1 -82.31 ... 82.31 85.1 87.86
  * lon        (lon) float64 1kB 0.0 2.812 5.625 8.438 ... 351.6 354.4 357.2
    height     float64 8B ...
  * member_id  (member_id) object 8B 'r1i1p1f1'
Dimensions without coordinates: bnds
Data variables:
    time_bnds  (time, bnds) object 32kB dask.array<chunksize=(1, 2), meta=np.ndarray>
    lat_bnds   (lat, bnds) float64 1kB dask.array<chunksize=(64, 2), meta=np.ndarray>
    lon_bnds   (lon, bnds) float64 2kB dask.array<chunksize=(128, 2), meta=np.ndarray>
    tas        (member_id, time, lat, lon) float32 65MB dask.array<chunksize=(1, 1, 64, 128), meta=np.ndarray>
Attributes: (12/67)
    CCCma_model_hash:                 c8708a7ff559a1daeee0b713f94508a26a04c6b7
    CCCma_parent_runid:               v510b-pict
    CCCma_pycmor_hash:                0db7dba6172f1fb31dd9a4af1ae40a53189337ea
    CCCma_runid:                      v5.1-histens-01
    Conventions:                      CF-1.7 CMIP-6.2
    YMDH_branch_time_in_child:        1850:01:01:00
    ...                               ...
    intake_esm_attrs:activity_id:     CMIP
    intake_esm_attrs:institution_id:  CCCma
    intake_esm_attrs:version:         v20190429
    intake_esm_attrs:path:            /mnt/craas1-ns9989k-ns9560k/ESGF/CMIP6/...
    intake_esm_attrs:_data_format_:   netcdf
    intake_esm_dataset_key:           CMIP.historical.CanESM5-1.Amon.gn
# `mod` is whatever model name the most recently executed loop over the
# models left behind, so which dataset is shown depends on execution order.
ds_dict[mod]
<xarray.Dataset> Size: 65MB
Dimensions:    (time: 1980, bnds: 2, lat: 64, lon: 128, member_id: 1)
Coordinates:
  * time       (time) object 16kB 1850-01-16 12:00:00 ... 2014-12-16 12:00:00
  * lat        (lat) float64 512B -87.86 -85.1 -82.31 ... 82.31 85.1 87.86
  * lon        (lon) float64 1kB 0.0 2.812 5.625 8.438 ... 351.6 354.4 357.2
    height     float64 8B ...
  * member_id  (member_id) object 8B 'r1i1p1f1'
Dimensions without coordinates: bnds
Data variables:
    time_bnds  (time, bnds) object 32kB dask.array<chunksize=(1, 2), meta=np.ndarray>
    lat_bnds   (lat, bnds) float64 1kB dask.array<chunksize=(64, 2), meta=np.ndarray>
    lon_bnds   (lon, bnds) float64 2kB dask.array<chunksize=(128, 2), meta=np.ndarray>
    tas        (member_id, time, lat, lon) float32 65MB dask.array<chunksize=(1, 1, 64, 128), meta=np.ndarray>
Attributes: (12/67)
    CCCma_model_hash:                 c8708a7ff559a1daeee0b713f94508a26a04c6b7
    CCCma_parent_runid:               v510b-pict
    CCCma_pycmor_hash:                0db7dba6172f1fb31dd9a4af1ae40a53189337ea
    CCCma_runid:                      v5.1-histens-01
    Conventions:                      CF-1.7 CMIP-6.2
    YMDH_branch_time_in_child:        1850:01:01:00
    ...                               ...
    intake_esm_attrs:activity_id:     CMIP
    intake_esm_attrs:institution_id:  CCCma
    intake_esm_attrs:version:         v20190429
    intake_esm_attrs:path:            /mnt/craas1-ns9989k-ns9560k/ESGF/CMIP6/...
    intake_esm_attrs:_data_format_:   netcdf
    intake_esm_dataset_key:           CMIP.historical.CanESM5-1.Amon.gn
# One number per model and variable: latitude-weighted mean over 1990-2015,
# followed by a plain mean over every remaining dimension. Variables a model
# does not provide become NaN.
ds_vals_mod_var = {}
for mod, _ds in ds_dict.items():
    print(mod)
    _ds_m = global_mean(_ds.sel(time=slice('1990', '2015'))).mean()
    ds_vals_mod_var[mod] = {
        v: float(_ds_m[v].values) if v in _ds_m else np.nan
        for v in varl
    }
            
CanESM5-1
GISS-E2-2-H
CESM2
E3SM-1-0
EC-Earth3-Veg-LR
# For every model and variable, collect three statistics over 1990-2015.
# Each one is computed along time on the zonal (longitude) mean and then
# averaged over latitude with cos(latitude) weights, so 'std' is the
# latitude-weighted mean of the temporal standard deviation, not the standard
# deviation of the global-mean time series.
ds_vals_mod_var_meas = {}
for mod, _ds in ds_dict.items():
    ds_vals_mod_var_meas[mod] = dict()

    # Select the period and take the zonal mean once for all statistics.
    _zonal = _ds.sel(time=slice('1990', '2015')).mean('lon')
    meas = {
        'mean': global_mean(_zonal.mean('time')),
        'median': global_mean(_zonal.median('time')),
        'std': global_mean(_zonal.std('time')),
    }

    for v in varl:
        ds_vals_mod_var_meas[mod][v] = dict()
        for m in meas:
            if v in meas[m]:
                try:
                    ds_vals_mod_var_meas[mod][v][m] = float(meas[m][v].squeeze().values)
                except (TypeError, ValueError):
                    # float() rejects anything that is not a single number;
                    # record NaN for that cell. Other errors (and Ctrl-C) are
                    # no longer silently turned into NaN.
                    ds_vals_mod_var_meas[mod][v][m] = np.nan
            else:
                # This model does not provide the variable.
                ds_vals_mod_var_meas[mod][v][m] = np.nan
            

Extract units

# Read each variable's units from the first model (in the order of `models`)
# that was actually loaded and provides that variable. Relying on `models[0]`
# alone raises a KeyError as soon as that model is missing from `ds_dict` or
# lacks one of the variables.
_candidates = [ds_dict[_m] for _m in models if _m in ds_dict]
u_dic = {
    v: next((_c[v].attrs.get('units', '') for _c in _candidates if v in _c), '')
    for v in varl
}
units = pd.DataFrame.from_dict(u_dic, orient='index', columns=['units'])

units
units
tas K
clt %
import pandas as pd

# Show floats with two decimals and a thousands separator.
pd.set_option('display.float_format', '{:20,.2f}'.format)

# Variables as rows: the units column first, then one column per model.
df_nice = pd.concat([units, pd.DataFrame.from_dict(ds_vals_mod_var)], axis=1)
display(df_nice)
units CanESM5-1 GISS-E2-2-H CESM2 E3SM-1-0 EC-Earth3-Veg-LR
tas K 287.65 NaN 287.94 287.47 287.54
clt % NaN NaN 69.74 67.78 64.10
# Build one table indexed by (statistic, model) with a column per variable.
ls_meas = list()
for m in ds_vals_mod_var_meas:
    # Rows are the statistics (mean/median/std), columns the variables.
    _df = pd.DataFrame.from_dict(ds_vals_mod_var_meas[m])
    _df['model'] = m
    ls_meas.append(_df)

# Use a stable sort so that, within each statistic, the models keep the order
# in which they were concatenated; the default sort does not guarantee that.
df_out = (
    pd.concat(ls_meas)
    .reset_index()
    .sort_values('index', kind='stable')
    .set_index(['index', 'model'])
)
pd.set_option('display.float_format', '{:20,.2f}'.format)

df_out
tas clt
index model
mean CanESM5-1 287.65 NaN
GISS-E2-2-H NaN NaN
CESM2 287.94 69.74
E3SM-1-0 287.47 67.78
EC-Earth3-Veg-LR 287.54 64.10
median CanESM5-1 287.47 NaN
GISS-E2-2-H NaN NaN
CESM2 287.92 70.04
E3SM-1-0 287.35 68.22
EC-Earth3-Veg-LR 287.29 64.48
std CanESM5-1 3.70 NaN
GISS-E2-2-H NaN NaN
CESM2 3.47 5.73
E3SM-1-0 3.61 5.48
EC-Earth3-Veg-LR 3.65 6.35
# Transposed view: variables as rows, (statistic, model) pairs as columns.
df_out.T
index mean median std
model CanESM5-1 GISS-E2-2-H CESM2 E3SM-1-0 EC-Earth3-Veg-LR CanESM5-1 GISS-E2-2-H CESM2 E3SM-1-0 EC-Earth3-Veg-LR CanESM5-1 GISS-E2-2-H CESM2 E3SM-1-0 EC-Earth3-Veg-LR
tas 287.65 NaN 287.94 287.47 287.54 287.47 NaN 287.92 287.35 287.29 3.70 NaN 3.47 3.61 3.65
clt NaN NaN 69.74 67.78 64.10 NaN NaN 70.04 68.22 64.48 NaN NaN 5.73 5.48 6.35