# Making a nice table
import xarray as xr
xr.set_options(display_style='html')
import intake
import cftime
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import numpy as np
import pandas as pd
import datetime
import seaborn as sns
def global_mean(ds):
    """Return the cos(latitude)-weighted mean of *ds* over its latitude dimension.

    The latitude coordinate may be named ``lat`` or ``latitude``. Only the
    latitude dimension is reduced here; any other dimension (longitude,
    time, ...) is left in the result for the caller to average.

    Parameters
    ----------
    ds : xarray.Dataset
        Data with a latitude coordinate given in degrees (it is converted
        with ``np.deg2rad`` to build the weights).

    Returns
    -------
    The result of ``ds.weighted(weights).mean(<latitude name>)``.

    Raises
    ------
    ValueError
        If *ds* contains neither ``lat`` nor ``latitude``.
    """
    for lat_name in ('lat', 'latitude'):
        if lat_name in ds:
            break
    else:
        # Without a latitude coordinate there is nothing to build weights
        # from, so fail with a message that names the actual problem.
        raise ValueError(
            "global_mean needs a 'lat' or 'latitude' coordinate to build "
            "area weights, but found neither"
        )

    # Grid-cell area on a regular lat/lon grid scales with cos(latitude).
    weights = np.cos(np.deg2rad(ds[lat_name]))
    return ds.weighted(weights).mean(lat_name)
# Path to the intake-esm catalog (JSON) that is opened below.
# Other catalog locations, kept commented out for reference:
#cat_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
#cat_url = "/mnt/craas1-ns9989k-geo4992/data/cmip6.json"
cat_url = '/mnt/craas1-ns9989k-geo4992/data/catalogs/cmip6.json'
# Open the catalog as an intake-esm datastore; `col` is searched further down.
col = intake.open_esm_datastore(cat_url)
# Bare expression: shows the datastore when run as a notebook cell.
col
# Variables and models that every later cell iterates over.
varl = ['tas', 'clt']
models = [
    'CESM2',
    'CanESM5-1',
    'EC-Earth3-Veg-LR',
    'GISS-E2-2-H',
    'E3SM-1-0',
]

# Facets used to narrow the catalog to the wanted entries.
search_facets = {
    'source_id': models,
    'experiment_id': ['historical'],
    'table_id': ['Amon'],
    'variable_id': varl,
    'member_id': ['r1i1p1f1'],
}
cat = col.search(**search_facets)

# Preview the first rows of the matching catalog entries.
cat.df.head()
variable_id | table_id | source_id | experiment_id | member_id | grid_label | time_range | activity_id | institution_id | version | path | dcpp_init_year | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | tas | Amon | CanESM5-1 | historical | r1i1p1f1 | gn | 185001-201412 | CMIP | CCCma | v20190429 | /mnt/craas1-ns9989k-ns9560k/ESGF/CMIP6/CMIP/CC... | NaN |
1 | clt | Amon | E3SM-1-0 | historical | r1i1p1f1 | gr | 185001-187412 | CMIP | E3SM-Project | v20190913 | /mnt/craas1-ns9989k-ns9560k/ESGF/CMIP6/CMIP/E3... | NaN |
2 | clt | Amon | E3SM-1-0 | historical | r1i1p1f1 | gr | 187501-189912 | CMIP | E3SM-Project | v20190913 | /mnt/craas1-ns9989k-ns9560k/ESGF/CMIP6/CMIP/E3... | NaN |
3 | clt | Amon | E3SM-1-0 | historical | r1i1p1f1 | gr | 190001-192412 | CMIP | E3SM-Project | v20190913 | /mnt/craas1-ns9989k-ns9560k/ESGF/CMIP6/CMIP/E3... | NaN |
4 | clt | Amon | E3SM-1-0 | historical | r1i1p1f1 | gr | 192501-194912 | CMIP | E3SM-Project | v20190913 | /mnt/craas1-ns9989k-ns9560k/ESGF/CMIP6/CMIP/E3... | NaN |
# List the distinct source_id values (models) present in the search result.
cat.df['source_id'].unique()
array(['CanESM5-1', 'E3SM-1-0', 'EC-Earth3-Veg-LR', 'GISS-E2-2-H',
'CESM2'], dtype=object)
# Catalog columns used to group entries into datasets; the keys returned by
# to_dataset_dict() are these values joined with '.', in this order.
cat.esmcat.aggregation_control.groupby_attrs = ['activity_id','experiment_id', 'source_id','table_id','grid_label']
# Echo the setting back to confirm it.
cat.esmcat.aggregation_control.groupby_attrs
['activity_id', 'experiment_id', 'source_id', 'table_id', 'grid_label']
## Create a dictionary from the list of datasets we found
This step may take several minutes, so be patient!
# Load every matching dataset into a dict keyed by the groupby attributes.
# Options for xarray's opener go through `xarray_open_kwargs`: the older
# `zarr_kwargs` spelling is deprecated in intake-esm, and the files in this
# catalog are netCDF rather than zarr. `use_cftime=True` gives every model a
# cftime time axis, so the string time slices used later behave uniformly.
dset_dict = cat.to_dataset_dict(xarray_open_kwargs={'use_cftime': True})
--> The keys in the returned dictionary of datasets are constructed as follows:
'activity_id.experiment_id.source_id.table_id.grid_label'
100.00% [5/5 00:30<00:00]
/opt/conda/envs/pangeo-notebook/lib/python3.11/site-packages/xarray/conventions.py:286: SerializationWarning: variable 'clt' has multiple fill values {1e+20, 1e+20} defined, decoding all values to NaN.
var = coder.decode(var, name=name)
/opt/conda/envs/pangeo-notebook/lib/python3.11/site-packages/xarray/conventions.py:286: SerializationWarning: variable 'tas' has multiple fill values {1e+20, 1e+20} defined, decoding all values to NaN.
var = coder.decode(var, name=name)
# Show the dataset keys (iterating a dict yields its keys).
list(dset_dict)
['CMIP.historical.CanESM5-1.Amon.gn',
'CMIP.historical.GISS-E2-2-H.Amon.gn',
'CMIP.historical.CESM2.Amon.gn',
'CMIP.historical.E3SM-1-0.Amon.gr',
'CMIP.historical.EC-Earth3-Veg-LR.Amon.gr']
## Simplify keys
# Re-key the datasets by model name alone: the third dot-separated field of
# each original key is the source_id.
ds_dict = {}
for full_key, dataset in dset_dict.items():
    # `mod` is deliberately left as a module-level name; a later cell reads it.
    mod = full_key.split('.')[2]
    ds_dict[mod] = dataset
## Compute the global mean (replace with whatever you want to calculate)
# Peek at one dataset before computing anything. `_ds` is not assigned
# anywhere earlier in the file, so bind it here instead of relying on a value
# left over from an earlier session (a fresh top-to-bottom run would
# otherwise stop with a NameError).
_ds = ds_dict[mod]
_ds
<xarray.Dataset> Size: 65MB Dimensions: (time: 1980, bnds: 2, lat: 64, lon: 128, member_id: 1) Coordinates: * time (time) object 16kB 1850-01-16 12:00:00 ... 2014-12-16 12:00:00 * lat (lat) float64 512B -87.86 -85.1 -82.31 ... 82.31 85.1 87.86 * lon (lon) float64 1kB 0.0 2.812 5.625 8.438 ... 351.6 354.4 357.2 height float64 8B ... * member_id (member_id) object 8B 'r1i1p1f1' Dimensions without coordinates: bnds Data variables: time_bnds (time, bnds) object 32kB dask.array<chunksize=(1, 2), meta=np.ndarray> lat_bnds (lat, bnds) float64 1kB dask.array<chunksize=(64, 2), meta=np.ndarray> lon_bnds (lon, bnds) float64 2kB dask.array<chunksize=(128, 2), meta=np.ndarray> tas (member_id, time, lat, lon) float32 65MB dask.array<chunksize=(1, 1, 64, 128), meta=np.ndarray> Attributes: (12/67) CCCma_model_hash: c8708a7ff559a1daeee0b713f94508a26a04c6b7 CCCma_parent_runid: v510b-pict CCCma_pycmor_hash: 0db7dba6172f1fb31dd9a4af1ae40a53189337ea CCCma_runid: v5.1-histens-01 Conventions: CF-1.7 CMIP-6.2 YMDH_branch_time_in_child: 1850:01:01:00 ... ... intake_esm_attrs:activity_id: CMIP intake_esm_attrs:institution_id: CCCma intake_esm_attrs:version: v20190429 intake_esm_attrs:path: /mnt/craas1-ns9989k-ns9560k/ESGF/CMIP6/... intake_esm_attrs:_data_format_: netcdf intake_esm_dataset_key: CMIP.historical.CanESM5-1.Amon.gn
# Display the dataset stored under `mod`, i.e. whichever model name was bound
# last by a preceding loop.
ds_dict[mod]
<xarray.Dataset> Size: 65MB Dimensions: (time: 1980, bnds: 2, lat: 64, lon: 128, member_id: 1) Coordinates: * time (time) object 16kB 1850-01-16 12:00:00 ... 2014-12-16 12:00:00 * lat (lat) float64 512B -87.86 -85.1 -82.31 ... 82.31 85.1 87.86 * lon (lon) float64 1kB 0.0 2.812 5.625 8.438 ... 351.6 354.4 357.2 height float64 8B ... * member_id (member_id) object 8B 'r1i1p1f1' Dimensions without coordinates: bnds Data variables: time_bnds (time, bnds) object 32kB dask.array<chunksize=(1, 2), meta=np.ndarray> lat_bnds (lat, bnds) float64 1kB dask.array<chunksize=(64, 2), meta=np.ndarray> lon_bnds (lon, bnds) float64 2kB dask.array<chunksize=(128, 2), meta=np.ndarray> tas (member_id, time, lat, lon) float32 65MB dask.array<chunksize=(1, 1, 64, 128), meta=np.ndarray> Attributes: (12/67) CCCma_model_hash: c8708a7ff559a1daeee0b713f94508a26a04c6b7 CCCma_parent_runid: v510b-pict CCCma_pycmor_hash: 0db7dba6172f1fb31dd9a4af1ae40a53189337ea CCCma_runid: v5.1-histens-01 Conventions: CF-1.7 CMIP-6.2 YMDH_branch_time_in_child: 1850:01:01:00 ... ... intake_esm_attrs:activity_id: CMIP intake_esm_attrs:institution_id: CCCma intake_esm_attrs:version: v20190429 intake_esm_attrs:path: /mnt/craas1-ns9989k-ns9560k/ESGF/CMIP6/... intake_esm_attrs:_data_format_: netcdf intake_esm_dataset_key: CMIP.historical.CanESM5-1.Amon.gn
# For each model, reduce every requested variable to a single number:
# select 1990-2015, take the latitude-weighted mean via global_mean(), then a
# plain .mean() over all remaining dimensions. Variables a model does not
# provide are recorded as NaN.
ds_vals_mod_var = {}
for mod, _ds in ds_dict.items():
    print(mod)
    ds_vals_mod_var[mod] = {}
    _ds_m = global_mean(_ds.sel(time=slice('1990', '2015'))).mean()
    ds_vals_mod_var[mod].update(
        (v, float(_ds_m[v].values) if v in _ds_m else np.nan)
        for v in varl
    )
CanESM5-1
GISS-E2-2-H
CESM2
E3SM-1-0
EC-Earth3-Veg-LR
# For each model and variable, compute three statistics over 1990-2015:
# the time mean, time median and time standard deviation of the zonal mean,
# each then averaged over latitude with global_mean(). The result is
# ds_vals_mod_var_meas[model][variable][statistic] -> float (NaN if missing).
ds_vals_mod_var_meas = {}
for mod, _ds in ds_dict.items():
    ds_vals_mod_var_meas[mod] = {}

    # Accept the same coordinate naming that global_mean() does.
    lon_name = 'lon'
    if 'lon' not in _ds and 'longitude' in _ds:
        lon_name = 'longitude'

    # The time selection and zonal mean are shared by all three statistics,
    # so build them once.
    zonal = _ds.sel(time=slice('1990', '2015')).mean(lon_name)
    meas = {
        'mean': global_mean(zonal.mean('time')),
        'median': global_mean(zonal.median('time')),
        'std': global_mean(zonal.std('time')),
    }

    for v in varl:
        ds_vals_mod_var_meas[mod][v] = {}
        for m, stat_ds in meas.items():
            val = np.nan
            if v in stat_ds:
                try:
                    val = float(stat_ds[v].squeeze().values)
                except Exception as err:
                    # Stay best-effort (record NaN and carry on) but say why,
                    # and let KeyboardInterrupt/SystemExit through so a long
                    # computation can still be interrupted.
                    print(f"{mod}/{v}/{m}: could not reduce to a scalar "
                          f"({err!r}); storing NaN")
            ds_vals_mod_var_meas[mod][v][m] = val
## Extract units
# Units for each variable, taken from the first model (in `models` order)
# whose dataset contains the variable and carries a `units` attribute.
# Looking only at models[0] would fail with a KeyError whenever that one
# model happens to lack a variable. A variable with no units anywhere gets
# None.
u_dic = {}
for v in varl:
    u_dic[v] = next(
        (
            ds_dict[name][v].attrs['units']
            for name in models
            if name in ds_dict
            and v in ds_dict[name]
            and 'units' in ds_dict[name][v].attrs
        ),
        None,
    )
# One row per variable, a single 'units' column.
units = pd.DataFrame.from_dict(u_dic, orient='index', columns=['units'])
units
units | |
---|---|
tas | K |
clt | % |
import pandas as pd

# One column per model and one row per variable, with the units column first.
per_model = pd.DataFrame(ds_vals_mod_var)
df_nice = pd.concat([units, per_model], axis=1)

# Show floats with two decimals and thousands separators.
pd.options.display.float_format = '{:20,.2f}'.format
display(df_nice)
units | CanESM5-1 | GISS-E2-2-H | CESM2 | E3SM-1-0 | EC-Earth3-Veg-LR | |
---|---|---|---|---|---|---|
tas | K | 287.65 | NaN | 287.94 | 287.47 | 287.54 |
clt | % | NaN | NaN | 69.74 | 67.78 | 64.10 |
# One frame per model (rows: statistic, columns: variable), each tagged with
# the model it belongs to.
ls_meas = [
    pd.DataFrame.from_dict(stats).assign(model=model_name)
    for model_name, stats in ds_vals_mod_var_meas.items()
]

# Stack the frames, order the rows by statistic, and index by
# (statistic, model).
stacked = pd.concat(ls_meas).reset_index()
df_out = stacked.sort_values('index').set_index(['index', 'model'])

pd.set_option('display.float_format', '{:20,.2f}'.format)
df_out
tas | clt | ||
---|---|---|---|
index | model | ||
mean | CanESM5-1 | 287.65 | NaN |
GISS-E2-2-H | NaN | NaN | |
CESM2 | 287.94 | 69.74 | |
E3SM-1-0 | 287.47 | 67.78 | |
EC-Earth3-Veg-LR | 287.54 | 64.10 | |
median | CanESM5-1 | 287.47 | NaN |
GISS-E2-2-H | NaN | NaN | |
CESM2 | 287.92 | 70.04 | |
E3SM-1-0 | 287.35 | 68.22 | |
EC-Earth3-Veg-LR | 287.29 | 64.48 | |
std | CanESM5-1 | 3.70 | NaN |
GISS-E2-2-H | NaN | NaN | |
CESM2 | 3.47 | 5.73 | |
E3SM-1-0 | 3.61 | 5.48 | |
EC-Earth3-Veg-LR | 3.65 | 6.35 |
# Transposed view: variables as rows, (statistic, model) pairs as columns.
df_out.T
index | mean | median | std | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
model | CanESM5-1 | GISS-E2-2-H | CESM2 | E3SM-1-0 | EC-Earth3-Veg-LR | CanESM5-1 | GISS-E2-2-H | CESM2 | E3SM-1-0 | EC-Earth3-Veg-LR | CanESM5-1 | GISS-E2-2-H | CESM2 | E3SM-1-0 | EC-Earth3-Veg-LR |
tas | 287.65 | NaN | 287.94 | 287.47 | 287.54 | 287.47 | NaN | 287.92 | 287.35 | 287.29 | 3.70 | NaN | 3.47 | 3.61 | 3.65 |
clt | NaN | NaN | 69.74 | 67.78 | 64.10 | NaN | NaN | 70.04 | 68.22 | 64.48 | NaN | NaN | 5.73 | 5.48 | 6.35 |