clisops regridding functionalities - powered by xesmf

The regridding functionalities of clisops consist of the regridding operator/function regrid in clisops.ops, allowing one-line remapping of xarray.Datasets or xarray.DataArrays, while orchestrating the use of classes and functions in clisops.core: - the Grid and Weights classes, to check and pre-process input as well as output grids and to generate the remapping weights - a regrid function, performing the remapping by applying the generated weights on the input data

For the weight generation and the regridding, the xESMF Regridder class is used, which itself allows an easy application of many of the remapping functionalities of ESMF/ESMPy.

[1]:
# Imports

%matplotlib inline
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import psyplot.project as psy
import numpy as np
import xarray as xr
import cf_xarray as cfxr

from pathlib import Path
# Set required environment variable for ESMPy
import os
os.environ['ESMFMKFILE'] = str(Path(os.__file__).parent.parent / 'esmf.mk')
import xesmf as xe

import clisops as cl # atm. the regrid-main-martin branch of clisops
import clisops.ops as clops
import clisops.core as clore
from clisops.utils import dataset_utils
from roocs_grids import get_grid_file, grid_dict, grid_annotations

print(f"Using xarray in version {xr.__version__}")
print(f"Using cf_xarray in version {cfxr.__version__}")
print(f"Using xESMF in version {xe.__version__}")
print(f"Using clisops in version {cl.__version__}")

xr.set_options(display_style='html')

## Turn off warnings?
import warnings
warnings.simplefilter("ignore")
Matplotlib is building the font cache; this may take a moment.
Using xarray in version 2023.2.0
Using cf_xarray in version 0.10.0
Using xESMF in version 0.8.8
Using clisops in version 0.15.0
[2]:
# Initialize test data
import clisops.utils.testing as clite
def open_dataset(ds_id):
    return clite.open_dataset(name=clite.get_esgf_file_paths("")[ds_id],
                              repo=clite.ESGF_TEST_DATA_REPO_URL,
                              branch=clite.ESGF_TEST_DATA_VERSION,
                              cache_dir=clite.ESGF_TEST_DATA_CACHE_DIR)

clisops.ops.regrid

One-line remapping with clisops.ops.regrid

def regrid(
    ds: Union[xr.Dataset, str, Path],
    *,
    method: Optional[str] = "nearest_s2d",
    adaptive_masking_threshold: Optional[Union[int, float]] = 0.5,
    grid: Optional[
        Union[xr.Dataset, xr.DataArray, int, float, tuple, str]
    ] = "adaptive",
    output_dir: Optional[Union[str, Path]] = None,
    output_type: Optional[str] = "netcdf",
    split_method: Optional[str] = "time:auto",
    file_namer: Optional[str] = "standard",
    keep_attrs: Optional[Union[bool, str]] = True,
) -> List[Union[xr.Dataset, str]]

The different options for the method, grid and adaptive_masking_threshold parameters are described in below sections:

Remap a global xarray.Dataset to a global 2.5 degree grid using the bilinear method

Load the dataset

[3]:
ds_vert = open_dataset("CMIP6_ATM_VERT_ONE_TIMESTEP")
ds_vert
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[3], line 1
----> 1 ds_vert = open_dataset("CMIP6_ATM_VERT_ONE_TIMESTEP")
      2 ds_vert

Cell In[2], line 4, in open_dataset(ds_id)
      3 def open_dataset(ds_id):
----> 4     return clite.open_dataset(name=clite.get_esgf_file_paths("")[ds_id],
      5                               repo=clite.ESGF_TEST_DATA_REPO_URL,
      6                               branch=clite.ESGF_TEST_DATA_VERSION,
      7                               cache_dir=clite.ESGF_TEST_DATA_CACHE_DIR)

AttributeError: module 'clisops.utils.testing' has no attribute 'open_dataset'

Take a look at the grid

[4]:
# Create 2D coordinate variables
lon,lat = np.meshgrid(ds_vert["lon"].data, ds_vert["lat"].data)

# Plot
plt.figure(figsize=(8,5))
plt.scatter(lon[::3, ::3], lat[::3, ::3], s=0.5)
plt.xlabel('lon')
plt.ylabel('lat')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[4], line 2
      1 # Create 2D coordinate variables
----> 2 lon,lat = np.meshgrid(ds_vert["lon"].data, ds_vert["lat"].data)
      4 # Plot
      5 plt.figure(figsize=(8,5))

NameError: name 'ds_vert' is not defined

Remap to global 2.5 degree grid with the bilinear method

[5]:
ds_remap = clops.regrid(ds_vert, method="bilinear", grid="2pt5deg", output_type="xarray")[0]
ds_remap
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[5], line 1
----> 1 ds_remap = clops.regrid(ds_vert, method="bilinear", grid="2pt5deg", output_type="xarray")[0]
      2 ds_remap

NameError: name 'ds_vert' is not defined

Plot the remapped data next to the source data

[6]:
fig, axes = plt.subplots(ncols=2, figsize=(18,4), subplot_kw={'projection': ccrs.PlateCarree()})
for ax in axes:
    ax.coastlines()

# Source data
ds_vert.o3.isel(time=0, lev=0).plot.pcolormesh(ax=axes[0], x="lon", y="lat", shading="auto")
axes[0].title.set_text("Source - MPI-ESM1-2-LR ECHAM6 (T63L47, ~1.9° resolution)")
# Remapped data
ds_remap.o3.isel(time=0, lev=0).plot.pcolormesh(ax=axes[1], x="lon", y="lat", shading="auto")
axes[1].title.set_text("Target - regular lat-lon (2.5° resolution)")
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[6], line 6
      3     ax.coastlines()
      5 # Source data
----> 6 ds_vert.o3.isel(time=0, lev=0).plot.pcolormesh(ax=axes[0], x="lon", y="lat", shading="auto")
      7 axes[0].title.set_text("Source - MPI-ESM1-2-LR ECHAM6 (T63L47, ~1.9° resolution)")
      8 # Remapped data

NameError: name 'ds_vert' is not defined
../_images/notebooks_regrid_11_1.png

Remap regional xarray.Dataset to a regional grid of adaptive resolution using the bilinear method

Adaptive resolution means, that the regular lat-lon target grid will have approximately the same resolution as the source grid.

Load the dataset

[7]:
ds_cordex = open_dataset("CORDEX_TAS_ONE_TIMESTEP")
ds_cordex
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[7], line 1
----> 1 ds_cordex = open_dataset("CORDEX_TAS_ONE_TIMESTEP")
      2 ds_cordex

Cell In[2], line 4, in open_dataset(ds_id)
      3 def open_dataset(ds_id):
----> 4     return clite.open_dataset(name=clite.get_esgf_file_paths("")[ds_id],
      5                               repo=clite.ESGF_TEST_DATA_REPO_URL,
      6                               branch=clite.ESGF_TEST_DATA_VERSION,
      7                               cache_dir=clite.ESGF_TEST_DATA_CACHE_DIR)

AttributeError: module 'clisops.utils.testing' has no attribute 'open_dataset'

Take a look at the grid

[8]:
plt.figure(figsize=(8,5))
plt.scatter(ds_cordex['lon'][::4, ::4], ds_cordex['lat'][::4, ::4], s=0.1)
plt.xlabel('lon')
plt.ylabel('lat')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[8], line 2
      1 plt.figure(figsize=(8,5))
----> 2 plt.scatter(ds_cordex['lon'][::4, ::4], ds_cordex['lat'][::4, ::4], s=0.1)
      3 plt.xlabel('lon')
      4 plt.ylabel('lat')

NameError: name 'ds_cordex' is not defined
<Figure size 800x500 with 0 Axes>

Remap to regional regular lat-lon grid of adaptive resolution with the bilinear method

[9]:
ds_remap = clops.regrid(ds_cordex, method="bilinear", grid="adaptive", output_type="xarray")[0]
ds_remap
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[9], line 1
----> 1 ds_remap = clops.regrid(ds_cordex, method="bilinear", grid="adaptive", output_type="xarray")[0]
      2 ds_remap

NameError: name 'ds_cordex' is not defined

Plot the remapped data next to the source data

[10]:
fig, axes = plt.subplots(ncols=2, figsize=(18,4), subplot_kw={'projection': ccrs.PlateCarree()})
for ax in axes: ax.coastlines()

# Source data
ds_cordex.tas.isel(time=0).plot.pcolormesh(ax=axes[0], x="lon", y="lat", shading="auto", cmap="RdBu_r")
axes[0].title.set_text("Source - GERICS-REMO2015 (EUR22, ~0.22° resolution)")
# Remapped data
ds_remap.tas.isel(time=0).plot.pcolormesh(ax=axes[1], x="lon", y="lat", shading="auto", cmap="RdBu_r")
axes[1].title.set_text("Target - regional regular lat-lon (adaptive resolution)")
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[10], line 5
      2 for ax in axes: ax.coastlines()
      4 # Source data
----> 5 ds_cordex.tas.isel(time=0).plot.pcolormesh(ax=axes[0], x="lon", y="lat", shading="auto", cmap="RdBu_r")
      6 axes[0].title.set_text("Source - GERICS-REMO2015 (EUR22, ~0.22° resolution)")
      7 # Remapped data

NameError: name 'ds_cordex' is not defined
../_images/notebooks_regrid_20_1.png

Remap unstructured xarray.Dataset to a global grid of adaptive resolution using the nearest neighbour method

For unstructured grids, at least for the moment, only the nearest neighbour remapping method is supported.

Load the dataset

[11]:
ds_icono = open_dataset("CMIP6_UNSTR_VERT_ICON_O")
ds_icono
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[11], line 1
----> 1 ds_icono = open_dataset("CMIP6_UNSTR_VERT_ICON_O")
      2 ds_icono

Cell In[2], line 4, in open_dataset(ds_id)
      3 def open_dataset(ds_id):
----> 4     return clite.open_dataset(name=clite.get_esgf_file_paths("")[ds_id],
      5                               repo=clite.ESGF_TEST_DATA_REPO_URL,
      6                               branch=clite.ESGF_TEST_DATA_VERSION,
      7                               cache_dir=clite.ESGF_TEST_DATA_CACHE_DIR)

AttributeError: module 'clisops.utils.testing' has no attribute 'open_dataset'

Take a look at the grid

[12]:
plt.figure(figsize=(16,9))
plt.scatter(ds_icono['longitude'][::2], ds_icono['latitude'][::2], s=0.05)
plt.xlabel('lon')
plt.ylabel('lat')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[12], line 2
      1 plt.figure(figsize=(16,9))
----> 2 plt.scatter(ds_icono['longitude'][::2], ds_icono['latitude'][::2], s=0.05)
      3 plt.xlabel('lon')
      4 plt.ylabel('lat')

NameError: name 'ds_icono' is not defined
<Figure size 1600x900 with 0 Axes>

Remap to global grid of adaptive resolution with the nearest neighbour method

[13]:
ds_remap = clops.regrid(ds_icono, method="nearest_s2d", grid="adaptive", output_type="xarray")[0]
ds_remap
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[13], line 1
----> 1 ds_remap = clops.regrid(ds_icono, method="nearest_s2d", grid="adaptive", output_type="xarray")[0]
      2 ds_remap

NameError: name 'ds_icono' is not defined

Plot source data and remapped data

(Using psyplot to plot the unstructured data since xarray does not (yet?) support it.)

[14]:
# Source data
ds_icono_path = ds_icono.encoding["source"]
maps=psy.plot.mapplot(ds_icono_path, cmap="RdBu_r",
                      title="Source - ICON-ESM-LR ICON-O (Ruby-0, 40km resolution)",
                      time=[0], lev=[0])
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[14], line 2
      1 # Source data
----> 2 ds_icono_path = ds_icono.encoding["source"]
      3 maps=psy.plot.mapplot(ds_icono_path, cmap="RdBu_r",
      4                       title="Source - ICON-ESM-LR ICON-O (Ruby-0, 40km resolution)",
      5                       time=[0], lev=[0])

NameError: name 'ds_icono' is not defined
[15]:
# Remapped data
plt.figure(figsize=(9,4));
ax = plt.axes(projection=ccrs.PlateCarree());
ds_remap.thetao.isel(time=0, lev=0).plot.pcolormesh(ax=ax, x="lon", y="lat", shading="auto",
                                                    cmap="RdBu_r", vmin = -1, vmax=40)
ax.title.set_text("Target - regular lat-lon (adaptive resolution)")
ax.coastlines()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[15], line 4
      2 plt.figure(figsize=(9,4));
      3 ax = plt.axes(projection=ccrs.PlateCarree());
----> 4 ds_remap.thetao.isel(time=0, lev=0).plot.pcolormesh(ax=ax, x="lon", y="lat", shading="auto",
      5                                                     cmap="RdBu_r", vmin = -1, vmax=40)
      6 ax.title.set_text("Target - regular lat-lon (adaptive resolution)")
      7 ax.coastlines()

NameError: name 'ds_remap' is not defined
../_images/notebooks_regrid_29_1.png

clisops.core.Grid

Create a grid object from an xarray.Dataset

Load the dataset

[16]:
dso = open_dataset("CMIP6_TOS_ONE_TIME_STEP")
dso
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[16], line 1
----> 1 dso = open_dataset("CMIP6_TOS_ONE_TIME_STEP")
      2 dso

Cell In[2], line 4, in open_dataset(ds_id)
      3 def open_dataset(ds_id):
----> 4     return clite.open_dataset(name=clite.get_esgf_file_paths("")[ds_id],
      5                               repo=clite.ESGF_TEST_DATA_REPO_URL,
      6                               branch=clite.ESGF_TEST_DATA_VERSION,
      7                               cache_dir=clite.ESGF_TEST_DATA_CACHE_DIR)

AttributeError: module 'clisops.utils.testing' has no attribute 'open_dataset'

Create the Grid object

[17]:
grido = clore.Grid(ds=dso)
grido
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[17], line 1
----> 1 grido = clore.Grid(ds=dso)
      2 grido

NameError: name 'dso' is not defined

The xarray.Dataset is attached to the clisops.core.Grid object. Auxiliary coordinates and data variables have been (re)set appropriately.

[18]:
grido.ds
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[18], line 1
----> 1 grido.ds

NameError: name 'grido' is not defined

Plot the data

[19]:
plt.figure(figsize=(9,4))
ax = plt.axes(projection=ccrs.PlateCarree())
grido.ds.tos.isel(time=0).plot.pcolormesh(ax=ax, x=grido.lon, y=grido.lat, shading="auto",
                                          cmap="RdBu_r", vmin = -1, vmax=40)
ax.coastlines()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[19], line 3
      1 plt.figure(figsize=(9,4))
      2 ax = plt.axes(projection=ccrs.PlateCarree())
----> 3 grido.ds.tos.isel(time=0).plot.pcolormesh(ax=ax, x=grido.lon, y=grido.lat, shading="auto",
      4                                           cmap="RdBu_r", vmin = -1, vmax=40)
      5 ax.coastlines()

NameError: name 'grido' is not defined
../_images/notebooks_regrid_38_1.png

Create a grid object from an xarray.DataArray

Note that xarray.DataArray objects do not support the bounds of coordinate variables to be defined.

Extract tos DataArray

[20]:
dao = dso.tos
dao
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[20], line 1
----> 1 dao = dso.tos
      2 dao

NameError: name 'dso' is not defined

Create Grid object for MPIOM tos dataarray:

[21]:
grido_tos = clore.Grid(ds=dao)
grido_tos
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[21], line 1
----> 1 grido_tos = clore.Grid(ds=dao)
      2 grido_tos

NameError: name 'dao' is not defined

Create a grid object using a grid_instructor

  • global grid: grid_instructor = (lon_step, lat_step) or grid_instructor = step

  • regional grid:grid_instructor = (lon_start, lon_end, lon_step, lat_start, lat_end, lat_step) or grid_instructor = (start, end, step)

[22]:
grid_1deg = clore.Grid(grid_instructor=1)
grid_1deg
[22]:
clisops 180x360_cells_grid
Lat x Lon:        180 x 360
Average resolution (x,y): (1.0, 1.0)
Gridcells:        64800
Format:           CF
Type:             regular_lat_lon
Extent (x):       global
Source:           xESMF
Bounds?           True
Degenerate cells? False
Duplicated cells? False
Permanent Mask:
md5 hash:         ff485fa62d5f72e2db23980e3c76efa7
[23]:
grid_1degx2deg_regional = clore.Grid(grid_instructor=(0., 90., 1., 35., 50., 2. ))
grid_1degx2deg_regional
[23]:
clisops 7x90_cells_grid
Lat x Lon:        7 x 90
Average resolution (x,y): (1.0, 2.0)
Gridcells:        630
Format:           CF
Type:             regular_lat_lon
Extent (x):       regional
Source:           xESMF
Bounds?           True
Degenerate cells? False
Duplicated cells? False
Permanent Mask:
md5 hash:         c2ddd1634879f48dfa1bdde053198fcb

Create a grid object using a grid_id

Makes use of the predefined grids of roocs_grids, which is a collection of grids used for example for the IPCC Atlas and for CMIP6 Regridding Weights generation.

[24]:
for key, gridinfo in grid_annotations.items(): print(f"- {key:20} {gridinfo}")
- 0pt25deg             Global 0.25 degree grid with one cell centered at 0.125E,0.125N
- World_Ocean_Atlas    Global 1.0 degree grid with one cell centered at 0.5E,0.5N. As used by the World Ocean Atlas.
- 1deg                 Global 1.0 degree grid with one cell centered at 0.5E,0.5N. As used by the World Ocean Atlas.
- 2pt5deg              Global 2.5 degree grid with one cell centered at 1.25E,1.25N.
- MERRA-2              Global 0.65x0.5 (latxlon) degree grid with one cell centered at 0E,0N. As used by MERRA-2.
- 0pt625x0pt5deg       Global 0.65x0.5 (latxlon) degree grid with one cell centered at 0E,0N. As used by MERRA-2.
- ERA-Interim          Global 0.75 degree grid with one cell centered at 0E,0N. As used by ERA-Interim.
- 0pt75deg             Global 0.75 degree grid with one cell centered at 0E,0N. As used by ERA-Interim.
- ERA-40               Global 1.25 degree grid with one cell centered at 0E,0N. As used by ERA-40.
- 1pt25deg             Global 1.25 degree grid with one cell centered at 0E,0N. As used by ERA-40.
- ERA5                 Global 0.25 degree grid with one cell centered at 0E,0N. As used by ERA-5.
- 0pt25deg_era5        Global 0.25 degree grid with one cell centered at 0E,0N. As used by ERA-5.
- 0pt25deg_era5_lsm    Global 0.25 degree grid with one cell centered at 0E,0N. As used by ERA-5. Includes a fractional land-sea mask.
- 0pt5deg_lsm          Global 0.5 degree grid with one cell centered at 0.25E,0.25N. Includes a fractional land-sea mask.
- 1deg_lsm             Global 1.0 degree grid with one cell centered at 0.5E,0.5N. As used by the World Ocean Atlas. Includes a fractional land-sea mask.
- 2deg_lsm             Global 2.0 degree grid with one cell centered at 1.0E,1.0N.
- 0pt25deg_era5_lsm_binary Global 0.25 degree grid with one cell centered at 0E,0N. As used by ERA-5. Includes a binary land-sea mask with land/sea fraction cut at >=0.5.
- 1deg_lsm_binary      Global 1.0 degree grid with one cell centered at 0.5E,0.5N. As used by the World Ocean Atlas. Includes a binary land-sea mask with land/sea fraction cut at >=0.5.
- 2deg_lsm_binary      Global 2.0 degree grid with one cell centered at 1.0E,1.0N. Includes a binary land-sea mask with land/sea fraction cut at >=0.5.
- T31                  Gaussian global grid of approx. 3.8 degree resolution, 48x96 nlatxnlon. Associated to a T31 spectral grid representation.
- T42                  Gaussian global grid of approx. 2.8 degree resolution, 64x128 nlatxnlon. Associated to a T42 spectral grid representation.
- T63_lsm_binary       Gaussian global grid of approx. 1.9 degree resolution, 96x192 nlatxnlon. Associated to a T63 spectral grid representation.  Includes a binary land-sea mask.
- T127_lsm_binary      Gaussian global grid of approx. 1.0 degree resolution, 192x384 nlatxnlon. Associated to a T127 spectral grid representation. Includes a binary land-sea mask.
- T255                 Gaussian global grid of approx. 0.5 degree resolution, 384x768 nlatxnlon. Associated to a T255 spectral grid representation.
[25]:
grid_era5 = clore.Grid(grid_id = "0pt25deg_era5")
grid_era5
[25]:
clisops 721x1440_cells_grid
Lat x Lon:        721 x 1440
Average resolution (x,y): (0.25, 0.25)
Gridcells:        1038240
Format:           CF
Type:             regular_lat_lon
Extent (x):       global
Source:           Predefined_0pt25deg_era5
Bounds?           True
Degenerate cells? False
Duplicated cells? False
Permanent Mask:
md5 hash:         cdf4f59eab828857bc0c2819c0c465d7

clisops.core.Grid objects can be compared to one another

Optional verbose output gives information on where the grids differ: lat, lon, lat_bnds, lon_bnds, mask?

Compare the tos dataset to the tos dataarray

[26]:
comp = grido.compare_grid(grido_tos, verbose = True)
print("Grids are equal?", comp)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[26], line 1
----> 1 comp = grido.compare_grid(grido_tos, verbose = True)
      2 print("Grids are equal?", comp)

NameError: name 'grido' is not defined

Compare both 0.25° ERA5 Grids

[27]:
# Create the Grid object
grid_era5_lsm = clore.Grid(grid_id = "0pt25deg_era5_lsm", compute_bounds=True)
[28]:
# Compare
comp = grid_era5.compare_grid(grid_era5_lsm, verbose=True)
print("Grids are equal?", comp)
The two grids are considered equal.
Grids are equal? True

Strip clisops.core.Grid objects of all data_vars and coords unrelated to the horizontal grid

[29]:
grid_era5_lsm.ds
[29]:
<xarray.Dataset>
Dimensions:    (longitude: 1440, latitude: 721, time: 1, bnds: 2)
Coordinates:
  * longitude  (longitude) float64 0.0 0.25 0.5 0.75 ... 359.0 359.2 359.5 359.8
  * latitude   (latitude) float64 90.0 89.75 89.5 89.25 ... -89.5 -89.75 -90.0
  * time       (time) datetime64[ns] 2010-01-01
    lat_bnds   (latitude, bnds) float64 89.88 90.0 89.62 ... -89.62 -90.0 -89.88
    lon_bnds   (longitude, bnds) float64 -0.125 0.125 0.125 ... 359.6 359.9
Dimensions without coordinates: bnds
Data variables:
    lsm        (time, latitude, longitude) float32 0.0 0.0 0.0 ... 1.0 1.0 1.0
    z          (time, latitude, longitude) float32 -0.07617 ... 2.735e+04
Attributes:
    Conventions:  CF-1.6
    history:      2020-10-21 11:41:35 GMT by grib_to_netcdf-2.16.0: /opt/ecmw...

The parameter keep_attrs can be set, the default is False.

[30]:
grid_era5_lsm._drop_vars(keep_attrs=False)
grid_era5_lsm.ds
[30]:
<xarray.Dataset>
Dimensions:    (longitude: 1440, latitude: 721, bnds: 2)
Coordinates:
  * longitude  (longitude) float64 0.0 0.25 0.5 0.75 ... 359.0 359.2 359.5 359.8
  * latitude   (latitude) float64 90.0 89.75 89.5 89.25 ... -89.5 -89.75 -90.0
    lat_bnds   (latitude, bnds) float64 89.88 90.0 89.62 ... -89.62 -90.0 -89.88
    lon_bnds   (longitude, bnds) float64 -0.125 0.125 0.125 ... 359.6 359.9
Dimensions without coordinates: bnds
Data variables:
    *empty*

Transfer coordinate variables between clisops.core.Grid objects that are unrelated to the horizontal grid

The parameter keep_attrs can be set, the default is True. All settings for keep_attrs are described later in section clisops.core.regrid.

Load the dataset

[31]:
ds_vert = open_dataset("CMIP6_ATM_VERT_ONE_TIMESTEP")
ds_vert
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[31], line 1
----> 1 ds_vert = open_dataset("CMIP6_ATM_VERT_ONE_TIMESTEP")
      2 ds_vert

Cell In[2], line 4, in open_dataset(ds_id)
      3 def open_dataset(ds_id):
----> 4     return clite.open_dataset(name=clite.get_esgf_file_paths("")[ds_id],
      5                               repo=clite.ESGF_TEST_DATA_REPO_URL,
      6                               branch=clite.ESGF_TEST_DATA_VERSION,
      7                               cache_dir=clite.ESGF_TEST_DATA_CACHE_DIR)

AttributeError: module 'clisops.utils.testing' has no attribute 'open_dataset'

Create grid object

[32]:
grid_vert = clore.Grid(ds_vert)
grid_vert
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[32], line 1
----> 1 grid_vert = clore.Grid(ds_vert)
      2 grid_vert

NameError: name 'ds_vert' is not defined

Transfer the coordinates to the ERA5 grid object

[33]:
grid_era5_lsm._transfer_coords(grid_vert, keep_attrs=True)
grid_era5_lsm.ds
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[33], line 1
----> 1 grid_era5_lsm._transfer_coords(grid_vert, keep_attrs=True)
      2 grid_era5_lsm.ds

NameError: name 'grid_vert' is not defined

clisops.core.Weights

Create regridding weights to regrid between two grids. Supported are the following of xESMF’s remapping methods: * nearest_s2d * bilinear * conservative * patch

Create 2-degree target grid

[34]:
grid_2deg = clore.Grid(grid_id="2deg_lsm", compute_bounds=True)
grid_2deg
[34]:
clisops 90x180_cells_grid
Lat x Lon:        90 x 180
Average resolution (x,y): (2.0, 2.0)
Gridcells:        16200
Format:           CF
Type:             regular_lat_lon
Extent (x):       global
Source:           Predefined_2deg_lsm
Bounds?           True
Degenerate cells? False
Duplicated cells? False
Permanent Mask:
md5 hash:         438f79b4376004d360a56fa2c0abb9f0

Create conservative remapping weights using the clisops.core.Weights class

grid_in and grid_out are Grid objects

[35]:
%time weights = clore.Weights(grid_in = grido, grid_out = grid_2deg, method="conservative")
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
File <timed exec>:1

NameError: name 'grido' is not defined

Local weights cache

Weights are cached on disk and do not have to be created more than once. The default cache directory is platform-dependent and set via the package platformdirs. For Linux it is '/home/my_user/.local/share/clisops/weights_dir' and can optionally be adjusted:

  • permanently by modifying the parameter grid_weights: local_weights_dir in the roocs.ini configuration file that can be found in the clisops installation directory

  • or temporarily via:

from clisops import core as clore
clore.weights_cache_init("/dir/for/weights/cache")
[36]:
from clisops.core.regrid import CONFIG
print(CONFIG["clisops:grid_weights"]["local_weights_dir"])
/home/docs/.local/share/clisops/grid_weights
[37]:
!ls -sh {CONFIG["clisops:grid_weights"]["local_weights_dir"]}
total 0
[38]:
!cat {CONFIG["clisops:grid_weights"]["local_weights_dir"]}/weights_*_conservative.json
cat: '/home/docs/.local/share/clisops/grid_weights/weights_*_conservative.json': No such file or directory

Now the weights will be read directly from the cache

[39]:
%time weights = clore.Weights(grid_in = grido, grid_out = grid_2deg, method="conservative")
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
File <timed exec>:1

NameError: name 'grido' is not defined

The weights cache can be flushed, which removes all weight and grid files as well as the json files holding the metadata. To see what would be removed, one can use the dryrun=True parameter. To re-initialize the weights cache in a different directory, one can use the weights_dir_init="/new/dir/for/weights/cache" parameter. Even when re-initializing the weights cache under a new path, using clore.weights_cache_flush, no directory is getting removed, only above listed files. When dryrun is not set, the files that are getting deleted can be displayed with verbose=True.

[40]:
clore.weights_cache_flush(dryrun=True)
Flushing the clisops weights cache ('/home/docs/.local/share/clisops/grid_weights') would remove:
No weight or grid files found. Cache empty?
[41]:
clore.weights_cache_flush(verbose=True)
Flushing the clisops weights cache ('/home/docs/.local/share/clisops/grid_weights'). Removing ...
No weight or grid files found. Cache empty?
Initialized new weights cache at /home/docs/.local/share/clisops/grid_weights

clisops.core.regrid

This function allows to perform the eventual regridding and provides a resulting xarray.Dataset

def regrid(
    grid_in: Grid,
    grid_out: Grid,
    weights: Weights,
    adaptive_masking_threshold: Optional[float] = 0.5,
    keep_attrs: Optional[bool] = True,
):
  • grid_in and grid_out are Grid objects, weights is a Weights object.

  • adaptive_masking_threshold (AMT) A value within the [0., 1.] interval that defines the maximum RATIO of missing_values amongst the total number of data values contributing to the calculation of the target grid cell value. For a fraction [0., AMT[ of the contributing source data missing, the target grid cell will be set to missing_value, else, it will be re-normalized by the factor 1./(1.-RATIO). Thus, if AMT is set to 1, all source grid cells that contribute to a target grid cell must be missing in order for the target grid cell to be defined as missing itself. Values greater than 1 or less than 0 will cause adaptive masking to be turned off. This adaptive masking technique allows to reuse generated weights for differently masked data (e.g. land-sea masks or orographic masks that vary with depth / height).

  • keep_attrs can have the following settings:

    • True : The resulting xarray.Dataset will have all attributes of grid_in.ds.attrs, despite attributes that have to be added and altered due to the new grid.

    • False : The resulting xarray.Dataset will have no attributes despite attributes generated by the regridding process.

    • "target" : The resulting xarray.Dataset will have all attributes of grid_out.ds.attrs, despite attributes generated by the regridding process. Not recommended.

In the following an example showing the function application and the effect of the adaptive masking.

[42]:
ds_out_amt0 = clore.regrid(grido, grid_2deg, weights, adaptive_masking_threshold=-1)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[42], line 1
----> 1 ds_out_amt0 = clore.regrid(grido, grid_2deg, weights, adaptive_masking_threshold=-1)

NameError: name 'grido' is not defined
[43]:
ds_out_amt1 = clore.regrid(grido, grid_2deg, weights, adaptive_masking_threshold=0.5)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[43], line 1
----> 1 ds_out_amt1 = clore.regrid(grido, grid_2deg, weights, adaptive_masking_threshold=0.5)

NameError: name 'grido' is not defined

Plot the resulting data

[44]:
# Create panel plot of regridded data (global)
fig, axes = plt.subplots(ncols=2, nrows=1,
                         figsize=(18, 5), # global
                         subplot_kw={'projection': ccrs.PlateCarree()})

ds_out_amt0["tos"].isel(time=0).plot.pcolormesh(ax=axes[0], vmin=0, vmax=30, cmap="plasma")
axes[0].title.set_text("Target (2° regular lat-lon) - No adaptive masking")

ds_out_amt1["tos"].isel(time=0).plot.pcolormesh(ax=axes[1], vmin=0, vmax=30, cmap="plasma")
axes[1].title.set_text("Target (2° regular lat-lon) - Adaptive masking")

for axis in axes.flatten():
    axis.coastlines()
    axis.set_xlabel('lon')
    axis.set_ylabel('lat')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[44], line 6
      1 # Create panel plot of regridded data (global)
      2 fig, axes = plt.subplots(ncols=2, nrows=1,
      3                          figsize=(18, 5), # global
      4                          subplot_kw={'projection': ccrs.PlateCarree()})
----> 6 ds_out_amt0["tos"].isel(time=0).plot.pcolormesh(ax=axes[0], vmin=0, vmax=30, cmap="plasma")
      7 axes[0].title.set_text("Target (2° regular lat-lon) - No adaptive masking")
      9 ds_out_amt1["tos"].isel(time=0).plot.pcolormesh(ax=axes[1], vmin=0, vmax=30, cmap="plasma")

NameError: name 'ds_out_amt0' is not defined
../_images/notebooks_regrid_82_1.png
[45]:
# Create panel plot of regridded data (Japan)
fig, axes = plt.subplots(ncols=3, nrows=1,
                         figsize=(18, 4), # Japan
                         subplot_kw={'projection': ccrs.PlateCarree()})

grido.ds.tos.isel(time=0).plot.pcolormesh(ax=axes[0], x=grido.lon, y=grido.lat,
                                          vmin=0, vmax=30, cmap="plasma", shading="auto")
axes[0].title.set_text("Source - MPI-ESM1-2-HR MPIOM (TP04, ~0.4° resolution)")

ds_out_amt0["tos"].isel(time=0).plot.pcolormesh(ax=axes[1], vmin=0, vmax=30, cmap="plasma")
axes[1].title.set_text("Target - No adaptive masking")

ds_out_amt1["tos"].isel(time=0).plot.pcolormesh(ax=axes[2], vmin=0, vmax=30, cmap="plasma")
axes[2].title.set_text("Target - Adaptive masking")

for axis in axes.flatten():
    axis.coastlines()
    axis.set_xlabel('lon')
    axis.set_ylabel('lat')
    axis.set_xlim([125, 150])
    axis.set_ylim([25, 50])
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[45], line 6
      1 # Create panel plot of regridded data (Japan)
      2 fig, axes = plt.subplots(ncols=3, nrows=1,
      3                          figsize=(18, 4), # Japan
      4                          subplot_kw={'projection': ccrs.PlateCarree()})
----> 6 grido.ds.tos.isel(time=0).plot.pcolormesh(ax=axes[0], x=grido.lon, y=grido.lat,
      7                                           vmin=0, vmax=30, cmap="plasma", shading="auto")
      8 axes[0].title.set_text("Source - MPI-ESM1-2-HR MPIOM (TP04, ~0.4° resolution)")
     10 ds_out_amt0["tos"].isel(time=0).plot.pcolormesh(ax=axes[1], vmin=0, vmax=30, cmap="plasma")

NameError: name 'grido' is not defined
../_images/notebooks_regrid_83_1.png