Regrid CloudSat data to a horizontal grid#

Table of Contents#

1. Introduction#

NOTE: The common pressure grid was created in the Jupyter notebook that calculates the daily means of the CloudSat variables.

2. Data Wrangling#

  • Time period: 2007 - 2010

  • Time resolution: daily atmospheric overpasses

  • Variables (units shown where given in this notebook):

Long name            Units                      Dimension
snow_water_content   –                          (nray, nbin)
snowfall_rate        –                          (nray, nbin)
snowfall_rate_sfc    –                          (nray)
Pressure             hPa (converted from Pa)    (nray, nbin)
Temperature          K                          (nray, nbin)

import os
import pathlib
import sys
import socket

hostname = socket.gethostname()

# project root: two directory levels above the notebook's working directory
WORKDIR = str(pathlib.Path.cwd().parents[1])


if "mimi" in hostname:
    print(hostname)
    DATA_DIR = "/scratch/franzihe/"
    # FIG_DIR = "/uio/kant/geo-metos-u1/franzihe/Documents/Figures/ERA5/"
elif "glefsekaldt" in hostname: 
    DATA_DIR = "/home/franzihe/Data/"
    # FIG_DIR = "/home/franzihe/Documents/Figures/ERA5/"

INPUT_DATA_DIR = os.path.join(DATA_DIR, 'input')
OUTPUT_DATA_DIR = os.path.join(DATA_DIR, 'output')
UTILS_DIR = os.path.join(WORKDIR, 'utils')

sys.path.append(UTILS_DIR)
mimi.uio.no

Import Python packages#

  • Python environment requirements: file globalsnow.yml

  • Load Python packages from imports.py (see the sketch after this list)

  • Load functions from functions.py
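
For orientation, here is a sketch of what imports.py is assumed to contain; the aliases are inferred from how they are used in this notebook, and the cy alias in particular is an assumption:

# imports.py -- assumed contents, inferred from usage in this notebook
import numpy as np
import xarray as xr
import dask.array as da
import h5py
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy as cy                       # assumption: bare cartopy alias
from glob import glob
from datetime import datetime, timedelta
import functions as fct                    # helper functions from functions.py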

# suppress warnings
import warnings
warnings.filterwarnings('ignore')  # don't output warnings

# import packages
from imports import (xr, ccrs, cy, plt, glob, fct, np, datetime, timedelta, h5py, da)
# reload imports
%load_ext autoreload
%autoreload 2

Open CloudSat variables#

Get the data required for the analysis. Beforehand we downloaded the 2C-SNOW-PROFILE and ECMWF-AUX files with the script provided on the CloudSat webpage.

The CloudSat data are located in the folder /input/CloudSat/, with an individual subfolder per CloudSat product. The 2C-SNOW-PROFILE granules are in /2C-SNOW-PROFILE.P1_R05/, while the ECMWF-AUX files are in /ECMWF-AUX.P_R05/. Each year has subfolders for the day of the year, containing all CloudSat granules of that specific day.
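
A sketch of the expected directory layout (granule file names are illustrative; the elided parts are left as ...):

input/CloudSat/
├── 2C-SNOW-PROFILE.P1_R05/
│   └── 2007/
│       ├── 001/
│       │   └── 2007001..._CS_2C-SNOW-PROFILE_GRANULE_P1_R05_....h5
│       └── 002/
└── ECMWF-AUX.P_R05/
    └── 2007/
        ├── 001/
        │   └── 2007001..._CS_ECMWF-AUX_GRANULE_P_R05_....h5
        └── 002/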

cs_in = os.path.join(INPUT_DATA_DIR, 'CloudSat/2C-SNOW-PROFILE.P1_R05')
cs_out = os.path.join(OUTPUT_DATA_DIR, 'CloudSat/2C-SNOW-PROFILE.P1_R05')
# make output data directory (including missing parents)
os.makedirs(cs_out, exist_ok=True)
ff_cs = sorted(glob(cs_in + '/2007/*/*.h5'))   # all 2C-SNOW granules of 2007
GEOLOC_VARNAMES = ["DEM_elevation", "Height", "Latitude", "Longitude"]
DATA_VARNAMES = ["snow_water_content", "snowfall_rate", "snowfall_rate_sfc"]
DATA_VARNAMES_EC = ["Pressure", "Temperature"]
_VARNAMES = np.append(DATA_VARNAMES, DATA_VARNAMES_EC)   # all variables to regrid
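
The fct.get_* helpers wrap plain h5py reads. As a minimal sketch of the underlying access (the group names 'Geolocation Fields' and 'Data Fields' follow the usual HDF-EOS layout and are an assumption here, not taken from functions.py):

# sketch: raw h5py access behind the fct.get_* helpers (group names assumed)
with h5py.File(ff_cs[0], 'r') as f:
    swath = f['2C-SNOW-PROFILE']
    lat = swath['Geolocation Fields']['Latitude'][:]       # (nray,)
    swc = swath['Data Fields']['snow_water_content'][:]    # (nray, nbin)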
# common pressure grid created in the daily-means notebook (see the note above)
pressure_grid = xr.open_dataset('{}/CloudSat/ECMWF-AUX.P_R05/pressure_grid.nc'.format(OUTPUT_DATA_DIR))['Pressure_grid']

# ERA5 file that provides the target horizontal grid
# (era_in is an assumption: the folder holding the downloaded ERA5 files)
era_in = os.path.join(INPUT_DATA_DIR, 'ERA5')
ds_era = xr.open_dataset('{}/clwc_Amon_ERA5_198501_198912.nc'.format(era_in))
ds_era = ds_era.drop_vars('clwc')   # keep only the grid coordinates
datasets = []

for file in ff_cs[1:2]:   # a single granule for testing; loop over ff_cs for the full year
    year = int(file.split('/')[-1].split('_')[0][0:4])
    doy = int(file.split('/')[-1].split('_')[0][4:7])  # day of the year
    tt = datetime(year, 1, 1) + timedelta(doy - 1)

    # matching ECMWF-AUX granule (same time stamp as the 2C-SNOW granule)
    file_ec = sorted(glob('{}/CloudSat/ECMWF-AUX.P_R05/{}/{}_*.h5'.format(INPUT_DATA_DIR, year, file.split('/')[-1].split('_')[0])))[0]
    for month in range(1, 2):   # extend the range to process more months
        if tt.month == month:
            # one dataset per granule, so the appended datasets stay independent
            ds = xr.Dataset()
            ds['Pressure_grid'] = pressure_grid

            h5file = h5py.File(file, "r")   # open h5 file
            ds['Profile_time'] = fct.get_profile_times(h5file['2C-SNOW-PROFILE'])

            for var in GEOLOC_VARNAMES:
                ds[var] = fct.get_geoloc_var(h5file['2C-SNOW-PROFILE'], var)

            for var in DATA_VARNAMES:
                ds[var] = fct.get_data_var(h5file['2C-SNOW-PROFILE'], var)

            # load ECMWF-AUX file
            h5file_ec = h5py.File(file_ec, "r")
            for var in DATA_VARNAMES_EC:
                ds[var] = fct.get_data_var(h5file_ec['ECMWF-AUX'], var)

                if var == 'Pressure':
                    ds[var] = ds[var] / 100   # convert Pa to hPa
                    ds[var].attrs = {'units': 'hPa', 'longname': 'Pressure'}

            # Before averaging within a grid cell, bring the profiles onto the
            # common pressure grid created in get_pressure_grid.py: for every
            # target level pick the CloudSat bin whose pressure is closest.
            for var in _VARNAMES:
                if len(ds[var].dims) == 2:
                    ds[var + '_pregrid'] = xr.DataArray(
                        data=da.full(shape=ds[var].shape, fill_value=np.nan,
                                     chunks=(int(len(ds['nray']) / 2), int(len(ds['nbin'])))),
                        dims=list(ds[var].dims),
                        coords=[ds[var]['nray'].values, ds[var]['nbin']])
                    # NOTE: this profile-by-profile search is very slow; the run
                    # below was interrupted here (a vectorized sketch follows further down)
                    for nray in range(len(ds.nray)):
                        for nbin in range(len(ds.nbin)):
                            dmin = int(abs(ds['Pressure'].isel(nray=nray) - ds['Pressure_grid'].isel(nbin=nbin)).idxmin(dim='nbin'))
                            ds[var + '_pregrid'][nray, nbin] = ds[var].isel(nray=nray, nbin=dmin)

            datasets.append(ds)
            h5file.close()
            h5file_ec.close()

ds_cs = xr.concat(datasets, dim='nray')

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In [56], line 40
     38             for nbin in range(len(ds.nbin)):
     39                 dmin= int(abs(ds['Pressure'].isel(nray = nray) - ds['Pressure_grid'].isel(nbin = nbin)).idxmin(dim = 'nbin'))
---> 40                 ds[var +'_pregrid'][nray, nbin] =  ds[var].isel(nray = nray, nbin = dmin)
     43 datasets.append(ds)
     44 h5file.close()

    (...)

KeyboardInterrupt: 
# after interrupting the cell above, append the partly regridded granule
# and build the along-track dataset manually
datasets.append(ds)
ds_cs = xr.concat(datasets, dim='nray')
ds_cs['Temperature']
<xarray.DataArray 'Temperature' (nray: 37081, nbin: 125)>
array([[210.33583069, 210.40197754, 210.68574524, ...,          nan,
                 nan,          nan],
       [210.33924866, 210.40563965, 210.68916321, ...,          nan,
                 nan,          nan],
       [210.34265137, 210.4092865 , 210.69258118, ...,          nan,
                 nan,          nan],
       ...,
       [211.10948181, 211.37901306, 211.66918945, ...,          nan,
                 nan,          nan],
       [211.11125183, 211.38110352, 211.67153931, ...,          nan,
                 nan,          nan],
       [211.11303711, 211.38319397, 211.67388916, ...,          nan,
                 nan,          nan]])
Coordinates:
  * nbin        (nbin) int64 0 1 2 3 4 5 6 7 ... 117 118 119 120 121 122 123 124
    height_agl  (nbin) float64 2.256e+04 2.24e+04 ... -5.198e+03 -5.49e+03
  * nray        (nray) int64 0 1 2 3 4 5 ... 37075 37076 37077 37078 37079 37080
Attributes:
    longname:  Temperature
    units:     K
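
The profile-by-profile nearest-level search in the loop above is what made the cell run long enough to be interrupted. The lookup can be vectorized with numpy instead; a sketch, assuming ds still holds Pressure (nray, nbin) and Pressure_grid (nbin) in hPa as above:

# vectorized nearest-pressure-level lookup (sketch)
p = ds['Pressure'].values              # (nray, nbin), hPa
p_grid = ds['Pressure_grid'].values    # (nbin,), hPa

nearest = np.empty((len(ds['nray']), len(ds['nbin'])), dtype=int)
for j, p_target in enumerate(p_grid):
    # distance of every CloudSat bin to the target level; NaN pressures
    # (e.g. below the surface) are pushed to +inf so argmin skips them;
    # rays with no valid pressure at all fall back to bin 0
    d = np.where(np.isnan(p), np.inf, np.abs(p - p_target))
    nearest[:, j] = np.argmin(d, axis=1)

for var in _VARNAMES:
    if len(ds[var].dims) == 2:
        ds[var + '_pregrid'] = xr.DataArray(
            np.take_along_axis(ds[var].values, nearest, axis=1),
            dims=('nray', 'nbin'),
            coords={'nray': ds['nray'].values, 'nbin': ds['nbin'].values},
            attrs=ds[var].attrs)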
lat_lon = xr.Dataset()

# create target arrays on the ERA5 horizontal grid
for var in DATA_VARNAMES:
    if len(ds_cs[var].dims) == 1:
        lat_lon[var] = xr.DataArray(
            data=da.full(shape=(ds_era['longitude'].shape[0], ds_era['latitude'].shape[0]),
                         fill_value=np.nan,
                         chunks=(int(ds_era['longitude'].shape[0] / 2), ds_era['latitude'].shape[0])),
            dims=[ds_era['longitude'].dims[0], ds_era['latitude'].dims[0]],
            coords=[ds_era['longitude'].values, ds_era['latitude'].values])
    if len(ds_cs[var].dims) == 2:
        lat_lon[var] = xr.DataArray(
            data=da.full(shape=(ds_era['longitude'].shape[0], ds_era['latitude'].shape[0], ds_cs['Pressure_grid'].shape[0]),
                         fill_value=np.nan,
                         chunks=(int(ds_era['longitude'].shape[0] / 2), ds_era['latitude'].shape[0], ds_cs['Pressure_grid'].shape[0])),
            dims=[ds_era['longitude'].dims[0], ds_era['latitude'].dims[0], ds_cs['Pressure_grid'].dims[0]],
            coords=[ds_era['longitude'].values, ds_era['latitude'].values, ds_cs['Pressure_grid'].values])

    # calculate the average value within each grid box
    for i in range(len(lat_lon.longitude)):
        # eastern edge of the box; the last box closes at 360 degrees
        if i == len(lat_lon.longitude) - 1:
            lon1 = 360
        else:
            lon1 = lat_lon['longitude'].isel(longitude=slice(i, i + 2)).mean().values.tolist()
        # western edge; the first box starts at its own grid point
        if i == 0:
            lon0 = lat_lon['longitude'].isel(longitude=slice(i, i + 1)).mean().values.tolist()
        else:
            lon0 = lat_lon['longitude'].isel(longitude=slice(i - 1, i + 1)).mean().values.tolist()

        # loop through latitude
        for k in range(len(lat_lon.latitude)):
            lat0 = lat_lon['latitude'].isel(latitude=slice(k, k + 2)).mean().values.tolist()
            if k == 0:
                lat1 = lat_lon['latitude'].isel(latitude=slice(k, k + 1)).mean().values.tolist()
            else:
                lat1 = lat_lon['latitude'].isel(latitude=slice(k - 1, k + 1)).mean().values.tolist()

            bbox = (lon0, lon1, lat0, lat1)

            # all profiles that fall inside the grid box
            filter = ((ds_cs['Longitude'] > bbox[0]) & (ds_cs['Longitude'] < bbox[1]) &
                      (ds_cs['Latitude'] > bbox[2]) & (ds_cs['Latitude'] < bbox[3]))

            if len(ds_cs[var].dims) == 1:
                lat_lon[var][i, k] = ds_cs[var].where(filter).mean('nray', skipna=True, keep_attrs=True)
            if len(ds_cs[var].dims) == 2:
                lat_lon[var][i, k, :] = ds_cs[var + '_pregrid'].where(filter).mean('nray', skipna=True, keep_attrs=True)
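
The explicit longitude/latitude double loop is easy to follow but slow. For the 1-D surface variable the same box average can be sketched with xarray's groupby_bins, assuming cell edges built from the ERA5 grid as in the loop above (note that groupby_bins drops empty boxes instead of filling them with NaN):

# sketch: box-averaging snowfall_rate_sfc with groupby_bins
lon = ds_era['longitude'].values
lat = np.sort(ds_era['latitude'].values)   # ERA5 latitudes are often stored descending
lon_edges = np.concatenate(([lon[0]], (lon[:-1] + lon[1:]) / 2, [360.0]))
lat_edges = np.concatenate(([lat[0]], (lat[:-1] + lat[1:]) / 2, [lat[-1]]))

sfc = ds_cs['snowfall_rate_sfc'].assign_coords(lon=ds_cs['Longitude'], lat=ds_cs['Latitude'])
binned = (sfc.groupby_bins('lon', lon_edges)
             .map(lambda g: g.groupby_bins('lat', lat_edges).mean()))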
fig = plt.figure()

ax = fig.add_subplot(111, projection=ccrs.Robinson())
ax.coastlines()
ax.set_extent([-10, 30, 25, 85], crs=ccrs.PlateCarree())

# plot the CloudSat profiles that fall inside the last grid box visited above
ax.plot(ds_cs['Longitude'].where(filter), ds_cs['Latitude'].where(filter), linewidth=2, transform=ccrs.PlateCarree());
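
The output folder cs_out was created at the top of the notebook but nothing has been written to it yet. A sketch of the missing save step (the file name is an assumption for illustration):

# write the regridded fields to the output folder created above
# (the file name is illustrative)
lat_lon.to_netcdf(os.path.join(cs_out, 'cloudsat_regridded.nc'))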