Regrid CloudSat data to a horizontal grid#

Table of Contents#

1. Introduction#

NOTE: The common pressure grid was created in the Jupyter notebook that calculates the daily means of the CloudSat variables.

2. Data Wrangling#

  • Time period: 2007 - 2010

  • Time resolution: daily atmospheric overpasses

  • Variables (units shown where given in this notebook):

Long name            Units                      Dimension
snow_water_content   –                          (nray, nbin)
snowfall_rate        –                          (nray, nbin)
snowfall_rate_sfc    –                          (nray)
Pressure             hPa (converted from Pa)    (nray, nbin)
Temperature          K                          (nray, nbin)

import os
import pathlib
import sys
import socket

hostname = socket.gethostname()

# project root: two directory levels above the notebook's working directory
WORKDIR = str(pathlib.Path.cwd().parents[1])


if "mimi" in hostname:
    print(hostname)
    DATA_DIR = "/scratch/franzihe/"
    # FIG_DIR = "/uio/kant/geo-metos-u1/franzihe/Documents/Figures/ERA5/"
elif "glefsekaldt" in hostname: 
    DATA_DIR = "/home/franzihe/Data/"
    # FIG_DIR = "/home/franzihe/Documents/Figures/ERA5/"

INPUT_DATA_DIR = os.path.join(DATA_DIR, 'input')
OUTPUT_DATA_DIR = os.path.join(DATA_DIR, 'output')
UTILS_DIR = os.path.join(WORKDIR, 'utils')

sys.path.append(UTILS_DIR)
mimi.uio.no

Import Python packages#

  • Python environment requirements: file globalsnow.yml

  • Load Python packages from imports.py (see the sketch after this list)

  • Load functions from functions.py
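
For orientation, here is a sketch of what imports.py is assumed to contain; the aliases are inferred from how they are used in this notebook, and the cy alias in particular is an assumption:

# imports.py -- assumed contents, inferred from usage in this notebook
import numpy as np
import xarray as xr
import dask.array as da
import h5py
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy as cy                       # assumption: bare cartopy alias
from glob import glob
from datetime import datetime, timedelta
import functions as fct                    # helper functions from functions.py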

# suppress warnings
import warnings
warnings.filterwarnings('ignore')  # don't output warnings

# import packages
from imports import (xr, ccrs, cy, plt, glob, fct, np, datetime, timedelta, h5py, da)
# reload imports
%load_ext autoreload
%autoreload 2

Open CloudSat variables#

Get the data required for the analysis. Beforehand we downloaded the 2C-SNOW-PROFILE and ECMWF-AUX files with the script provided on the CloudSat webpage.

The CloudSat data are located in the folder /input/CloudSat/, with an individual subfolder per CloudSat product. The 2C-SNOW-PROFILE granules are in /2C-SNOW-PROFILE.P1_R05/, while the ECMWF-AUX files are in /ECMWF-AUX.P_R05/. Each year has subfolders for the day of the year, containing all CloudSat granules of that specific day.
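
A sketch of the expected directory layout (granule file names are illustrative; the elided parts are left as ...):

input/CloudSat/
├── 2C-SNOW-PROFILE.P1_R05/
│   └── 2007/
│       ├── 001/
│       │   └── 2007001..._CS_2C-SNOW-PROFILE_GRANULE_P1_R05_....h5
│       └── 002/
└── ECMWF-AUX.P_R05/
    └── 2007/
        ├── 001/
        │   └── 2007001..._CS_ECMWF-AUX_GRANULE_P_R05_....h5
        └── 002/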

cs_in = os.path.join(INPUT_DATA_DIR, 'CloudSat/2C-SNOW-PROFILE.P1_R05')
cs_out = os.path.join(OUTPUT_DATA_DIR, 'CloudSat/2C-SNOW-PROFILE.P1_R05')
# make output data directory (including missing parents)
os.makedirs(cs_out, exist_ok=True)
ff_cs = sorted(glob(cs_in + '/2007/*/*.h5'))   # all 2C-SNOW granules of 2007
GEOLOC_VARNAMES = ["DEM_elevation", "Height", "Latitude", "Longitude"]
DATA_VARNAMES = ["snow_water_content", "snowfall_rate", "snowfall_rate_sfc"]
DATA_VARNAMES_EC = ["Pressure", "Temperature"]
_VARNAMES = np.append(DATA_VARNAMES, DATA_VARNAMES_EC)   # all variables to regrid
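
The fct.get_* helpers wrap plain h5py reads. As a minimal sketch of the underlying access (the group names 'Geolocation Fields' and 'Data Fields' follow the usual HDF-EOS layout and are an assumption here, not taken from functions.py):

# sketch: raw h5py access behind the fct.get_* helpers (group names assumed)
with h5py.File(ff_cs[0], 'r') as f:
    swath = f['2C-SNOW-PROFILE']
    lat = swath['Geolocation Fields']['Latitude'][:]       # (nray,)
    swc = swath['Data Fields']['snow_water_content'][:]    # (nray, nbin)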
# common pressure grid created in the daily-means notebook (see the note above)
pressure_grid = xr.open_dataset('{}/CloudSat/ECMWF-AUX.P_R05/pressure_grid.nc'.format(OUTPUT_DATA_DIR))['Pressure_grid']

# ERA5 file that provides the target horizontal grid
# (era_in is an assumption: the folder holding the downloaded ERA5 files)
era_in = os.path.join(INPUT_DATA_DIR, 'ERA5')
ds_era = xr.open_dataset('{}/clwc_Amon_ERA5_198501_198912.nc'.format(era_in))
ds_era = ds_era.drop_vars('clwc')   # keep only the grid coordinates
datasets = []

for file in ff_cs[1:2]:   # a single granule for testing; loop over ff_cs for the full year
    year = int(file.split('/')[-1].split('_')[0][0:4])
    doy = int(file.split('/')[-1].split('_')[0][4:7])  # day of the year
    tt = datetime(year, 1, 1) + timedelta(doy - 1)

    # matching ECMWF-AUX granule (same time stamp as the 2C-SNOW granule)
    file_ec = sorted(glob('{}/CloudSat/ECMWF-AUX.P_R05/{}/{}_*.h5'.format(INPUT_DATA_DIR, year, file.split('/')[-1].split('_')[0])))[0]
    for month in range(1, 2):   # extend the range to process more months
        if tt.month == month:
            # one dataset per granule, so the appended datasets stay independent
            ds = xr.Dataset()
            ds['Pressure_grid'] = pressure_grid

            h5file = h5py.File(file, "r")   # open h5 file
            ds['Profile_time'] = fct.get_profile_times(h5file['2C-SNOW-PROFILE'])

            for var in GEOLOC_VARNAMES:
                ds[var] = fct.get_geoloc_var(h5file['2C-SNOW-PROFILE'], var)

            for var in DATA_VARNAMES:
                ds[var] = fct.get_data_var(h5file['2C-SNOW-PROFILE'], var)

            # load ECMWF-AUX file
            h5file_ec = h5py.File(file_ec, "r")
            for var in DATA_VARNAMES_EC:
                ds[var] = fct.get_data_var(h5file_ec['ECMWF-AUX'], var)

                if var == 'Pressure':
                    ds[var] = ds[var] / 100   # convert Pa to hPa
                    ds[var].attrs = {'units': 'hPa', 'longname': 'Pressure'}

            # Before averaging within a grid cell, bring the profiles onto the
            # common pressure grid created in get_pressure_grid.py: for every
            # target level pick the CloudSat bin whose pressure is closest.
            for var in _VARNAMES:
                if len(ds[var].dims) == 2:
                    ds[var + '_pregrid'] = xr.DataArray(
                        data=da.full(shape=ds[var].shape, fill_value=np.nan,
                                     chunks=(int(len(ds['nray']) / 2), int(len(ds['nbin'])))),
                        dims=list(ds[var].dims),
                        coords=[ds[var]['nray'].values, ds[var]['nbin']])
                    # NOTE: this profile-by-profile search is very slow; the run
                    # below was interrupted here (a vectorized sketch follows further down)
                    for nray in range(len(ds.nray)):
                        for nbin in range(len(ds.nbin)):
                            dmin = int(abs(ds['Pressure'].isel(nray=nray) - ds['Pressure_grid'].isel(nbin=nbin)).idxmin(dim='nbin'))
                            ds[var + '_pregrid'][nray, nbin] = ds[var].isel(nray=nray, nbin=dmin)

            datasets.append(ds)
            h5file.close()
            h5file_ec.close()

ds_cs = xr.concat(datasets, dim='nray')

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In [56], line 40
     38             for nbin in range(len(ds.nbin)):
     39                 dmin= int(abs(ds['Pressure'].isel(nray = nray) - ds['Pressure_grid'].isel(nbin = nbin)).idxmin(dim = 'nbin'))
---> 40                 ds[var +'_pregrid'][nray, nbin] =  ds[var].isel(nray = nray, nbin = dmin)
     43 datasets.append(ds)
     44 h5file.close()

    (...)

KeyboardInterrupt: 
# after interrupting the cell above, append the partly regridded granule
# and build the along-track dataset manually
datasets.append(ds)
ds_cs = xr.concat(datasets, dim='nray')
ds_cs['Temperature']
<xarray.DataArray 'Temperature' (nray: 37081, nbin: 125)>
array([[210.33583069, 210.40197754, 210.68574524, ...,          nan,
                 nan,          nan],
       [210.33924866, 210.40563965, 210.68916321, ...,          nan,
                 nan,          nan],
       [210.34265137, 210.4092865 , 210.69258118, ...,          nan,
                 nan,          nan],
       ...,
       [211.10948181, 211.37901306, 211.66918945, ...,          nan,
                 nan,          nan],
       [211.11125183, 211.38110352, 211.67153931, ...,          nan,
                 nan,          nan],
       [211.11303711, 211.38319397, 211.67388916, ...,          nan,
                 nan,          nan]])
Coordinates:
  * nbin        (nbin) int64 0 1 2 3 4 5 6 7 ... 117 118 119 120 121 122 123 124
    height_agl  (nbin) float64 2.256e+04 2.24e+04 ... -5.198e+03 -5.49e+03
  * nray        (nray) int64 0 1 2 3 4 5 ... 37075 37076 37077 37078 37079 37080
Attributes:
    longname:  Temperature
    units:     K
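
The profile-by-profile nearest-level search in the loop above is what made the cell run long enough to be interrupted. The lookup can be vectorized with numpy instead; a sketch, assuming ds still holds Pressure (nray, nbin) and Pressure_grid (nbin) in hPa as above:

# vectorized nearest-pressure-level lookup (sketch)
p = ds['Pressure'].values              # (nray, nbin), hPa
p_grid = ds['Pressure_grid'].values    # (nbin,), hPa

nearest = np.empty((len(ds['nray']), len(ds['nbin'])), dtype=int)
for j, p_target in enumerate(p_grid):
    # distance of every CloudSat bin to the target level; NaN pressures
    # (e.g. below the surface) are pushed to +inf so argmin skips them;
    # rays with no valid pressure at all fall back to bin 0
    d = np.where(np.isnan(p), np.inf, np.abs(p - p_target))
    nearest[:, j] = np.argmin(d, axis=1)

for var in _VARNAMES:
    if len(ds[var].dims) == 2:
        ds[var + '_pregrid'] = xr.DataArray(
            np.take_along_axis(ds[var].values, nearest, axis=1),
            dims=('nray', 'nbin'),
            coords={'nray': ds['nray'].values, 'nbin': ds['nbin'].values},
            attrs=ds[var].attrs)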
lat_lon = xr.Dataset()

# create target arrays on the ERA5 horizontal grid
for var in DATA_VARNAMES:
    if len(ds_cs[var].dims) == 1:
        lat_lon[var] = xr.DataArray(
            data=da.full(shape=(ds_era['longitude'].shape[0], ds_era['latitude'].shape[0]),
                         fill_value=np.nan,
                         chunks=(int(ds_era['longitude'].shape[0] / 2), ds_era['latitude'].shape[0])),
            dims=[ds_era['longitude'].dims[0], ds_era['latitude'].dims[0]],
            coords=[ds_era['longitude'].values, ds_era['latitude'].values])
    if len(ds_cs[var].dims) == 2:
        lat_lon[var] = xr.DataArray(
            data=da.full(shape=(ds_era['longitude'].shape[0], ds_era['latitude'].shape[0], ds_cs['Pressure_grid'].shape[0]),
                         fill_value=np.nan,
                         chunks=(int(ds_era['longitude'].shape[0] / 2), ds_era['latitude'].shape[0], ds_cs['Pressure_grid'].shape[0])),
            dims=[ds_era['longitude'].dims[0], ds_era['latitude'].dims[0], ds_cs['Pressure_grid'].dims[0]],
            coords=[ds_era['longitude'].values, ds_era['latitude'].values, ds_cs['Pressure_grid'].values])

    # calculate the average value within each grid box
    for i in range(len(lat_lon.longitude)):
        # eastern edge of the box; the last box closes at 360 degrees
        if i == len(lat_lon.longitude) - 1:
            lon1 = 360
        else:
            lon1 = lat_lon['longitude'].isel(longitude=slice(i, i + 2)).mean().values.tolist()
        # western edge; the first box starts at its own grid point
        if i == 0:
            lon0 = lat_lon['longitude'].isel(longitude=slice(i, i + 1)).mean().values.tolist()
        else:
            lon0 = lat_lon['longitude'].isel(longitude=slice(i - 1, i + 1)).mean().values.tolist()

        # loop through latitude
        for k in range(len(lat_lon.latitude)):
            lat0 = lat_lon['latitude'].isel(latitude=slice(k, k + 2)).mean().values.tolist()
            if k == 0:
                lat1 = lat_lon['latitude'].isel(latitude=slice(k, k + 1)).mean().values.tolist()
            else:
                lat1 = lat_lon['latitude'].isel(latitude=slice(k - 1, k + 1)).mean().values.tolist()

            bbox = (lon0, lon1, lat0, lat1)

            # all profiles that fall inside the grid box
            filter = ((ds_cs['Longitude'] > bbox[0]) & (ds_cs['Longitude'] < bbox[1]) &
                      (ds_cs['Latitude'] > bbox[2]) & (ds_cs['Latitude'] < bbox[3]))

            if len(ds_cs[var].dims) == 1:
                lat_lon[var][i, k] = ds_cs[var].where(filter).mean('nray', skipna=True, keep_attrs=True)
            if len(ds_cs[var].dims) == 2:
                lat_lon[var][i, k, :] = ds_cs[var + '_pregrid'].where(filter).mean('nray', skipna=True, keep_attrs=True)
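
The explicit longitude/latitude double loop is easy to follow but slow. For the 1-D surface variable the same box average can be sketched with xarray's groupby_bins, assuming cell edges built from the ERA5 grid as in the loop above (note that groupby_bins drops empty boxes instead of filling them with NaN):

# sketch: box-averaging snowfall_rate_sfc with groupby_bins
lon = ds_era['longitude'].values
lat = np.sort(ds_era['latitude'].values)   # ERA5 latitudes are often stored descending
lon_edges = np.concatenate(([lon[0]], (lon[:-1] + lon[1:]) / 2, [360.0]))
lat_edges = np.concatenate(([lat[0]], (lat[:-1] + lat[1:]) / 2, [lat[-1]]))

sfc = ds_cs['snowfall_rate_sfc'].assign_coords(lon=ds_cs['Longitude'], lat=ds_cs['Latitude'])
binned = (sfc.groupby_bins('lon', lon_edges)
             .map(lambda g: g.groupby_bins('lat', lat_edges).mean()))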
fig = plt.figure()

ax = fig.add_subplot(111, projection=ccrs.Robinson())
ax.coastlines()
ax.set_extent([-10, 30, 25, 85], crs=ccrs.PlateCarree())

# plot the CloudSat profiles that fall inside the last grid box visited above
ax.plot(ds_cs['Longitude'].where(filter), ds_cs['Latitude'].where(filter), linewidth=2, transform=ccrs.PlateCarree());
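
The output folder cs_out was created at the top of the notebook but nothing has been written to it yet. A sketch of the missing save step (the file name is an assumption for illustration):

# write the regridded fields to the output folder created above
# (the file name is illustrative)
lat_lon.to_netcdf(os.path.join(cs_out, 'cloudsat_regridded.nc'))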