Regrid CloudSat data to a horizontal grid#
Table of Contents#
1. Introduction#
Questions
NOTE: The pressure grid was created in the Jupyter Notebook that calculates the daily means of CloudSat variables.
2. Data Wrangling#
Time period: 2007–2010
Time resolution: daily atmospheric overpasses
Variables:
| Long name | Units | Dimension |
|---|---|---|
import os
import pathlib
import sys
import socket

# determine the working directory: two levels above the current directory
hostname = socket.gethostname()
WORKDIR = str(pathlib.Path().absolute().parent.parent)

# set the data directory depending on the host we run on
if "mimi" in hostname:
    print(hostname)
    DATA_DIR = "/scratch/franzihe/"
    # FIG_DIR = "/uio/kant/geo-metos-u1/franzihe/Documents/Figures/ERA5/"
elif "glefsekaldt" in hostname:
    DATA_DIR = "/home/franzihe/Data/"
    # FIG_DIR = "/home/franzihe/Documents/Figures/ERA5/"

INPUT_DATA_DIR = os.path.join(DATA_DIR, 'input')
OUTPUT_DATA_DIR = os.path.join(DATA_DIR, 'output')
UTILS_DIR = os.path.join(WORKDIR, 'utils')

# make the helper modules in utils/ importable
sys.path.append(UTILS_DIR)
# make figure directory
# try:
# os.mkdir(FIG_DIR)
# except OSError:
# pass
mimi.uio.no
Import Python packages#
Python environment requirements:
- environment file: globalsnow.yml
- load Python packages from imports.py (a sketch of this module follows below)
- load functions from functions.py (get_data_var is sketched after the import cell)
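imports.py is not included in this section; the following is a minimal sketch consistent with the names imported below. The cartopy feature alias cy and the re-export of functions as fct are assumptions.

# imports.py (sketch): re-exports the packages used in the notebooks so that
# cells can simply do `from imports import (...)`
import xarray as xr
import numpy as np
import h5py
import dask.array as da
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cy  # assumption: cy aliases cartopy.feature
from glob import glob
from datetime import datetime, timedelta
import functions as fct       # assumption: the helpers module is re-exported here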
# suppress warnings
import warnings
warnings.filterwarnings('ignore')  # don't output warnings

# import packages
from imports import (xr, ccrs, cy, plt, glob, fct, np, datetime, timedelta, h5py, da)

# reload imports automatically when their source files change
%load_ext autoreload
%autoreload 2
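The helpers fct.get_profile_times, fct.get_geoloc_var and fct.get_data_var come from functions.py, which is not shown here. As an illustration, a hypothetical sketch of get_data_var, assuming the CloudSat HDF5 granules keep their science data in a 'Data Fields' subgroup of the product group:

# hypothetical sketch of functions.get_data_var; the real implementation is
# not part of this notebook
import xarray as xr

def get_data_var(group, name):
    """Read one science data field from an opened CloudSat granule."""
    raw = group['Data Fields'][name][:].astype('float64')
    dims = ('nray', 'nbin')[:raw.ndim]  # 1-D fields are per ray, 2-D per ray and bin
    # real granules also carry scale/offset and missing-value attributes
    # that would have to be applied here
    return xr.DataArray(raw, dims=dims)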
Open CloudSat variables#
Get the data required for the analysis. Beforehand we downloaded the 2C-SNOW-PROFILE and ECMWF-AUX files with the script provided on the CloudSat webpage.
The CloudSat data is located in the folder /input/CloudSat/, with an individual subfolder per CloudSat product: the 2C-SNOW-PROFILE files are in /2C-SNOW-PROFILE.P1_R05/ and the ECMWF-AUX files in /ECMWF-AUX.P_R05/. Within each year there is one subfolder per day of the year, which contains all CloudSat granules of that specific day.
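Schematically, the input layout looks like this:

input/CloudSat/
├── 2C-SNOW-PROFILE.P1_R05/
│   └── 2007/
│       └── 001/          # day of year
│           └── *.h5      # granules of that day
└── ECMWF-AUX.P_R05/
    └── 2007/
        └── 001/
            └── *.h5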
cs_in = os.path.join(INPUT_DATA_DIR, 'CloudSat/2C-SNOW-PROFILE.P1_R05')
cs_out = os.path.join(OUTPUT_DATA_DIR, 'CloudSat/2C-SNOW-PROFILE.P1_R05')

# make the output data directory (including missing parents) if needed
os.makedirs(cs_out, exist_ok=True)
# all 2C-SNOW-PROFILE granules of 2007
ff_cs = sorted(glob(cs_in + '/2007/*/*.h5'))

GEOLOC_VARNAMES = ["DEM_elevation", "Height", "Latitude", "Longitude"]
DATA_VARNAMES = ["snow_water_content", "snowfall_rate", "snowfall_rate_sfc"]
DATA_VARNAMES_EC = ["Pressure", "Temperature"]
_VARNAMES = np.append(DATA_VARNAMES, DATA_VARNAMES_EC)
ds = xr.Dataset()
# common pressure grid created beforehand (see the note in the introduction)
ds['Pressure_grid'] = xr.open_dataset('{}/CloudSat/ECMWF-AUX.P_R05/pressure_grid.nc'.format(OUTPUT_DATA_DIR))['Pressure_grid']

# load the ERA5 grid that serves as the horizontal target grid; the ERA5
# input directory is an assumption (this cell was originally commented out)
era_in = os.path.join(INPUT_DATA_DIR, 'ERA5')
ds_era = xr.open_dataset('{}/clwc_Amon_ERA5_198501_198912.nc'.format(era_in))
ds_era = ds_era.drop('clwc')  # only the grid is needed, not the variable
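pressure_grid.nc is produced by get_pressure_grid.py, which is not shown in this section. A hypothetical sketch of the idea, assuming the grid is simply the mean ECMWF-AUX pressure per vertical bin (ds_ec stands for a dataset of ECMWF-AUX profiles):

# hypothetical sketch of get_pressure_grid.py: one representative pressure per
# vertical bin, obtained by averaging the ECMWF-AUX pressure over all rays
pressure_grid = ds_ec['Pressure'].mean('nray', keep_attrs=True)
pressure_grid.name = 'Pressure_grid'
pressure_grid.to_netcdf('pressure_grid.nc')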
datasets = []
for file in ff_cs[1:2]:
    # granule file names start with YYYYDDDHHMMSS (year, day of year, time)
    year = int(file.split('/')[-1].split('_')[0][0:4])
    doy = int(file.split('/')[-1].split('_')[0][4:7])  # day of the year
    tt = datetime(year, 1, 1) + timedelta(doy - 1)

    # matching ECMWF-AUX granule with the same timestamp
    file_ec = sorted(glob('{}/CloudSat/ECMWF-AUX.P_R05/{}/{}_*.h5'.format(INPUT_DATA_DIR, year, file.split('/')[-1].split('_')[0])))[0]

    for month in range(1, 2):
        if tt.month == month:
            # start a fresh per-granule dataset, keeping only the pressure grid
            # (otherwise the same object would be appended for every granule)
            ds = ds[['Pressure_grid']]

            h5file = h5py.File(file, "r")  # open 2C-SNOW-PROFILE granule
            ds['Profile_time'] = fct.get_profile_times(h5file['2C-SNOW-PROFILE'])
            for var in GEOLOC_VARNAMES:
                ds[var] = fct.get_geoloc_var(h5file['2C-SNOW-PROFILE'], var)
            for var in DATA_VARNAMES:
                ds[var] = fct.get_data_var(h5file['2C-SNOW-PROFILE'], var)

            # load the matching ECMWF-AUX granule
            h5file_ec = h5py.File(file_ec, "r")
            for var in DATA_VARNAMES_EC:
                ds[var] = fct.get_data_var(h5file_ec['ECMWF-AUX'], var)
                if var == 'Pressure':
                    ds[var] = ds[var] / 100  # convert Pa to hPa
                    ds[var].attrs = {'units': 'hPa', 'longname': 'Pressure'}

            # Before averaging within a grid cell we bring the profiles onto the
            # common pressure grid created in get_pressure_grid.py.
            for var in _VARNAMES:
                if len(ds[var].dims) == 2:
                    ds[var + '_pregrid'] = xr.DataArray(
                        data=da.full(shape=(ds[var].shape), fill_value=np.nan,
                                     chunks=(int(len(ds['nray']) / 2), int(len(ds['nbin'])))),
                        dims=list(ds[var].dims),
                        coords=[ds[var]['nray'].values, ds[var]['nbin']])
                    for nray in range(len(ds.nray)):
                        for nbin in range(len(ds.nbin)):
                            # index of the native bin closest in pressure to the target level
                            dmin = int(abs(ds['Pressure'].isel(nray=nray) - ds['Pressure_grid'].isel(nbin=nbin)).idxmin(dim='nbin'))
                            # very slow: each element-wise assignment into the
                            # dask-backed array rebuilds its task graph
                            ds[var + '_pregrid'][nray, nbin] = ds[var].isel(nray=nray, nbin=dmin)

            datasets.append(ds)
            h5file.close()
            h5file_ec.close()

ds_cs = xr.concat(datasets, dim='nray')
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Cell In [56], line 40
38 for nbin in range(len(ds.nbin)):
39 dmin= int(abs(ds['Pressure'].isel(nray = nray) - ds['Pressure_grid'].isel(nbin = nbin)).idxmin(dim = 'nbin'))
---> 40 ds[var +'_pregrid'][nray, nbin] = ds[var].isel(nray = nray, nbin = dmin)
43 datasets.append(ds)
44 h5file.close()
...
KeyboardInterrupt:
# the loop was interrupted before the granule was appended; append the
# partially regridded dataset by hand and concatenate along the ray dimension
datasets.append(ds)
ds_cs = xr.concat(datasets, dim='nray')
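The regridding cell had to be interrupted because every element-wise assignment into the dask-backed array rebuilds a task graph. A vectorized nearest-level lookup avoids this; the following is a minimal sketch that reuses only names defined above and assumes every profile contains at least one valid pressure value.

# vectorized nearest-pressure regridding (sketch): for each target level of
# Pressure_grid, find the native bin whose ECMWF pressure is closest, then
# gather all variables along that index in one go
pressure = ds['Pressure'].values            # (nray, nbin), hPa
pressure_grid = ds['Pressure_grid'].values  # (nbin,), hPa

# nearest native bin per ray and target level; np.nanargmin raises on
# all-NaN profiles, hence the assumption stated above
nearest = np.empty((pressure.shape[0], pressure_grid.size), dtype=int)
for j, p in enumerate(pressure_grid):       # 125 iterations instead of nray*nbin
    nearest[:, j] = np.nanargmin(np.abs(pressure - p), axis=1)

for var in _VARNAMES:
    if len(ds[var].dims) == 2:
        regridded = np.take_along_axis(ds[var].values, nearest, axis=1)
        ds[var + '_pregrid'] = xr.DataArray(
            regridded, dims=('nray', 'nbin'),
            coords={'nray': ds['nray'].values, 'nbin': ds['nbin'].values})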
ds_cs['Temperature']
<xarray.DataArray 'Temperature' (nray: 37081, nbin: 125)>
array([[210.33583069, 210.40197754, 210.68574524, ..., nan,
nan, nan],
[210.33924866, 210.40563965, 210.68916321, ..., nan,
nan, nan],
[210.34265137, 210.4092865 , 210.69258118, ..., nan,
nan, nan],
...,
[211.10948181, 211.37901306, 211.66918945, ..., nan,
nan, nan],
[211.11125183, 211.38110352, 211.67153931, ..., nan,
nan, nan],
[211.11303711, 211.38319397, 211.67388916, ..., nan,
nan, nan]])
Coordinates:
* nbin (nbin) int64 0 1 2 3 4 5 6 7 ... 117 118 119 120 121 122 123 124
height_agl (nbin) float64 2.256e+04 2.24e+04 ... -5.198e+03 -5.49e+03
* nray (nray) int64 0 1 2 3 4 5 ... 37075 37076 37077 37078 37079 37080
Attributes:
longname: Temperature
units: K
lat_lon = xr.Dataset()

# create empty target arrays on the ERA5 longitude/latitude grid
for var in DATA_VARNAMES:
    if len(ds_cs[var].dims) == 1:
        lat_lon[var] = xr.DataArray(
            data=da.full(shape=(ds_era['longitude'].shape[0], ds_era['latitude'].shape[0]),
                         fill_value=np.nan,
                         chunks=(ds_era['longitude'].shape[0] // 2, ds_era['latitude'].shape[0])),
            dims=[ds_era['longitude'].dims[0], ds_era['latitude'].dims[0]],
            coords=[ds_era['longitude'].values, ds_era['latitude'].values])
    if len(ds_cs[var].dims) == 2:
        lat_lon[var] = xr.DataArray(
            data=da.full(shape=(ds_era['longitude'].shape[0], ds_era['latitude'].shape[0], ds_cs['Pressure_grid'].shape[0]),
                         fill_value=np.nan,
                         chunks=(ds_era['longitude'].shape[0] // 2, ds_era['latitude'].shape[0], ds_cs['Pressure_grid'].shape[0])),
            dims=[ds_era['longitude'].dims[0], ds_era['latitude'].dims[0], ds_cs['Pressure_grid'].dims[0]],
            coords=[ds_era['longitude'].values, ds_era['latitude'].values, ds_cs['Pressure_grid'].values])

# Calculate the average value within each grid box. Box edges are the midpoints
# between neighbouring grid centres; the last longitude box extends to 360.
for i in range(len(lat_lon.longitude)):
    # western box edge
    if i == 0:
        lon0 = lat_lon['longitude'].isel(longitude=slice(i, i + 1)).mean().values.tolist()
    else:
        lon0 = lat_lon['longitude'].isel(longitude=slice(i - 1, i + 1)).mean().values.tolist()
    # eastern box edge
    if i == len(lat_lon.longitude) - 1:
        lon1 = 360.
    else:
        lon1 = lat_lon['longitude'].isel(longitude=slice(i, i + 2)).mean().values.tolist()

    for k in range(len(lat_lon.latitude)):
        # southern/northern box edges (ERA5 latitudes run from north to south)
        lat0 = lat_lon['latitude'].isel(latitude=slice(k, k + 2)).mean().values.tolist()
        if k == 0:
            lat1 = lat_lon['latitude'].isel(latitude=slice(k, k + 1)).mean().values.tolist()
        else:
            lat1 = lat_lon['latitude'].isel(latitude=slice(k - 1, k + 1)).mean().values.tolist()

        bbox = (lon0, lon1, lat0, lat1)
        # all CloudSat rays inside the grid box (assumes 0-360 longitudes)
        filter = ((ds_cs['Longitude'] > bbox[0]) & (ds_cs['Longitude'] < bbox[1]) &
                  (ds_cs['Latitude'] > bbox[2]) & (ds_cs['Latitude'] < bbox[3]))

        for var in DATA_VARNAMES:
            if len(ds_cs[var].dims) == 1:
                lat_lon[var][i, k] = ds_cs[var].where(filter).mean('nray', skipna=True, keep_attrs=True)
            if len(ds_cs[var].dims) == 2:
                lat_lon[var][i, k, :] = ds_cs[var + '_pregrid'].where(filter).mean('nray', skipna=True, keep_attrs=True)
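Looping over every grid box is expensive, since most boxes contain no overpass at all. As an alternative sketch (not the method used above), xarray's groupby_bins can produce the same box averages in a few lines; it is shown for the surface snowfall rate only, assumes 0-360 longitudes, and boxes without any overpass are simply absent from the result instead of NaN-filled.

# box averaging via groupby_bins (sketch); edges are midpoints between ERA5
# grid centres, sorted because pandas.cut requires increasing bin edges
lon = ds_era['longitude'].values
lat = ds_era['latitude'].values
lon_edges = np.concatenate(([lon[0]], 0.5 * (lon[:-1] + lon[1:]), [360.0]))
lat_edges = np.sort(np.concatenate(([lat[0]], 0.5 * (lat[:-1] + lat[1:]), [lat[-1]])))

sfc = ds_cs['snowfall_rate_sfc'].assign_coords(lon=ds_cs['Longitude'], lat=ds_cs['Latitude'])
mean_sfc = (sfc.groupby_bins('lon', lon_edges)
               .map(lambda g: g.groupby_bins('lat', lat_edges).mean('nray', skipna=True)))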
fig = plt.figure()
ax = fig.add_subplot(111, projection=ccrs.Robinson())
ax.coastlines()
# ax.set_global()
# ax.set_extent([-30, 50, 50, 85], crs=ccrs.PlateCarree())
ax.set_extent([-10, 30, 25, 85], crs=ccrs.PlateCarree())
# plot the CloudSat track points that fall into the last grid box visited above
ax.plot(ds_cs['Longitude'].where(filter), ds_cs['Latitude'].where(filter), linewidth=2, transform=ccrs.PlateCarree());
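The output directory cs_out created at the top of this section is not used in the excerpt above; presumably the regridded fields are written there. A sketch with a hypothetical file name:

# write the regridded fields to the output directory (file name is hypothetical)
lat_lon.to_netcdf(os.path.join(cs_out, 'cloudsat_2C-SNOW_regridded_2007.nc'))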