Example to get CloudSat mean pressure level profile#
Table of Contents#
1. Introduction #
Questions
NOTE: The pressure grid created here is used later in the Jupyter Notebook to calculate the monthly means of CloudSat variables.
2. Data Wrangling #
This study will retrieve the CloudSat mean pressure grid from the ECMWF-AUX files. We decided to use the CloudSat overpasses from 2008 as this is the year with the most CloudSat available data. We make use of the scipy.optimize.curve_fit function to calculate pressure levels.
Time period: 2007
time resolution: daily atmospheric overpasses
Variables:
Long name |
Units |
Dimension |
|---|---|---|
Pressure |
[Pa] |
nray, nbin |
import os
import pathlib
import sys
import socket
hostname = socket.gethostname()
abs_path = str(pathlib.Path(hostname).parent.absolute())
WORKDIR = abs_path[:- (len(abs_path.split('/')[-2] + abs_path.split('/')[-1])+1)]
if "mimi" in hostname:
print(hostname)
DATA_DIR = "/scratch/franzihe/"
# FIG_DIR = "/uio/kant/geo-metos-u1/franzihe/Documents/Figures/ERA5/"
elif "glefsekaldt" in hostname:
DATA_DIR = "/home/franzihe/Data/"
# FIG_DIR = "/home/franzihe/Documents/Figures/ERA5/"
INPUT_DATA_DIR = os.path.join(DATA_DIR, 'input')
OUTPUT_DATA_DIR = os.path.join(DATA_DIR, 'output')
UTILS_DIR = os.path.join(WORKDIR, 'utils')
sys.path.append(UTILS_DIR)
# make figure directory
# try:
# os.mkdir(FIG_DIR)
# except OSError:
# pass
Import python packages#
Pythonenvironment requirements: file globalsnow.ymlload
pythonpackages from imports.pyload
functionsfrom functions.py
# supress warnings
import warnings
warnings.filterwarnings('ignore') # don't output warnings
# import packages
from imports import(xr, ccrs, cy, plt, glob, fct, np, da, datetime, timedelta, h5py, curve_fit)
# reload imports
%load_ext autoreload
%autoreload 2
Open CloudSat variables#
Get the data requried for the analysis. Beforehand we downloaded the ECMWF-AUX files with the script provided on the CloudSat webpage.
The CloudSat data is located in the folder /input/CloudSat/ and individual folders for the CloudSat product. The 2C-SNOW-PROFILEs are in /2C-SNOW-PROFILE.P1_R05/ while the ECMWF-AUX files are in /ECMWF-AUX.P_R05/. Each year has extra folders for the day of the year, where all CloudSat granules can be found of that specific day.
cs_in = os.path.join(INPUT_DATA_DIR, 'CloudSat/ECMWF-AUX.P_R05')
cs_out = os.path.join(OUTPUT_DATA_DIR, 'CloudSat/ECMWF-AUX.P_R05')
# make output data directory
try:
os.mkdir(cs_out)
except OSError:
pass
ff_ec = sorted(glob('{}/2007/*_ECMWF-AUX_GRANULE_P_R05_*.h5'.format(cs_in, )))
DATA_VARNAMES_EC = ["Pressure", ]
empty = np.empty(shape=(37081, 125), dtype='float64')
empty[:] = np.nan
datasets = []
for file in ff_ec:
year = int(file.split('/')[-1].split('_')[0][0:4])
doy = int(file.split('/')[-1].split('_')[0][4:7]) # day of the year
tt = datetime(year, 1, 1) + timedelta(doy - 1)
for month in range(1, 13):
# for month in range(1, 2):
if tt.month == month:
ds = xr.Dataset()
try:
# load ECMWF-AUX file
print('load ECMWF-AUX...', file)
h5file = h5py.File(file, "r")
for var in DATA_VARNAMES_EC:
ds[var] = fct.get_data_var(h5file['ECMWF-AUX'], var)
datasets.append(ds)
h5file.close()
except OSError:
for var in DATA_VARNAMES_EC:
ds[var] = xr.DataArray(data=empty,
dims=["nray", "nbin"],
coords=dict(
nray=(["nray"], np.arange(0,37081)),
nbin=(["nbin"], np.arange(0,125)),),
attrs=dict(
longname='Pressure',
units='Pa'),)
datasets.append(ds)
continue
ds_cs = xr.concat(datasets,dim='nray')
# convert ECMWF pressure to hPa
ds_cs['Pressure'] = ds_cs['Pressure']/100
# create mean over all granules to create pressure grid
ds_cs['Pressure'] = ds_cs['Pressure'].mean('nray', keep_attrs=True, skipna=True)
ds_cs['Pressure'].attrs = {'units': 'hPa', 'longname':'Pressure'}
# create pressure variable without NaN values
cs_pressure = ds_cs['Pressure'][np.isfinite(ds_cs['Pressure'])]
# find the index where pressure has NaN values
idx_where_p_nan = ds_cs['Pressure'][~np.isfinite(ds_cs['Pressure'])].nbin
# Generate exponential pressure grid from mean over all granules
fitting_parameters, covariance = curve_fit(f=fct.exponential_fit, xdata=range(len(cs_pressure)), ydata=cs_pressure.values, p0=[1, -1, 0])
# pressure where values were found to be NaN
# corrected by 15hPa to fit to existing curve (roughly determined via including last point in exponential_fit)
p_where_p_nan = fct.exponential_fit(idx_where_p_nan, fitting_parameters[0], fitting_parameters[1], fitting_parameters[2]) - 15
# Combine the arrays with pressure levels provided by the ECMWF-AUX file and the ones calculated with the exponential fit
ds_cs['Pressure_grid'] = xr.concat([cs_pressure, p_where_p_nan], dim = 'nbin', coords='minimal')
# Email from Sally:
# Create a linear height array grid to put all the data on
# vertical_binsize=240.0 ;m
# height=vertical_binsize/2.0
vertical_binsize = 240.
height = vertical_binsize/2
ds_cs['height'] = xr.DataArray(data = np.arange(height,len(ds_cs['Pressure'])*vertical_binsize, vertical_binsize)[::-1], dims = ds_cs.nbin.dims[0], coords=[ds_cs['nbin'].values])
ds_cs['height'].attrs = ({'units':'m', 'longname':'height above ground'})
# The altitude in meters can be calculated with the international barometric formula:
# H = 44330 * [1 - (P/p0)^(1/5.255) ]
height_agl = 44330 * (1 - (ds_cs['Pressure_grid']/1013.25) ** (1/5.255))
# Assign coordinates and attributes to the coordinate as this will be handy for any plotting later
ds_cs['Pressure_grid'] = ds_cs['Pressure_grid'].assign_coords(height_agl=("nbin", height_agl.values))
ds_cs['height_agl'].attrs = ({'units':ds_cs['height'].attrs['units'], 'longname': 'height above ground'})
# Round grid values
ds_cs['Pressure_grid'] = ds_cs['Pressure_grid'].round(decimals=0)
# # Example plot of the new pressure grid retrieved with the exponetial fit and the pressure provided by the ECMWF-AUX file
# ds_cs['Pressure_grid'].plot(x = 'height_agl')
# ds_cs['Pressure'].plot(x = 'height_agl', marker='o')
# Save DataSet to NetCDF
ds_cs.drop(('Pressure', 'nray' )).to_netcdf('{}/pressure_grid.nc'.format(cs_out))
print('file written: {}/pressure_grid.nc'.format(cs_out))