Commit bc8bd6bc authored by Lana B's avatar Lana B
Browse files

clean up: export functions to separate file

("get_subpolar_gyre_functions.py")
parent 57b6920b
......@@ -13,118 +13,43 @@ import cartopy
import os
import glob
import xarray as xr
from get_subpolar_gyre_functions import *
# Region
lat_max = 61
lat_min = 46
lon_max = 360-20
lon_min = 360-55
lon_max_W = -20
lon_min_W = -55
# Regional variables
latitude_maximum = 61
latitude_minimum = 46
longitude_maximum = 360-20
longitude_minimum = 360-55
set_boundaries(
longitude_maximum = longitude_maximum,
longitude_minimum = longitude_minimum,
latitude_maximum = latitude_maximum,
latitude_minimum = latitude_minimum
)
# these seem to be the three latitude/longitude keys
lat_keys = ['lat', 'latitude', 'nav_lat']
lon_keys = ['lon', 'longitude', 'nav_lon']
set_lon_lat_keys(
longitudes=lon_keys,
latitudes =lat_keys
)
# ## Functions
# function to get the longitude / latitude keys from the model
def get_lon_lat_keys(data, location_keys):
str_loc = [key for key in data.coords.keys() if key in location_keys]
if len(str_loc) != 1:
print('CAUTION: For model ' + model + ' there exist the keys: ' + str(str_loc) + '. We must take care of that separately!')
return str_loc[0]
# let all datasets have the same representation of time
def time_to_year_month(data):
return data.assign_coords(dict(time = [np.datetime64(d, 'M') for d in data.time.values]))
# function to extract subpolar gyre mean
def subpolargyre(data, str_lon, str_lat, print_info=False):
mean_dims = [d for d in data.dims if d != "time"]
if print_info:
print(" The mean is taken over the dimensions ", mean_dims)
if np.sum(data[str_lon].values<0) > 0:
lon_max_tmp = lon_max_W
lon_min_tmp = lon_min_W
else:
lon_max_tmp = lon_max
lon_min_tmp = lon_min
spg = (
data
.where(data[str_lon] >= lon_min_tmp)
.where(data[str_lon] <= lon_max_tmp)
.where(data[str_lat] >= lat_min)
.where(data[str_lat] <= lat_max)
)
spg = spg.mean(dim = mean_dims)
gl = data.mean(dim = mean_dims)
spg = spg - gl
# spg = spg.groupby('time.year').mean() # that would produce yearly means
return spg
# wrapper to get the spg mean for an ensemble member
def get_subpolar_gyre(model, emember, print_info=False):
paths = paths_all_models[model][emember]
if len(paths) != 1:
# there is one model where there are 2 paths for the same ensemble member...
print("CAUTION: Model '" + model + "', ensemble member '" + emember + "' was not added!")
print(" (For this ensemble-member, " + str(len(paths)) + " versions are provided, so the version must be selected manullay!")
return None
else:
data = xr.open_mfdataset(paths[0], concat_dim="time").tos
str_lon = get_lon_lat_keys(data, lon_keys)
str_lat = get_lon_lat_keys(data, lat_keys)
if print_info:
print(' + Longitude: ' + str_lon)
print(' + Latitude: ' + str_lat)
spg = subpolargyre(data, str_lon, str_lat, print_info)
spg = time_to_year_month(spg)
if print_info: print(' Ensemble-members added:')
print(' - ' +emember)
return spg
# ## Get paths as dict of models and ensemble_members
path_base = '/p/tmp/mayayami/SYNDA/data/CMIP6/CMIP/'
institutes = os.listdir(path_base)
paths_all_models = dict()
for i in institutes:
pi = path_base + i
models = os.listdir(pi)
for m in models:
pim = pi + '/' + m + '/historical/'
emembers = os.listdir(pim)
tmp = dict()
for v in emembers:
paths = [x[0] for x in os.walk(pim+v)]
data_paths = []
for path in paths:
if glob.glob("{}/tos_Omon_*.nc".format(path)):
data_paths.append(glob.glob("{}/tos_Omon_*.nc".format(path)))
if (len(data_paths)) != 1:
print(m)
print(data_paths)
tmp[v] = data_paths
paths_all_models[m] = tmp
get_values()
# # Loop over all models and ensemble members
# Get paths as dict of models and ensemble_members
paths_all_models = get_paths_dict()
# Load shared amoc index file
amoc_index = xr.open_dataarray("CMIP6_amoc_index.nc")
amoc_index = time_to_year_month(amoc_index)
# # Loop over all models and ensemble members
for model, emembers in paths_all_models.items():
print('------------------------------------------')
print(model)
......@@ -133,12 +58,18 @@ for model, emembers in paths_all_models.items():
# the models were only f2 or f3 are available still have the "f1" coordinates in the amoc_index file
emember_in_amoc_index = emember[:-1]+"1"
spg = get_subpolar_gyre(model, emember, print_info)
spg = get_subpolar_gyre(
model = model,
emember = emember,
print_info = print_info,
paths_all_models = paths_all_models)
if not spg is None:
print_info = False
amoc_index.loc[dict(models=model, ensemble_members=emember_in_amoc_index)] = spg
# Save the amoc index file with its new values
amoc_index.to_netcdf('CMIP6_amoc_index.nc')
import numpy as np
import netCDF4 as nc
import matplotlib.pyplot as plt
from jupyterthemes import jtplot
import matplotlib as mpl
import pandas as pd
import datetime
from mpl_toolkits.axes_grid1 import make_axes_locatable
import cartopy.crs as ccrs
import os
import glob
import xarray as xr
# Global variables
# Region
lat_max = 61
lat_min = 46
lon_max = 360-20
lon_min = 360-55
lon_max_W = -20
lon_min_W = -55
# these seem to be the three latitude/longitude keys
lat_keys = ['lat', 'latitude', 'nav_lat']
lon_keys = ['lon', 'longitude', 'nav_lon']
# Functions to specify/edit the global variables
# set parameters
def set_boundaries(
longitude_maximum = 360-20,
longitude_minimum = 360-55,
latitude_maximum = 61,
latitude_minimum = 46):
global lon_max, lon_min, lat_max, lat_min, lon_max_W, lon_min_W
lon_max = longitude_maximum
lon_min = longitude_minimum
lat_max = latitude_maximum
lat_min = latitude_minimum
lon_max_W = lon_max-360
lon_min_W = lon_min-360
def set_lon_lat_keys(
longitudes=['lon', 'longitude', 'nav_lon'],
latitudes =['lat', 'latitude', 'nav_lat']):
global lon_keys, lat_keys
lon_keys = longitudes
lat_keys = latitudes
def get_values():
print("lon_min: "+str(lon_min))
print("lon_max: "+str(lon_max))
print("lat_min: "+str(lat_min))
print("lat_max: "+str(lat_max))
print("lon_keys: "+str(lon_keys))
print("lat_keys: "+str(lat_keys))
# function to get the longitude / latitude keys from the model
def get_lon_lat_keys(data, location_keys):
str_loc = [key for key in data.coords.keys() if key in location_keys]
if len(str_loc) != 1:
print('CAUTION: There exist the keys: ' + str(str_loc) + '. The first one is used!')
return str_loc[0]
# let all datasets have the same representation of time
def time_to_year_month(data):
return data.assign_coords(dict(time = [np.datetime64(d, 'M') for d in data.time.values]))
# function to extract subpolar gyre mean
def subpolargyre(data, str_lon, str_lat, print_info=False):
mean_dims = [d for d in data.dims if d != "time"]
if print_info:
print(" The mean is taken over the dimensions ", mean_dims)
if np.sum(data[str_lon].values<0) > 0:
lon_max_tmp = lon_max_W
lon_min_tmp = lon_min_W
else:
lon_max_tmp = lon_max
lon_min_tmp = lon_min
spg = (
data
.where(data[str_lon] >= lon_min_tmp)
.where(data[str_lon] <= lon_max_tmp)
.where(data[str_lat] >= lat_min)
.where(data[str_lat] <= lat_max)
)
spg = spg.mean(dim = mean_dims)
gl = data.mean(dim = mean_dims)
spg = spg - gl
# spg = spg.groupby('time.year').mean() # that would produce yearly means
return spg
# wrapper to get the spg mean for an ensemble member
def get_subpolar_gyre(model, emember, print_info=False, paths_all_models=None):
if paths_all_models is None:
paths_all_models = get_paths_dict()
paths = paths_all_models[model][emember]
if len(paths) != 1:
# there is one model where there are 2 paths for the same ensemble member...
print("CAUTION: The model '" + model + "' provides " + str(len(paths)) + " paths for the ensemble member '" + emember + "'. It is NOT added to CMIP6_amoc_index.nc, this must be done manually!")
return None
if model == 'EC-Earth3-Veg' and emember == 'r10i1p1f1':
print(" Ensemble-member '"+emember+ "' does not provide all years. It also does not occur in Matthew's data, so its not included here.")
return None
else:
data = xr.open_mfdataset(paths[0], concat_dim="time").tos
str_lon = get_lon_lat_keys(data, lon_keys)
str_lat = get_lon_lat_keys(data, lat_keys)
if print_info:
print(' + Longitude: ' + str_lon)
print(' + Latitude: ' + str_lat)
spg = subpolargyre(data, str_lon, str_lat, print_info)
spg = time_to_year_month(spg)
if print_info: print(' Ensemble-members added:')
print(' - ' +emember)
return spg
# make plots of world
def make_nice_plot(ds, str_lon, str_lat):
fig, axis = plt.subplots(
1, 1, subplot_kw=dict(projection=ccrs.Orthographic(central_longitude=320))
)
ds.plot(
x = str_lon,
y = str_lat,
ax = axis,
transform = ccrs.PlateCarree()
)
axis.coastlines(linewidth=2,alpha=0.6)
return fig
# make nice plots of spg area
def make_nice_plot_of_spg(data, str_lon, str_lat, model=None):
if np.sum(data[str_lon].values<0) > 0:
lon_max_tmp = lon_max_W
lon_min_tmp = lon_min_W
else:
lon_max_tmp = lon_max
lon_min_tmp = lon_min
spg = (
data
.where(data[str_lon] >= lon_min_tmp)
.where(data[str_lon] <= lon_max_tmp)
.where(data[str_lat] >= lat_min)
.where(data[str_lat] <= lat_max)
)
fig, axis = plt.subplots(
1, 1, subplot_kw=dict(projection=ccrs.Orthographic(central_longitude=320))
)
spg.plot(
x = str_lon,
y = str_lat,
ax = axis,
transform = ccrs.PlateCarree()
)
axis.coastlines(linewidth=2,alpha=0.6)
if not model is None:
fig.suptitle(model)
return fig
def get_paths_dict():
path_base = '/p/tmp/mayayami/SYNDA/data/CMIP6/CMIP/'
institutes = os.listdir(path_base)
paths_all_models = dict()
for i in institutes:
pi = path_base + i
models = os.listdir(pi)
for m in models:
pim = pi + '/' + m + '/historical/'
emembers = os.listdir(pim)
tmp = dict()
for v in emembers:
paths = [x[0] for x in os.walk(pim+v)]
data_paths = []
for path in paths:
if glob.glob("{}/tos_Omon_*.nc".format(path)):
data_paths.append(glob.glob("{}/tos_Omon_*.nc".format(path)))
if (len(data_paths)) != 1:
print("Several paths provided for model '"+m+"' and ensemble member '"+v+"'!")
print(data_paths)
tmp[v] = data_paths
paths_all_models[m] = tmp
return paths_all_models
......@@ -9,9 +9,18 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 36,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"source": [
"import numpy as np\n",
"import netCDF4 as nc\n",
......@@ -26,7 +35,10 @@
"import os\n",
"import glob\n",
"import xarray as xr\n",
"from get_subpolar_gyre_functions import *\n",
"\n",
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"jtplot.style(context='paper', fscale=1.4, spines=True, grid=False, ticks=True,gridlines='--')\n",
"\n",
......@@ -52,222 +64,65 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 37,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"lon_min: 305\n",
"lon_max: 340\n",
"lat_min: 46\n",
"lat_max: 61\n",
"lon_keys: ['lon', 'longitude', 'nav_lon']\n",
"lat_keys: ['lat', 'latitude', 'nav_lat']\n"
]
}
],
"source": [
"# Region\n",
"lat_max = 61\n",
"lat_min = 46\n",
"lon_max = 360-20\n",
"lon_min = 360-55\n",
"lon_max_W = -20\n",
"lon_min_W = -55"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Functions"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"latitude_maximum = 61\n",
"latitude_minimum = 46\n",
"longitude_maximum = 360-20\n",
"longitude_minimum = 360-55\n",
"\n",
"set_boundaries(\n",
" longitude_maximum = longitude_maximum,\n",
" longitude_minimum = longitude_minimum,\n",
" latitude_maximum = latitude_maximum,\n",
" latitude_minimum = latitude_minimum\n",
")\n",
"\n",
"# these seem to be the three latitude/longitude keys\n",
"lat_keys = ['lat', 'latitude', 'nav_lat']\n",
"lon_keys = ['lon', 'longitude', 'nav_lon']\n",
"\n",
"# function to get the longitude / latitude keys from the model\n",
"def get_lon_lat_keys(data, location_keys):\n",
" str_loc = [key for key in data.coords.keys() if key in location_keys]\n",
" if len(str_loc) != 1:\n",
" print('CAUTION: For model ' + model + ' there exist the keys: ' + str(str_loc) + '. We must take care of that separately!')\n",
" return str_loc[0]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# let all datasets have the same representation of time\n",
"def time_to_year_month(data):\n",
" return data.assign_coords(dict(time = [np.datetime64(d, 'M') for d in data.time.values]))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# function to extract subpolar gyre mean\n",
"def subpolargyre(data, str_lon, str_lat, print_info=False):\n",
" mean_dims = [d for d in data.dims if d != \"time\"]\n",
" if print_info:\n",
" print(\" The mean is taken over the dimensions \", mean_dims)\n",
" if np.sum(data[str_lon].values<0) > 0:\n",
" lon_max_tmp = lon_max_W\n",
" lon_min_tmp = lon_min_W\n",
" else:\n",
" lon_max_tmp = lon_max\n",
" lon_min_tmp = lon_min\n",
" spg = (\n",
" data\n",
" .where(data[str_lon] >= lon_min_tmp)\n",
" .where(data[str_lon] <= lon_max_tmp)\n",
" .where(data[str_lat] >= lat_min)\n",
" .where(data[str_lat] <= lat_max)\n",
" )\n",
" spg = spg.mean(dim = mean_dims)\n",
" gl = data.mean(dim = mean_dims)\n",
" spg = spg - gl\n",
" # spg = spg.groupby('time.year').mean() # that would produce yearly means\n",
" return spg"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# wrapper to get the spg mean for an ensemble member\n",
"def get_subpolar_gyre(model, emember, print_info=False):\n",
"\n",
" paths = paths_all_models[model][emember]\n",
"\n",
" if len(paths) != 1:\n",
" # there is one model where there are 2 paths for the same ensemble member...\n",
" print(\"CAUTION: The model '\" + model + \"' provides \" + str(len(paths)) + \" paths for the ensemble member '\" + emember + \"', we have to think about what to do here\")\n",
" return None\n",
" if model == 'EC-Earth3-Veg' and emember == 'r10i1p1f1':\n",
" print(\" Ensemble-member '\"+emember+ \"' does not provide all years. It also does not occur in Matthew's data, so its not included here.\")\n",
" return None\n",
" else:\n",
"\n",
" data = xr.open_mfdataset(paths[0], concat_dim=\"time\").tos\n",
"\n",
" str_lon = get_lon_lat_keys(data, lon_keys)\n",
" str_lat = get_lon_lat_keys(data, lat_keys)\n",
" if print_info:\n",
" print(' + Longitude: ' + str_lon)\n",
" print(' + Latitude: ' + str_lat)\n",
"\n",
" spg = subpolargyre(data, str_lon, str_lat, print_info)\n",
" spg = time_to_year_month(spg)\n",
"set_lon_lat_keys(\n",
" longitudes=lon_keys,\n",
" latitudes =lat_keys\n",
")\n",
"\n",
" # if print_info: print(' Ensemble-members added:')\n",
" # print(' - ' +emember)\n",
"\n",
" return spg"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def make_nice_plot(ds, str_lon, str_lat):\n",
" fig, axis = plt.subplots(\n",
" 1, 1, subplot_kw=dict(projection=ccrs.Orthographic(central_longitude=320))\n",
" )\n",
" ds.plot(\n",
" x = str_lon, \n",
" y = str_lat,\n",
" ax = axis,\n",
" transform = ccrs.PlateCarree()\n",
" )\n",
" axis.coastlines(linewidth=2,alpha=0.6)\n",
" return fig"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def make_nice_plot_of_spg(data, str_lon, str_lat, model=None):\n",
" if np.sum(data[str_lon].values<0) > 0:\n",
" lon_max_tmp = lon_max_W\n",
" lon_min_tmp = lon_min_W\n",
" else:\n",
" lon_max_tmp = lon_max\n",
" lon_min_tmp = lon_min\n",
" spg = (\n",
" data\n",
" .where(data[str_lon] >= lon_min_tmp)\n",
" .where(data[str_lon] <= lon_max_tmp)\n",
" .where(data[str_lat] >= lat_min)\n",
" .where(data[str_lat] <= lat_max)\n",
" )\n",
" fig, axis = plt.subplots(\n",
" 1, 1, subplot_kw=dict(projection=ccrs.Orthographic(central_longitude=320))\n",
" )\n",
" spg.plot(\n",
" x = str_lon, \n",
" y = str_lat,\n",
" ax = axis,\n",
" transform = ccrs.PlateCarree()\n",
" )\n",
" axis.coastlines(linewidth=2,alpha=0.6)\n",
" if not model is None:\n",
" fig.suptitle(model)\n",
" return fig"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get paths as dict of models and ensemble_members"
"get_values()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"UKESM1-0-LL\n",
"Several paths provided for model 'UKESM1-0-LL' and ensemble member 'r1i1p1f2'!\n",
"[['/p/tmp/mayayami/SYNDA/data/CMIP6/CMIP/MOHC/UKESM1-0-LL/historical/r1i1p1f2/Omon/tos/gn/v20190406/tos_Omon_UKESM1-0-LL_historical_r1i1p1f2_gn_195001-201412.nc', '/p/tmp/mayayami/SYNDA/data/CMIP6/CMIP/MOHC/UKESM1-0-LL/historical/r1i1p1f2/Omon/tos/gn/v20190406/tos_Omon_UKESM1-0-LL_historical_r1i1p1f2_gn_185001-194912.nc'], ['/p/tmp/mayayami/SYNDA/data/CMIP6/CMIP/MOHC/UKESM1-0-LL/historical/r1i1p1f2/Omon/tos/gn/v20190627/tos_Omon_UKESM1-0-LL_historical_r1i1p1f2_gn_195001-201412.nc', '/p/tmp/mayayami/SYNDA/data/CMIP6/CMIP/MOHC/UKESM1-0-LL/historical/r1i1p1f2/Omon/tos/gn/v20190627/tos_Omon_UKESM1-0-LL_historical_r1i1p1f2_gn_185001-194912.nc']]\n"
]
}
],
"source": [
"path_base = '/p/tmp/mayayami/SYNDA/data/CMIP6/CMIP/'\n",
"institutes = os.listdir(path_base)\n",
"paths_all_models = dict()\n",
"\n",