Commit 860ec2d3 authored by Simon Treu
Added a scatterplot for example stations, similar to Fig. 3 in Muis et al.

parent 3dd79562
%% Cell type:code id: tags:
``` python
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from gslcomp.data_loading import load_gtsm_output
from gslcomp.utils import h5load, read_tide_gauge, remove_annual_mean
from gslcomp.config import ConfigParser
import numpy as np
import pwlf
from scipy.fft import fft, fftfreq
from scipy.stats import gaussian_kde, linregress
import settings as s
import re
from pathlib import Path
from shapely.geometry import Point
from scipy.spatial import cKDTree
import datetime
```
%% Cell type:code id: tags:
``` python
# matplotlib settings
plt.rcParams["figure.figsize"] = (15,5)
alpha=.5
```
%% Cell type:code id: tags:
``` python
validation_file = s.OUTPUT_BASE / "GSLcomp_01_complete" / "validation" / "validation_measures.h5"
validation_df, _ = h5load(validation_file)
```
%% Cell type:code id: tags:
``` python
validation_df
```
%% Cell type:code id: tags:
``` python
# station names given as regular expressions (matched against GESLA-2 file names)
station_names = [
'aburatsu', 'boston',
'brisbane', 'goteborg', 'lerwick',
'los.*angeles.*usa',
'pohnpei.*micronesia',
'rio.*de.*janeiro',
'tofino', 'valparaiso', 'zanzibar']
```
%% Cell type:code id: tags:
``` python
run_id = "GSLcomp_01_complete"
# todo set output_base here and remove settings.py
args_file = s.OUTPUT_BASE / run_id / "args.txt"
conf = ConfigParser().parse(args=[f"@{str(args_file)}"])
gesla_2_dir = conf.input_dir / 'GESLA_2'
```
%% Cell type:code id: tags:
``` python
class ValidationLocation:
    """Bundle tide-gauge observations with the matching GTSM and combined model output for one location."""

    def __init__(self, validation_df, name, source=None):
        self.validation_df = validation_df
        self.name = name
        self.source = source
        if source is not None:
            self.source_filter = validation_df['tide_gauge_name'].str.contains(source)
        self.validation_df_selected = self.select_stations()
        self.get_tide_gauges_and_gtsm_stations()

    def select_stations(self):
        selected = self.validation_df['tide_gauge_name'].str.contains(self.name)
        if self.source is not None:
            selected = selected & self.source_filter
        return self.validation_df[selected]

    def get_tide_gauges_and_gtsm_stations(self):
        self.tide_gauge_list = []
        # todo tide_gauge -> longest_tide_gauge
        self.tide_gauge = None
        self.gtsm_tide_surge = None
        self.combined_data = None
        longest = 0
        for _, row in self.validation_df_selected.iterrows():
            tide_gauge_name = row['tide_gauge_name']
            tide_gauge = {
                'name': tide_gauge_name,
                # scale sea level by 1000 (presumably metres to millimetres)
                'df': read_tide_gauge(gesla_2_dir / tide_gauge_name)['sl'] * 1000
            }
            self.tide_gauge_list.append(tide_gauge)
            # keep the tide gauge with the most valid observations, together with
            # the matching GTSM and combined time series
            if tide_gauge['df'].dropna().size > longest:
                self.tide_gauge = tide_gauge
                gtsm_tide_surge_df, _, _, _ = load_gtsm_output(
                    Path(conf.input_dir) / 'GTSM_tide_surge' / f'{row["gtsm_filename"]}.nc',
                    conf
                )
                self.gtsm_tide_surge = {
                    'name': row['gtsm_filename'],
                    'df': gtsm_tide_surge_df['waterlevel']
                }
                combined_data_df = pd.read_hdf(
                    conf.output_dir / "combined_data" / f"{row['gtsm_filename']}.h5"
                )
                self.combined_data = {
                    'name': row['gtsm_filename'],
                    'df': combined_data_df
                }
                longest = tide_gauge['df'].dropna().size

        self.gtsm_tide_surge_list = []
        self.combined_data_list = []
        for gtsm_filename in self.validation_df_selected['gtsm_filename'].unique():
            gtsm_tide_surge_df, _, _, _ = load_gtsm_output(
                Path(conf.input_dir) / 'GTSM_tide_surge' / f'{gtsm_filename}.nc',
                conf
            )
            self.gtsm_tide_surge_list.append({
                'name': gtsm_filename,
                'df': gtsm_tide_surge_df['waterlevel']
            })
            combined_data_df = pd.read_hdf(
                conf.output_dir / "combined_data" / f"{gtsm_filename}.h5"
            )
            self.combined_data_list.append({
                'name': gtsm_filename,
                'df': combined_data_df
            })
```
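%% Cell type:markdown id: tags:
A minimal usage sketch of the class above. The optional `source` argument additionally filters on a substring of the tide-gauge file name; the `'uhslc'` value in the commented line is only an illustrative assumption about the GESLA-2 naming, not taken from this repository.
%% Cell type:code id: tags:
``` python
# sketch: build a single ValidationLocation and inspect the longest tide gauge
boston = ValidationLocation(validation_df, 'boston')
print(boston.tide_gauge['name'], boston.tide_gauge['df'].dropna().size)
# hypothetical source filter (substring of the file name, illustrative only):
# boston_uhslc = ValidationLocation(validation_df, 'boston', source='uhslc')
```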
%% Cell type:code id: tags:
``` python
validation_locations = []
for station_name in station_names:
    validation_locations.append(ValidationLocation(validation_df, station_name))
```
%% Cell type:code id: tags:
``` python
fig, axs = plt.subplots(nrows=len(station_names), ncols=1, figsize=(10, 55))
for i, validation_location in enumerate(validation_locations):
    for tide_gauge in validation_location.tide_gauge_list:
        axs[i].plot(
            tide_gauge['df'].loc['1979-01-01':'2015-12-31'].resample('1d').mean(),
            label=tide_gauge['name'],
            alpha=0.5
        )
    for gtsm_tide_surge in validation_location.gtsm_tide_surge_list:
        axs[i].plot(
            gtsm_tide_surge['df'].resample('1d').mean(),
            label=f"gtsm {gtsm_tide_surge['name']}",
            alpha=0.5
        )
    for combined_data in validation_location.combined_data_list:
        axs[i].plot(
            combined_data['df'].resample('1d').mean(),
            label=f"combined {combined_data['name']}",
            alpha=0.5
        )
    axs[i].legend()
```
%% Cell type:code id: tags:
``` python
# scatterplots with the annual mean removed, similar to Fig. 3 in Muis et al.
```
%% Cell type:code id: tags:
``` python
fig, axs = plt.subplots(nrows=len(validation_locations), ncols=2, figsize=(15, 55))
for i, validation_location in enumerate(validation_locations):
    axs[i, 0].set_title(f"{validation_location.name} gtsm")
    axs[i, 1].set_title(f"{validation_location.name} combined")
    df = pd.DataFrame(
        {
            'observed': remove_annual_mean(validation_location.tide_gauge['df']).resample('1d').max(),
            'gtsm': remove_annual_mean(validation_location.gtsm_tide_surge['df']).resample('1d').max(),
            'combined': remove_annual_mean(validation_location.combined_data['df']).resample('1d').max()
        })
    df = df.dropna()

    # gtsm: observed (x) vs modelled (y) daily maxima
    x = df['observed'].to_numpy()
    y = df['gtsm'].to_numpy()
    try:
        xy = np.vstack([x, y])
        z = gaussian_kde(xy)(xy)
        # sort the points by density, so that the densest points are plotted last
        idx = z.argsort()
        x, y, z = x[idx], y[idx], z[idx]
        axs[i, 0].scatter(x, y, c=z, cmap=plt.get_cmap('magma'))
    except ValueError as e:
        print(f"{validation_location.name} throws ValueError: {e}")

    # combined: observed (x) vs modelled (y) daily maxima
    x = df['observed'].to_numpy()
    y = df['combined'].to_numpy()
    try:
        xy = np.vstack([x, y])
        z = gaussian_kde(xy)(xy)
        # sort the points by density, so that the densest points are plotted last
        idx = z.argsort()
        x, y, z = x[idx], y[idx], z[idx]
        axs[i, 1].scatter(x, y, c=z, cmap=plt.get_cmap('magma'))
    except ValueError as e:
        print(f"{validation_location.name} throws ValueError: {e}")

    xy_min = 0
    xy_max = 3000
    for j in [0, 1]:
        axs[i, j].set_aspect('equal')
        axs[i, j].set_xlim([xy_min, xy_max])
        axs[i, j].set_ylim([xy_min, xy_max])
        axs[i, j].plot(
            axs[i, j].get_xlim(),
            axs[i, j].get_ylim(),
            '--',
            c='lightgray'
        )
```
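%% Cell type:markdown id: tags:
The density-coloured scatter pattern above (stack x and y, evaluate a Gaussian KDE, sort so the densest points are drawn last) is repeated for every panel. Below is a small refactoring sketch of that same pattern, not part of the original notebook; the helper name `density_scatter` is new.
%% Cell type:code id: tags:
``` python
def density_scatter(ax, x, y, cmap='magma'):
    """Scatter x vs y on ax, coloured by a Gaussian KDE estimate of the point density."""
    xy = np.vstack([x, y])
    z = gaussian_kde(xy)(xy)
    # plot the densest points last so they remain visible
    idx = z.argsort()
    ax.scatter(x[idx], y[idx], c=z[idx], cmap=plt.get_cmap(cmap))

# usage sketch: density_scatter(axs[i, 0], df['observed'].to_numpy(), df['gtsm'].to_numpy())
```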
%% Cell type:code id: tags:
``` python
fig, axs = plt.subplots(nrows=len(validation_locations), ncols=1, figsize=(15, 55))
for i, validation_location in enumerate(validation_locations):
    axs[i].set_title(validation_location.name)
    normalize = remove_annual_mean
    df = pd.DataFrame(
        {
            'observed': normalize(validation_location.tide_gauge['df']).resample('1d').max(),
            'gtsm': normalize(validation_location.gtsm_tide_surge['df']).resample('1d').max(),
            'combined': normalize(validation_location.combined_data['df']).resample('1d').max()
        })
    df = df.dropna()
    axs[i].scatter(
        df['observed'],
        df['gtsm'],
        label='gtsm',
        alpha=.03
    )
    axs[i].scatter(
        df['observed'],
        df['combined'],
        label='combined',
        alpha=.03
    )
    # linear fits of modelled vs observed daily maxima
    x = df['observed'].to_numpy()
    try:
        lin_fit_gtsm = linregress(df['observed'], df['gtsm'])
        axs[i].plot(x, lin_fit_gtsm.intercept + lin_fit_gtsm.slope * x,
                    'green',
                    label='linear fit gtsm')
        lin_fit_combined = linregress(df['observed'], df['combined'])
        axs[i].plot(x, lin_fit_combined.intercept + lin_fit_combined.slope * x,
                    'red',
                    label='linear fit combined')
    except ValueError as e:
        print(f"{validation_location.name} throws ValueError: {e}")
    xy_min = 0
    xy_max = 3000
    axs[i].set_aspect('equal')
    axs[i].set_xlim([xy_min, xy_max])
    axs[i].set_ylim([xy_min, xy_max])
    axs[i].plot(axs[i].get_xlim(), axs[i].get_ylim(), '--', c='black')
axs[0].legend();
```
%% Cell type:markdown id: tags:
# Alternative detrending: remove the linear trend of the observations from all series
%% Cell type:code id: tags:
``` python
def lin_detrend(df):
    """Remove the mean of each series and the linear trend fitted to the observations."""
    df1 = df.dropna().copy()
    for key in ['observed', 'gtsm', 'combined']:
        df1[f'n_{key}'] = df1.loc[:, key] - df1.loc[:, key].mean()
    # hours since 1979-01-01, used as the regression variable
    df1['hours_since_1979'] = df1.index.map(
        lambda x: (x.to_pydatetime() - datetime.datetime(1979, 1, 1, 0, 0)).total_seconds() / 3600
    )
    lin_trend = linregress(df1['hours_since_1979'], df1['n_observed'])
    df1['lin_trend'] = df1['hours_since_1979'].map(lambda x: lin_trend.slope * x + lin_trend.intercept)
    # subtract the observed linear trend from all (mean-removed) series
    for key in ['observed', 'gtsm', 'combined']:
        df1[f'detrended_{key}'] = df1[f'n_{key}'] - df1['lin_trend']
    return df1
```
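%% Cell type:markdown id: tags:
A quick self-contained check of `lin_detrend` on synthetic data (index, trend size and noise level are made up for illustration): an artificial linear rise is added to all three series and should be absent from the `detrended_*` columns.
%% Cell type:code id: tags:
``` python
# synthetic check of lin_detrend; all numbers below are illustrative assumptions
idx = pd.date_range('1979-01-01', periods=2 * 365 * 24, freq='h')
trend = np.linspace(0, 100, idx.size)  # artificial 100 mm rise over two years
noise = np.random.default_rng(0).normal(0, 10, idx.size)
toy = pd.DataFrame(
    {
        'observed': trend + noise,
        'gtsm': trend + 0.5 * noise,
        'combined': trend + 0.8 * noise,
    },
    index=idx,
)
detrended = lin_detrend(toy)
# the detrended observed series should have (close to) zero linear trend left
print(linregress(detrended['hours_since_1979'], detrended['detrended_observed']).slope)
```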
%% Cell type:code id: tags:
``` python
fig, axs = plt.subplots(nrows=len(validation_locations), ncols=2, figsize=(15, 55))
for i, validation_location in enumerate(validation_locations):
    axs[i, 0].set_title(f"{validation_location.name} annual_means")
    axs[i, 1].set_title(f"{validation_location.name} annual_means detrended")
    df = pd.DataFrame(
        {
            'observed': validation_location.tide_gauge['df'],
            'gtsm': validation_location.gtsm_tide_surge['df'],
            'combined': validation_location.combined_data['df']
        })
    try:
        df = lin_detrend(df)
    except ValueError as e:
        print(f"{validation_location.name} throws ValueError: {e}")
        continue
    for key in ['observed', 'gtsm', 'combined']:
        axs[i, 0].plot(
            df[f'n_{key}'].resample('1a').mean(),
            label=key
        )
        axs[i, 1].plot(
            df[f'detrended_{key}'].resample('1a').mean(),
            label=key + ' detrended'
        )
    axs[i, 0].plot(df['lin_trend'], label='linear trend (observed)')
    axs[i, 0].legend()
```
%% Cell type:code id: tags:
``` python
fig, axs = plt.subplots(nrows=len(validation_locations), ncols=2, figsize=(15, 55))
for i, validation_location in enumerate(validation_locations):
    axs[i, 0].set_title(f"{validation_location.name} gtsm")
    axs[i, 1].set_title(f"{validation_location.name} combined")
    df = pd.DataFrame(
        {
            'observed': validation_location.tide_gauge['df'],
            'gtsm': validation_location.gtsm_tide_surge['df'],
            'combined': validation_location.combined_data['df']
        })
    try:
        df = lin_detrend(df)
    except ValueError as e:
        print(f"{validation_location.name} throws ValueError: {e}")
        continue

    # gtsm: observed (x) vs modelled (y) daily maxima of the detrended series
    x = df['detrended_observed'].resample('1d').max().dropna().to_numpy()
    y = df['detrended_gtsm'].resample('1d').max().dropna().to_numpy()
    try:
        xy = np.vstack([x, y])
        z = gaussian_kde(xy)(xy)
        # sort the points by density, so that the densest points are plotted last
        idx = z.argsort()
        x, y, z = x[idx], y[idx], z[idx]
        axs[i, 0].scatter(x, y, c=z, cmap=plt.get_cmap('magma'))
    except ValueError as e:
        print(f"{validation_location.name} throws ValueError: {e}")

    # combined: observed (x) vs modelled (y) daily maxima of the detrended series
    x = df['detrended_observed'].resample('1d').max().dropna().to_numpy()
    y = df['detrended_combined'].resample('1d').max().dropna().to_numpy()
    try:
        xy = np.vstack([x, y])
        z = gaussian_kde(xy)(xy)
        # sort the points by density, so that the densest points are plotted last
        idx = z.argsort()
        x, y, z = x[idx], y[idx], z[idx]
        axs[i, 1].scatter(x, y, c=z, cmap=plt.get_cmap('magma'))
    except ValueError as e:
        print(f"{validation_location.name} throws ValueError: {e}")

    xy_min = 0
    xy_max = 3000
    for j in [0, 1]:
        axs[i, j].set_aspect('equal')
        axs[i, j].set_xlim([xy_min, xy_max])
        axs[i, j].set_ylim([xy_min, xy_max])
        axs[i, j].plot(
            axs[i, j].get_xlim(),
            axs[i, j].get_ylim(),
            '--',
            c='lightgray'
        )
        axs[i, j].set_xlabel('observed')
    axs[i, 0].set_ylabel('gtsm')
    axs[i, 1].set_ylabel('combined')
```
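%% Cell type:markdown id: tags:
A sketch (not part of the original notebook) that condenses the detrended daily-maxima comparison above into a few numbers per station; the choice of metrics (bias, RMSE, Pearson correlation) is an assumption, not taken from Muis et al.
%% Cell type:code id: tags:
``` python
# summary statistics of detrended daily maxima, modelled vs observed (sketch)
rows = []
for validation_location in validation_locations:
    df = pd.DataFrame(
        {
            'observed': validation_location.tide_gauge['df'],
            'gtsm': validation_location.gtsm_tide_surge['df'],
            'combined': validation_location.combined_data['df']
        })
    try:
        df = lin_detrend(df)
    except ValueError as e:
        print(f"{validation_location.name} throws ValueError: {e}")
        continue
    daily = df[['detrended_observed', 'detrended_gtsm', 'detrended_combined']].resample('1d').max().dropna()
    for model in ['gtsm', 'combined']:
        diff = daily[f'detrended_{model}'] - daily['detrended_observed']
        rows.append({
            'station': validation_location.name,
            'model': model,
            'bias': diff.mean(),
            'rmse': np.sqrt((diff ** 2).mean()),
            'pearson_r': daily[f'detrended_{model}'].corr(daily['detrended_observed']),
        })
pd.DataFrame(rows)
```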
%% Cell type:code id: tags:
``` python
fig, axs = plt.subplots(nrows=len(validation_locations), ncols=1, figsize=(15, 55))
for i, validation_location in enumerate(validation_locations):
    axs[i].set_title(validation_location.name)
    df = pd.DataFrame(
        {
            'observed': validation_location.tide_gauge['df'],
            'gtsm': validation_location.gtsm_tide_surge['df'],
            'combined': validation_location.combined_data['df']
        })
    try:
        df = lin_detrend(df)
    except ValueError as e:
        print(f"{validation_location.name} throws ValueError: {e}")
        continue
    obs = df['detrended_observed'].resample('1d').max().dropna()
    gtsm = df['detrended_gtsm'].resample('1d').max().dropna()
    combined = df['detrended_combined'].resample('1d').max().dropna()
    axs[i].scatter(
        obs,
        gtsm,
        label='gtsm',
        alpha=.03
    )
    axs[i].scatter(
        obs,
        combined,
        label='combined',
        alpha=.03
    )
    # linear fits of modelled vs observed detrended daily maxima
    x = obs.to_numpy()
    try:
        lin_fit_gtsm = linregress(obs, gtsm)
        axs[i].plot(x, lin_fit_gtsm.intercept + lin_fit_gtsm.slope * x,
                    'green',
                    label='linear fit gtsm')
        lin_fit_combined = linregress(obs, combined)
        axs[i].plot(x, lin_fit_combined.intercept + lin_fit_combined.slope * x,
                    'red',
                    label='linear fit combined')
    except ValueError as e:
        print(f"{validation_location.name} throws ValueError: {e}")
    xy_min = 0
    xy_max = 3000
    axs[i].set_aspect('equal')
    axs[i].set_xlim([xy_min, xy_max])
    axs[i].set_ylim([xy_min, xy_max])
    axs[i].set_xlabel('observed')
    axs[i].set_ylabel('modelled')
    axs[i].plot(axs[i].get_xlim(), axs[i].get_ylim(), '--', c='black')
axs[0].legend();
```
%% Cell type:code id: tags:
``` python
world = gpd.read_file(
    gpd.datasets.get_path('naturalearth_lowres')
)
ax = world.plot(color='white', edgecolor='black')
# note: `tide_gauges` (a list of dicts, each with a 'stations' GeoDataFrame)
# is not defined in this notebook; this cell only runs if it is provided elsewhere
for tide_gauge in tide_gauges:
    tide_gauge['stations'].plot(ax=ax, color='red')
```