Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
G
GSLcomp-jupyter
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
Simon Treu
GSLcomp-jupyter
Commits
860ec2d3
Commit
860ec2d3
authored
4 years ago
by
Simon Treu
Browse files
Options
Downloads
Patches
Plain Diff
added scatterplot for example stations similar to fig. 3 in muis et al
parent
3dd79562
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
scatterplot_obs_modelled.ipynb
+254
-25
254 additions, 25 deletions
scatterplot_obs_modelled.ipynb
with
254 additions
and
25 deletions
scatterplot_obs_modelled.ipynb
+
254
−
25
View file @
860ec2d3
...
...
@@ -20,7 +20,8 @@
"import re\n",
"from pathlib import Path\n",
"from shapely.geometry import Point\n",
"from scipy.spatial import cKDTree"
"from scipy.spatial import cKDTree\n",
"import datetime"
]
},
{
...
...
@@ -51,6 +52,16 @@
"metadata": {},
"outputs": [],
"source": [
"validation_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# add station name as regex\n",
"station_names = [\n",
" 'aburatsu', 'boston', \n",
" 'brisbane', 'goteborg', 'lerwick', \n",
...
...
@@ -67,6 +78,7 @@
"outputs": [],
"source": [
"run_id = \"GSLcomp_01_complete\"\n",
"# todo set output_base here and remove settings.py\n",
"\n",
"args_file = s.OUTPUT_BASE / run_id / \"args.txt\"\n",
"conf = ConfigParser().parse(args=[f\"@{str(args_file)}\"])\n",
...
...
@@ -100,6 +112,7 @@
" \n",
" def get_tide_gauges_and_gtsm_stations(self):\n",
" self.tide_gauge_list = []\n",
" # todo tide_gauge -> longest_tide_gauge\n",
" self.tide_gauge = None\n",
" self.gtsm_tide_surge = None\n",
" self.combined_data = None\n",
...
...
@@ -169,17 +182,13 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"fig, axs = plt.subplots(nrows = len(station_names), ncols=1, figsize=(10,55))\n",
"for i, validation_location in enumerate(validation_locations):\n",
" for tide_gauge in validation_location.tide_gauge_list:\n",
" axs[i].plot(\n",
" tide_gauge['df'].loc['1979-01-01':'2015-12-31'].resample('1d').mean(), \n",
" label=tide_gauge['name'],\n",
" alpha=.5\n",
" )\n",
" for gtsm_tide_surge in validation_location.gtsm_tide_surge_list:\n",
" axs[i].plot(\n",
" gtsm_tide_surge['df'].resample('1d').mean(),\n",
...
...
@@ -192,6 +201,12 @@
" label = f\"combined {combined_data['name']}\",\n",
" alpha = 0.5\n",
" )\n",
" for tide_gauge in validation_location.tide_gauge_list:\n",
" axs[i].plot(\n",
" tide_gauge['df'].loc['1979-01-01':'2015-12-31'].resample('1d').mean(), \n",
" label=tide_gauge['name'],\n",
" alpha=.5\n",
" )\n",
" axs[i].legend()\n",
" "
]
...
...
@@ -211,19 +226,22 @@
"metadata": {},
"outputs": [],
"source": [
"fig, axs = plt.subplots(nrows=len(validation_locations), ncols=
1
, figsize=(15, 55))\n",
"fig, axs = plt.subplots(nrows=len(validation_locations), ncols=
2
, figsize=(15, 55))\n",
"for i, validation_location in enumerate(validation_locations):\n",
" axs[i].set_title(validation_location.name)\n",
" axs[i,0].set_title(f\"{validation_location.name} gtsm\")\n",
" axs[i,1].set_title(f\"{validation_location.name} combined\")\n",
" \n",
" \n",
" normalize = remove_annual_mean\n",
" \n",
" df = pd.DataFrame(\n",
" {\n",
" 'observed':
normalize
(validation_location.tide_gauge['df']).resample('1d').max(),\n",
" 'gtsm':
normalize
(validation_location.gtsm_tide_surge['df']).resample('1d').max(),\n",
"
#
'combined':
(tide_gauge['combined_data']-tide_gauge['combined_data'].median())['1980-01-01':'2010-01-01
'].resample('1d').max()\n",
" 'observed':
remove_annual_mean
(validation_location.tide_gauge['df']).resample('1d').max(),\n",
" 'gtsm':
remove_annual_mean
(validation_location.gtsm_tide_surge['df']).resample('1d').max(),\n",
" 'combined':
remove_annual_mean(validation_location.combined_data['df
']
)
.resample('1d').max()\n",
" })\n",
" df = df.dropna()\n",
" \n",
" # gtsm\n",
" #observed\n",
" x = df['observed'].to_numpy()\n",
" # modelled\n",
...
...
@@ -235,20 +253,36 @@
" # Sort the points by density, so that the densest points are plotted last\n",
" idx = z.argsort()\n",
" x, y, z = x[idx], y[idx], z[idx]\n",
" axs[i].scatter(x, y, c=z, cmap=plt.get_cmap('magma'))\n",
" axs[i,0].scatter(x, y, c=z, cmap=plt.get_cmap('magma'))\n",
" except ValueError as e:\n",
" print(f\"{validation_location.name} throws ValueError: {e}\")\n",
" \n",
" #combined_data\n",
" x = df['observed'].to_numpy()\n",
" y = df['combined'].to_numpy()\n",
" \n",
" try:\n",
" xy = np.vstack([x,y])\n",
" z = gaussian_kde(xy)(xy)\n",
" # Sort the points by density, so that the densest points are plotted last\n",
" idx = z.argsort()\n",
" x, y, z = x[idx], y[idx], z[idx]\n",
" axs[i,1].scatter(x, y, c=z, cmap=plt.get_cmap('magma'))\n",
" except ValueError as e:\n",
" print(f\"{validation_location.name} throws ValueError: {e}\")\n",
" \n",
" xy_min=0\n",
" xy_max=3000\n",
" axs[i].set_aspect('equal')\n",
" axs[i].set_xlim([xy_min, xy_max])\n",
" axs[i].set_ylim([xy_min, xy_max])\n",
" axs[i].plot(\n",
" axs[i].get_xlim(), \n",
" axs[i].get_ylim(), \n",
" c='black'\n",
" )"
" for j in [0, 1]:\n",
" axs[i, j].set_aspect('equal')\n",
" axs[i, j].set_xlim([xy_min, xy_max])\n",
" axs[i, j].set_ylim([xy_min, xy_max])\n",
" axs[i, j].plot(\n",
" axs[i, j].get_xlim(), \n",
" axs[i, j].get_ylim(), \n",
" '--',\n",
" c='lightgray'\n",
" )"
]
},
{
...
...
@@ -348,19 +382,214 @@
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Alternative detrending, relative to observations"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"def lin_detrend(df):\n",
" df1 = df.dropna().copy()\n",
" for key in ['observed', 'gtsm', 'combined']:\n",
" df1[f'n_{key}'] = df1.loc[:, key]-df1.loc[:,key].mean()\n",
" df1['hours_since_1979'] = df1.index.map(lambda x: (x.to_pydatetime()-datetime.datetime(1979, 1, 1, 0, 0)).total_seconds()/3600)\n",
" lin_trend = linregress(df1['hours_since_1979'], df1['n_observed'])\n",
" df1['lin_trend'] = df1['hours_since_1979'].map(lambda x: (lin_trend.slope * x + lin_trend.intercept))\n",
" for key in ['observed', 'gtsm', 'combined']:\n",
" df1[f'detrended_{key}'] = df1[f'n_{key}']-df1['lin_trend']\n",
" return df1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"fig, axs = plt.subplots(nrows=len(validation_locations), ncols=2, figsize=(15, 55))\n",
"for i, validation_location in enumerate(validation_locations):\n",
" axs[i,0].set_title(f\"{validation_location.name} annual_means\")\n",
" axs[i,1].set_title(f\"{validation_location.name} annual_means detrended\")\n",
" \n",
" #normalize = remove_annual_mean\n",
" \n",
" df = pd.DataFrame(\n",
" {\n",
" 'observed': validation_location.tide_gauge['df'],\n",
" 'gtsm': validation_location.gtsm_tide_surge['df'],\n",
" 'combined': validation_location.combined_data['df']\n",
" })\n",
" try:\n",
" df = lin_detrend(df)\n",
" except ValueError as e:\n",
" print(f\"{validation_location.name} throws ValueError: {e}\")\n",
" continue\n",
" \n",
" for key in ['observed', 'gtsm', 'combined']:\n",
" axs[i, 0].plot(\n",
" df[f'n_{key}'].resample('1a').mean(),\n",
" label=key\n",
" )\n",
" axs[i, 1].plot(\n",
" df[f'detrended_{key}'].resample('1a').mean(),\n",
" label=key + ' detrended'\n",
" )\n",
" axs[i,0].plot(df['lin_trend'])\n",
" axs[i,0].legend()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, axs = plt.subplots(nrows=len(validation_locations), ncols=2, figsize=(15, 55))\n",
"for i, validation_location in enumerate(validation_locations):\n",
" axs[i,0].set_title(f\"{validation_location.name} gtsm\")\n",
" axs[i,1].set_title(f\"{validation_location.name} combined\")\n",
" \n",
" #normalize = remove_annual_mean\n",
" \n",
" df = pd.DataFrame(\n",
" {\n",
" 'observed': validation_location.tide_gauge['df'],\n",
" 'gtsm': validation_location.gtsm_tide_surge['df'],\n",
" 'combined': validation_location.combined_data['df']\n",
" })\n",
" try:\n",
" df = lin_detrend(df)\n",
" except ValueError as e:\n",
" print(f\"{validation_location.name} throws ValueError: {e}\")\n",
" continue\n",
" \n",
" # gtsm\n",
" #observed\n",
" x = df['detrended_observed'].resample('1d').max().dropna().to_numpy()\n",
" # modelled\n",
" y = df['detrended_gtsm'].resample('1d').max().dropna().to_numpy()\n",
" \n",
" try:\n",
" xy = np.vstack([x,y])\n",
" z = gaussian_kde(xy)(xy)\n",
" # Sort the points by density, so that the densest points are plotted last\n",
" idx = z.argsort()\n",
" x, y, z = x[idx], y[idx], z[idx]\n",
" axs[i,0].scatter(x, y, c=z, cmap=plt.get_cmap('magma'))\n",
" except ValueError as e:\n",
" print(f\"{validation_location.name} throws ValueError: {e}\")\n",
" \n",
" #combined_data\n",
" x = df['detrended_observed'].resample('1d').max().dropna().to_numpy()\n",
" y = df['detrended_combined'].resample('1d').max().dropna().to_numpy()\n",
" \n",
" try:\n",
" xy = np.vstack([x,y])\n",
" z = gaussian_kde(xy)(xy)\n",
" # Sort the points by density, so that the densest points are plotted last\n",
" idx = z.argsort()\n",
" x, y, z = x[idx], y[idx], z[idx]\n",
" axs[i,1].scatter(x, y, c=z, cmap=plt.get_cmap('magma'))\n",
" except ValueError as e:\n",
" print(f\"{validation_location.name} throws ValueError: {e}\")\n",
" \n",
" xy_min=0\n",
" xy_max=3000\n",
" for j in [0, 1]:\n",
" axs[i, j].set_aspect('equal')\n",
" axs[i, j].set_xlim([xy_min, xy_max])\n",
" axs[i, j].set_ylim([xy_min, xy_max])\n",
" axs[i, j].plot(\n",
" axs[i, j].get_xlim(), \n",
" axs[i, j].get_ylim(), \n",
" '--',\n",
" c='lightgray'\n",
" )\n",
" axs[i,j].set_xlabel('observed')\n",
" axs[i,0].set_ylabel('gtsm')\n",
" axs[i,1].set_ylabel('combined')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, axs = plt.subplots(nrows=len(validation_locations), ncols=1, figsize=(15, 55))\n",
"for i, validation_location in enumerate(validation_locations):\n",
" axs[i].set_title(validation_location.name)\n",
" \n",
" df = pd.DataFrame(\n",
" {\n",
" 'observed': validation_location.tide_gauge['df'],\n",
" 'gtsm': validation_location.gtsm_tide_surge['df'],\n",
" 'combined': validation_location.combined_data['df']\n",
" })\n",
" try:\n",
" df = lin_detrend(df)\n",
" except ValueError as e:\n",
" print(f\"{validation_location.name} throws ValueError: {e}\")\n",
" continue\n",
" obs = df['detrended_observed'].resample('1d').max().dropna()\n",
" gtsm = df['detrended_gtsm'].resample('1d').max().dropna()\n",
" combined = df['detrended_combined'].resample('1d').max().dropna()\n",
" axs[i].scatter(\n",
" obs,\n",
" gtsm,\n",
" label='gtsm',\n",
" alpha=.03\n",
" )\n",
" axs[i].scatter(\n",
" obs,\n",
" combined,\n",
" label='combined',\n",
" alpha=.03\n",
" )\n",
" \n",
" #linear fit\n",
" try:\n",
" lin_fit_gtsm = linregress(obs, gtsm)\n",
" axs[i].plot(x, lin_fit_gtsm.intercept + lin_fit_gtsm.slope*x, \n",
" 'green', \n",
" label='linear fit gtsm')\n",
"\n",
" lin_fit_gtsm = linregress(obs, combined)\n",
" axs[i].plot(x, lin_fit_gtsm.intercept + lin_fit_gtsm.slope*x, \n",
" 'red', \n",
" label='linear fit combined')\n",
" except ValueError as e:\n",
" print(f\"{validation_location.name} throws ValueError: {e}\")\n",
" \n",
" #xy_min = min(df['observed'].min(), df['gtsm'].min(), df['combined'].min())\n",
" #xy_max = max(df['observed'].max(), df['gtsm'].max(), df['combined'].max())\n",
" xy_min=0\n",
" xy_max=3000\n",
" #xy_min = -1000\n",
" #xy_max = 1000\n",
" axs[i].set_aspect('equal')\n",
"\n",
" \n",
" \n",
" try:\n",
" axs[i].set_xlim([xy_min, xy_max])\n",
" axs[i].set_ylim([xy_min, xy_max])\n",
" except:\n",
" axs[i].set_xlim([-500, 2000])\n",
" axs[i].set_ylim([-500, 2000])\n",
" \n",
" axs[i].set_xlabel('observed')\n",
" axs[i].set_ylabel('modelled')\n",
" axs[i].plot(axs[i].get_xlim(), axs[i].get_ylim(), '--', c='black')\n",
"axs[0].legend();"
]
},
{
"cell_type": "code",
...
...
%% Cell type:code id: tags:
```
python
import
pandas
as
pd
import
geopandas
as
gpd
import
matplotlib.pyplot
as
plt
from
gslcomp.data_loading
import
load_gtsm_output
from
gslcomp.utils
import
h5load
,
read_tide_gauge
,
remove_annual_mean
from
gslcomp.config
import
ConfigParser
import
numpy
as
np
import
pwlf
from
scipy.fft
import
fft
,
fftfreq
from
scipy.stats
import
gaussian_kde
,
linregress
import
settings
as
s
import
re
from
pathlib
import
Path
from
shapely.geometry
import
Point
from
scipy.spatial
import
cKDTree
import
datetime
```
%% Cell type:code id: tags:
```
python
# matplotlib settings
plt
.
rcParams
[
"
figure.figsize
"
]
=
(
15
,
5
)
alpha
=
.
5
```
%% Cell type:code id: tags:
```
python
validation_file
=
s
.
OUTPUT_BASE
/
"
GSLcomp_01_complete
"
/
"
validation
"
/
"
validation_measures.h5
"
validation_df
,
_
=
h5load
(
validation_file
)
```
%% Cell type:code id: tags:
```
python
validation_df
```
%% Cell type:code id: tags:
```
python
# add station name as regex
station_names
=
[
'
aburatsu
'
,
'
boston
'
,
'
brisbane
'
,
'
goteborg
'
,
'
lerwick
'
,
'
los.*angeles.*usa
'
,
'
pohnpei.*micronesia
'
,
'
rio.*de.*janeiro
'
,
'
tofino
'
,
'
valparaiso
'
,
'
zanzibar
'
]
```
%% Cell type:code id: tags:
```
python
run_id
=
"
GSLcomp_01_complete
"
# todo set output_base here and remove settings.py
args_file
=
s
.
OUTPUT_BASE
/
run_id
/
"
args.txt
"
conf
=
ConfigParser
().
parse
(
args
=
[
f
"
@
{
str
(
args_file
)
}
"
])
gesla_2_dir
=
conf
.
input_dir
/
'
GESLA_2
'
```
%% Cell type:code id: tags:
```
python
class
ValidationLocation
:
def
__init__
(
self
,
validation_df
,
name
,
source
=
None
):
self
.
validation_df
=
validation_df
self
.
name
=
name
self
.
source
=
source
if
source
is
not
None
:
self
.
source_filter
=
validation_df
[
'
tide_gauge_name
'
].
str
.
contains
(
source
)
self
.
validation_df_selected
=
self
.
select_stations
()
self
.
get_tide_gauges_and_gtsm_stations
()
def
select_stations
(
self
):
selected
=
self
.
validation_df
[
'
tide_gauge_name
'
].
str
.
contains
(
self
.
name
)
if
self
.
source
is
None
:
stations
=
validation_df
[
selected
]
else
:
stations
=
validation_df
[
selected
]
&
self
.
source_filter
return
stations
def
get_tide_gauges_and_gtsm_stations
(
self
):
self
.
tide_gauge_list
=
[]
# todo tide_gauge -> longest_tide_gauge
self
.
tide_gauge
=
None
self
.
gtsm_tide_surge
=
None
self
.
combined_data
=
None
l
=
0
for
_
,
row
in
self
.
validation_df_selected
.
iterrows
():
tide_gauge_name
=
row
[
'
tide_gauge_name
'
]
tide_gauge
=
{
'
name
'
:
tide_gauge_name
,
'
df
'
:
read_tide_gauge
(
gesla_2_dir
/
tide_gauge_name
)[
'
sl
'
]
*
1000
}
self
.
tide_gauge_list
.
append
(
tide_gauge
)
# get longest tide gauge
if
tide_gauge
[
'
df
'
].
dropna
().
size
>
l
:
self
.
tide_gauge
=
tide_gauge
gtsm_tide_surge_df
,
_
,
_
,
_
=
load_gtsm_output
(
Path
(
conf
.
input_dir
)
/
'
GTSM_tide_surge
'
/
f
'
{
row
[
"
gtsm_filename
"
]
}
.nc
'
,
conf
)
self
.
gtsm_tide_surge
=
{
'
name
'
:
row
[
'
gtsm_filename
'
],
'
df
'
:
gtsm_tide_surge_df
[
'
waterlevel
'
]
}
combined_data_df
=
pd
.
read_hdf
(
conf
.
output_dir
/
"
combined_data
"
/
f
"
{
row
[
'
gtsm_filename
'
]
}
.h5
"
)
self
.
combined_data
=
{
'
name
'
:
row
[
'
gtsm_filename
'
],
'
df
'
:
combined_data_df
}
l
=
tide_gauge
[
'
df
'
].
dropna
().
size
self
.
gtsm_tide_surge_list
=
[]
self
.
combined_data_list
=
[]
for
gtsm_filename
in
self
.
validation_df_selected
[
'
gtsm_filename
'
].
unique
():
gtsm_tide_surge_df
,
_
,
_
,
_
=
load_gtsm_output
(
Path
(
conf
.
input_dir
)
/
'
GTSM_tide_surge
'
/
f
'
{
gtsm_filename
}
.nc
'
,
conf
)
gtsm_tide_surge
=
{
'
name
'
:
gtsm_filename
,
'
df
'
:
gtsm_tide_surge_df
[
'
waterlevel
'
]
}
self
.
gtsm_tide_surge_list
.
append
(
gtsm_tide_surge
)
combined_data_df
=
pd
.
read_hdf
(
conf
.
output_dir
/
"
combined_data
"
/
f
"
{
gtsm_filename
}
.h5
"
)
combined_data
=
{
'
name
'
:
gtsm_filename
,
'
df
'
:
combined_data_df
}
self
.
combined_data_list
.
append
(
combined_data
)
```
%% Cell type:code id: tags:
```
python
validation_locations
=
[]
for
i
,
station_name
in
enumerate
(
station_names
):
validation_locations
.
append
(
ValidationLocation
(
validation_df
,
station_name
))
```
%% Cell type:code id: tags:
```
python
fig
,
axs
=
plt
.
subplots
(
nrows
=
len
(
station_names
),
ncols
=
1
,
figsize
=
(
10
,
55
))
for
i
,
validation_location
in
enumerate
(
validation_locations
):
for
tide_gauge
in
validation_location
.
tide_gauge_list
:
axs
[
i
].
plot
(
tide_gauge
[
'
df
'
].
loc
[
'
1979-01-01
'
:
'
2015-12-31
'
].
resample
(
'
1d
'
).
mean
(),
label
=
tide_gauge
[
'
name
'
],
alpha
=
.
5
)
for
gtsm_tide_surge
in
validation_location
.
gtsm_tide_surge_list
:
axs
[
i
].
plot
(
gtsm_tide_surge
[
'
df
'
].
resample
(
'
1d
'
).
mean
(),
label
=
f
"
gtsm
{
gtsm_tide_surge
[
'
name
'
]
}
"
,
alpha
=
0.5
)
for
combined_data
in
validation_location
.
combined_data_list
:
axs
[
i
].
plot
(
combined_data
[
'
df
'
].
resample
(
'
1d
'
).
mean
(),
label
=
f
"
combined
{
combined_data
[
'
name
'
]
}
"
,
alpha
=
0.5
)
for
tide_gauge
in
validation_location
.
tide_gauge_list
:
axs
[
i
].
plot
(
tide_gauge
[
'
df
'
].
loc
[
'
1979-01-01
'
:
'
2015-12-31
'
].
resample
(
'
1d
'
).
mean
(),
label
=
tide_gauge
[
'
name
'
],
alpha
=
.
5
)
axs
[
i
].
legend
()
```
%% Cell type:code id: tags:
```
python
# plotting with sanne-style normalization
```
%% Cell type:code id: tags:
```
python
fig
,
axs
=
plt
.
subplots
(
nrows
=
len
(
validation_locations
),
ncols
=
1
,
figsize
=
(
15
,
55
))
fig
,
axs
=
plt
.
subplots
(
nrows
=
len
(
validation_locations
),
ncols
=
2
,
figsize
=
(
15
,
55
))
for
i
,
validation_location
in
enumerate
(
validation_locations
):
axs
[
i
].
set_title
(
validation_location
.
name
)
axs
[
i
,
0
].
set_title
(
f
"
{
validation_location
.
name
}
gtsm
"
)
axs
[
i
,
1
].
set_title
(
f
"
{
validation_location
.
name
}
combined
"
)
normalize
=
remove_annual_mean
df
=
pd
.
DataFrame
(
{
'
observed
'
:
normalize
(
validation_location
.
tide_gauge
[
'
df
'
]).
resample
(
'
1d
'
).
max
(),
'
gtsm
'
:
normalize
(
validation_location
.
gtsm_tide_surge
[
'
df
'
]).
resample
(
'
1d
'
).
max
(),
#
'combined':
(tide_gauge['combined_data']-tide_gauge['combined_data'].median())['1980-01-01':'2010-01-01
'].resample('1d').max()
'
observed
'
:
remove_annual_mean
(
validation_location
.
tide_gauge
[
'
df
'
]).
resample
(
'
1d
'
).
max
(),
'
gtsm
'
:
remove_annual_mean
(
validation_location
.
gtsm_tide_surge
[
'
df
'
]).
resample
(
'
1d
'
).
max
(),
'
combined
'
:
remove_annual_mean
(
validation_location
.
combined_data
[
'
df
'
]
)
.
resample
(
'
1d
'
).
max
()
})
df
=
df
.
dropna
()
# gtsm
#observed
x
=
df
[
'
observed
'
].
to_numpy
()
# modelled
y
=
df
[
'
gtsm
'
].
to_numpy
()
try
:
xy
=
np
.
vstack
([
x
,
y
])
z
=
gaussian_kde
(
xy
)(
xy
)
# Sort the points by density, so that the densest points are plotted last
idx
=
z
.
argsort
()
x
,
y
,
z
=
x
[
idx
],
y
[
idx
],
z
[
idx
]
axs
[
i
].
scatter
(
x
,
y
,
c
=
z
,
cmap
=
plt
.
get_cmap
(
'
magma
'
))
axs
[
i
,
0
].
scatter
(
x
,
y
,
c
=
z
,
cmap
=
plt
.
get_cmap
(
'
magma
'
))
except
ValueError
as
e
:
print
(
f
"
{
validation_location
.
name
}
throws ValueError:
{
e
}
"
)
#combined_data
x
=
df
[
'
observed
'
].
to_numpy
()
y
=
df
[
'
combined
'
].
to_numpy
()
try
:
xy
=
np
.
vstack
([
x
,
y
])
z
=
gaussian_kde
(
xy
)(
xy
)
# Sort the points by density, so that the densest points are plotted last
idx
=
z
.
argsort
()
x
,
y
,
z
=
x
[
idx
],
y
[
idx
],
z
[
idx
]
axs
[
i
,
1
].
scatter
(
x
,
y
,
c
=
z
,
cmap
=
plt
.
get_cmap
(
'
magma
'
))
except
ValueError
as
e
:
print
(
f
"
{
validation_location
.
name
}
throws ValueError:
{
e
}
"
)
xy_min
=
0
xy_max
=
3000
axs
[
i
].
set_aspect
(
'
equal
'
)
axs
[
i
].
set_xlim
([
xy_min
,
xy_max
])
axs
[
i
].
set_ylim
([
xy_min
,
xy_max
])
axs
[
i
].
plot
(
axs
[
i
].
get_xlim
(),
axs
[
i
].
get_ylim
(),
c
=
'
black
'
)
for
j
in
[
0
,
1
]:
axs
[
i
,
j
].
set_aspect
(
'
equal
'
)
axs
[
i
,
j
].
set_xlim
([
xy_min
,
xy_max
])
axs
[
i
,
j
].
set_ylim
([
xy_min
,
xy_max
])
axs
[
i
,
j
].
plot
(
axs
[
i
,
j
].
get_xlim
(),
axs
[
i
,
j
].
get_ylim
(),
'
--
'
,
c
=
'
lightgray
'
)
```
%% Cell type:code id: tags:
```
python
fig
,
axs
=
plt
.
subplots
(
nrows
=
len
(
validation_locations
),
ncols
=
1
,
figsize
=
(
15
,
55
))
for
i
,
validation_location
in
enumerate
(
validation_locations
):
axs
[
i
].
set_title
(
validation_location
.
name
)
normalize
=
remove_annual_mean
df
=
pd
.
DataFrame
(
{
'
observed
'
:
normalize
(
validation_location
.
tide_gauge
[
'
df
'
]).
resample
(
'
1d
'
).
max
(),
'
gtsm
'
:
normalize
(
validation_location
.
gtsm_tide_surge
[
'
df
'
]).
resample
(
'
1d
'
).
max
(),
'
combined
'
:
normalize
(
validation_location
.
combined_data
[
'
df
'
]).
resample
(
'
1d
'
).
max
()
#'combined': (tide_gauge['combined_data']-tide_gauge['combined_data'].median())['1980-01-01':'2010-01-01'].resample('1d').max()
})
df
=
df
.
dropna
()
axs
[
i
].
scatter
(
df
[
'
observed
'
],
df
[
'
gtsm
'
],
label
=
'
gtsm
'
,
alpha
=
.
03
)
axs
[
i
].
scatter
(
df
[
'
observed
'
],
df
[
'
combined
'
],
label
=
'
combined
'
,
alpha
=
.
03
)
#linear fit
try
:
lin_fit_gtsm
=
linregress
(
df
[
'
observed
'
],
df
[
'
gtsm
'
])
axs
[
i
].
plot
(
x
,
lin_fit_gtsm
.
intercept
+
lin_fit_gtsm
.
slope
*
x
,
'
green
'
,
label
=
'
linear fit gtsm
'
)
lin_fit_gtsm
=
linregress
(
df
[
'
observed
'
],
df
[
'
combined
'
])
axs
[
i
].
plot
(
x
,
lin_fit_gtsm
.
intercept
+
lin_fit_gtsm
.
slope
*
x
,
'
red
'
,
label
=
'
linear fit combined
'
)
except
ValueError
as
e
:
print
(
f
"
{
validation_location
.
name
}
throws ValueError:
{
e
}
"
)
#xy_min = min(df['observed'].min(), df['gtsm'].min(), df['combined'].min())
#xy_max = max(df['observed'].max(), df['gtsm'].max(), df['combined'].max())
xy_min
=
0
xy_max
=
3000
#xy_min = -1000
#xy_max = 1000
axs
[
i
].
set_aspect
(
'
equal
'
)
try
:
axs
[
i
].
set_xlim
([
xy_min
,
xy_max
])
axs
[
i
].
set_ylim
([
xy_min
,
xy_max
])
except
:
axs
[
i
].
set_xlim
([
-
500
,
2000
])
axs
[
i
].
set_ylim
([
-
500
,
2000
])
axs
[
i
].
plot
(
axs
[
i
].
get_xlim
(),
axs
[
i
].
get_ylim
(),
'
--
'
,
c
=
'
black
'
)
axs
[
0
].
legend
();
```
%% Cell type:code id: tags:
```
python
``
`
%%
Cell
type
:
code
id
:
tags
:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:markdown id: tags:
# Alternative detrending, relative to observations
%% Cell type:code id: tags:
```
python
def lin_detrend(df):
df1 = df.dropna().copy()
for key in ['observed', 'gtsm', 'combined']:
df1[f'n_{key}'] = df1.loc[:, key]-df1.loc[:,key].mean()
df1['hours_since_1979'] = df1.index.map(lambda x: (x.to_pydatetime()-datetime.datetime(1979, 1, 1, 0, 0)).total_seconds()/3600)
lin_trend = linregress(df1['hours_since_1979'], df1['n_observed'])
df1['lin_trend'] = df1['hours_since_1979'].map(lambda x: (lin_trend.slope
*
x + lin_trend.intercept))
for key in ['observed', 'gtsm', 'combined']:
df1[f'detrended_{key}'] = df1[f'n_{key}']-df1['lin_trend']
return df1
```
%% Cell type:code id: tags:
```
python
fig, axs = plt.subplots(nrows=len(validation_locations), ncols=2, figsize=(15, 55))
for i, validation_location in enumerate(validation_locations):
axs[i,0].set_title(f"{validation_location.name} annual_means")
axs[i,1].set_title(f"{validation_location.name} annual_means detrended")
#normalize = remove_annual_mean
df = pd.DataFrame(
{
'observed': validation_location.tide_gauge['df'],
'gtsm': validation_location.gtsm_tide_surge['df'],
'combined': validation_location.combined_data['df']
})
try:
df = lin_detrend(df)
except ValueError as e:
print(f"{validation_location.name} throws ValueError: {e}")
continue
for key in ['observed', 'gtsm', 'combined']:
axs[i, 0].plot(
df[f'n_{key}'].resample('1a').mean(),
label=key
)
axs[i, 1].plot(
df[f'detrended_{key}'].resample('1a').mean(),
label=key + ' detrended'
)
axs[i,0].plot(df['lin_trend'])
axs[i,0].legend()
```
%% Cell type:code id: tags:
```
python
fig, axs = plt.subplots(nrows=len(validation_locations), ncols=2, figsize=(15, 55))
for i, validation_location in enumerate(validation_locations):
axs[i,0].set_title(f"{validation_location.name} gtsm")
axs[i,1].set_title(f"{validation_location.name} combined")
#normalize = remove_annual_mean
df = pd.DataFrame(
{
'observed': validation_location.tide_gauge['df'],
'gtsm': validation_location.gtsm_tide_surge['df'],
'combined': validation_location.combined_data['df']
})
try:
df = lin_detrend(df)
except ValueError as e:
print(f"{validation_location.name} throws ValueError: {e}")
continue
# gtsm
#observed
x = df['detrended_observed'].resample('1d').max().dropna().to_numpy()
# modelled
y = df['detrended_gtsm'].resample('1d').max().dropna().to_numpy()
try:
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
# Sort the points by density, so that the densest points are plotted last
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
axs[i,0].scatter(x, y, c=z, cmap=plt.get_cmap('magma'))
except ValueError as e:
print(f"{validation_location.name} throws ValueError: {e}")
#combined_data
x = df['detrended_observed'].resample('1d').max().dropna().to_numpy()
y = df['detrended_combined'].resample('1d').max().dropna().to_numpy()
try:
xy = np.vstack([x,y])
z = gaussian_kde(xy)(xy)
# Sort the points by density, so that the densest points are plotted last
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
axs[i,1].scatter(x, y, c=z, cmap=plt.get_cmap('magma'))
except ValueError as e:
print(f"{validation_location.name} throws ValueError: {e}")
xy_min=0
xy_max=3000
for j in [0, 1]:
axs[i, j].set_aspect('equal')
axs[i, j].set_xlim([xy_min, xy_max])
axs[i, j].set_ylim([xy_min, xy_max])
axs[i, j].plot(
axs[i, j].get_xlim(),
axs[i, j].get_ylim(),
'--',
c='lightgray'
)
axs[i,j].set_xlabel('observed')
axs[i,0].set_ylabel('gtsm')
axs[i,1].set_ylabel('combined')
```
%% Cell type:code id: tags:
```
python
fig, axs = plt.subplots(nrows=len(validation_locations), ncols=1, figsize=(15, 55))
for i, validation_location in enumerate(validation_locations):
axs[i].set_title(validation_location.name)
df = pd.DataFrame(
{
'observed': validation_location.tide_gauge['df'],
'gtsm': validation_location.gtsm_tide_surge['df'],
'combined': validation_location.combined_data['df']
})
try:
df = lin_detrend(df)
except ValueError as e:
print(f"{validation_location.name} throws ValueError: {e}")
continue
obs = df['detrended_observed'].resample('1d').max().dropna()
gtsm = df['detrended_gtsm'].resample('1d').max().dropna()
combined = df['detrended_combined'].resample('1d').max().dropna()
axs[i].scatter(
obs,
gtsm,
label='gtsm',
alpha=.03
)
axs[i].scatter(
obs,
combined,
label='combined',
alpha=.03
)
#linear fit
try:
lin_fit_gtsm = linregress(obs, gtsm)
axs[i].plot(x, lin_fit_gtsm.intercept + lin_fit_gtsm.slope*x,
'green',
label='linear fit gtsm')
lin_fit_gtsm = linregress(obs, combined)
axs[i].plot(x, lin_fit_gtsm.intercept + lin_fit_gtsm.slope*x,
'red',
label='linear fit combined')
except ValueError as e:
print(f"{validation_location.name} throws ValueError: {e}")
#xy_min = min(df['observed'].min(), df['gtsm'].min(), df['combined'].min())
#xy_max = max(df['observed'].max(), df['gtsm'].max(), df['combined'].max())
xy_min=0
xy_max=3000
#xy_min = -1000
#xy_max = 1000
axs[i].set_aspect('equal')
try:
axs[i].set_xlim([xy_min, xy_max])
axs[i].set_ylim([xy_min, xy_max])
except:
axs[i].set_xlim([-500, 2000])
axs[i].set_ylim([-500, 2000])
axs[i].set_xlabel('observed')
axs[i].set_ylabel('modelled')
axs[i].plot(axs[i].get_xlim(), axs[i].get_ylim(), '--', c='black')
axs[0].legend();
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
world = gpd.read_file(
gpd.datasets.get_path('naturalearth_lowres')
)
ax = world.plot(color='white', edgecolor='black')
for tide_gauge in tide_gauges:
tide_gauge['stations'].plot(ax=ax, color='red')
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
%% Cell type:code id: tags:
```
python
```
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment