Commit 646a5f0c authored by Sebastian Ostberg's avatar Sebastian Ostberg
Browse files

first version of finished yield loss risk indicator, code cleaning

parent 72a82dbb
......@@ -27,6 +27,6 @@ The script first downloads LPJmL outputs for the historical time period from the
In a second step, outputs from LPJmL crop yield forecast simulations are downloaded for all weather options available on the remote server for the specified combination of `irrigation_scenario`, `fertilizer_scenario` and `sowing_scenario`. Forecasted yield during the `forecast_year` is calculated for each crop and and grid cell and compared against the historical reference yield. This risk of yield loss is calculated for each crop and grid cell as the fraction of weather options that show a yield loss (compared to reference yield) of at least `yield_loss` percent.
To do: Grid-cell level risks are aggregated to the country level by summing grid cells where the risk of yield loss is at least `loss_probability` percent. The area at risk at the country level will be specified as absolute area (in hectare) as well as in percent of total crop area (e.g. 25% of all rainfed wheat areas in the country).
To do: Confirm format of returned risk indicator with CauseMos.
The script returns two output variables:
1. `yield_loss_risk`: The risk that yields fall at least `yield_loss` percent below historical reference yield for each crop and grid cell
2. `harvested_area_at_risk`: Harvested area affected by yield loss of at least `yield_loss` percent, grouped into 5 risk categories (0-20%, 20-40%, 40-60%, 60-80%, 80-100%), for each crop and grid cell
#' @title Calculate average yield
#'
#' @description Calculates the average across time for a crop data time series from LPJmL.
#' The function always calculates the mean across different years. Multiple time steps within
#' the same year may be either aggregated as mean or as sum.
#' @description Calculates the average across time for a crop data time series
#' from LPJmL. The function always calculates the mean across different years.
#' Multiple time steps within the same year may be either aggregated as mean
#' or as sum.
#'
#' @param filename File name of the NetCDF file containing the time series data
#' @param avg_variable Name of the NetCDF variable containing the crop time series data. This should
#' normally be crop yield but could also be other crop variables such as crop production or
#' harvested area
#' @param avg_variable Name of the NetCDF variable containing the crop time
#' series data. This should normally be crop yield but could also be other
#' crop variables such as crop production or harvested area
#' @param avg_startyear Beginning of period used for averaging
#' @param avg_endyear End of period used for averaging
#' @param multiple_per_year Function to use to aggregate multiple time steps within the same year
#' (either "mean" or "sum", default: "mean")
#' @param multiple_weight_variable Optional name of NetCDF variable used as weights if
#' multiple_per_year == "mean" (default: NULL)
#' @param multiple_per_year Function to use to aggregate multiple time steps
#' within the same year (either "mean" or "sum", default: "mean")
#' @param multiple_weight_variable Optional name of NetCDF variable used as
#' weights if multiple_per_year == "mean" (default: NULL)
#'
#' @return Array of crop data averaged across time. Spatial and crop dimensions are kept
#' @return Array of crop data averaged across time. Spatial and crop dimensions
#' are kept.
#'
#' @export
average_yield <- function(filename, avg_variable, avg_startyear, avg_endyear, multiple_per_year = "mean", multiple_weight_variable = NULL) {
## check for valid setting for multiple_per_year
if(!multiple_per_year %in% c("mean", "sum")) {
stop(paste("multiple_per_year must be either 'mean' or 'sum'. You provided:", sQuote(multiple_per_year)))
average_yield <- function(filename,
avg_variable,
avg_startyear,
avg_endyear,
multiple_per_year = "mean",
multiple_weight_variable = NULL
) {
## Check for valid setting for multiple_per_year
if (!multiple_per_year %in% c("mean", "sum")) {
stop(
paste(
"multiple_per_year must be either 'mean' or 'sum'. You provided:",
sQuote(multiple_per_year)
)
)
}
## check that file exists
if(!file.exists(filename)) {
## Check that file exists
if (!file.exists(filename)) {
stop(paste(sQuote(filename), "does not exist."))
}
## open file
## Open file
nc <- nc_open(filename)
## check if avg_variable is available in file
if(!avg_variable %in% names(nc$var)) {
stop(paste("Variable", sQuote(avg_variable), "not available in file", filename))
## Check if avg_variable is available in file
if (!avg_variable %in% names(nc$var)) {
stop(
paste(
"Variable", sQuote(avg_variable), "not available in file", filename
)
)
}
if(!is.null(multiple_weight_variable) && !multiple_weight_variable %in% names(nc$var)) {
stop(paste("Variable", sQuote(multiple_weight_variable), "not available in file", filename))
if (!is.null(multiple_weight_variable) &&
!multiple_weight_variable %in% names(nc$var)
) {
stop(
paste(
"Variable", sQuote(multiple_weight_variable),
"not available in file", filename
)
)
}
## get time
## Get time
# unit string in file
timeunit <- nc$dim$time$units
# Is time unit relative to an origin?
if(grepl("since", timeunit)) {
# detect origin date
origin <- as.POSIXct(sub("years|days|seconds since ", "", timeunit), tz="UTC")
# detect unit of offset compared to origin
origin_unit <- regmatches(timeunit, regexec("^([a-z]+) since", timeunit))[[1]][-1]
if (grepl("since", timeunit)) {
# Detect origin date
origin <- as.POSIXct(
sub("years|days|seconds since ", "", timeunit),
tz = "UTC"
)
# Detect unit of offset compared to origin
origin_unit <- regmatches(
timeunit,
regexec("^([a-z]+) since", timeunit)
)[[1]][-1]
time_is_relative <- TRUE
} else {
time_is_relative <- FALSE
if(!grepl("year", timeunit)) {
# currently only support "year" as absolute time unit
stop(paste("Time unit", sQuote(timeunit), "not supported in file", filename))
if (!grepl("year", timeunit)) {
# Currently only support "year" as absolute time unit
stop(
paste(
"Time unit", sQuote(timeunit),
"not supported in file", filename
)
)
}
}
# load values of time axis
# Load values of time axis
timevals <- ncvar_get(nc, "time")
## find timesteps within period avg_startyear to avg_endyear
if(!time_is_relative) {
# timevals equal year values
## Find timesteps within period avg_startyear to avg_endyear
if (!time_is_relative) {
# Timevals equal year values
avg_steps <- which(timevals >= avg_startyear & timevals <= avg_endyear)
if(length(avg_steps) < 1) {
stop(paste0("avg_startyear ", sQuote(avg_startyear), " or avg_endyear ", sQuote(avg_endyear), " outside of time period covered covered by file ", filename, " (", paste(range(timevals), collapse=("-")), ")"))
if (length(avg_steps) < 1) {
stop(
paste0(
"avg_startyear ", sQuote(avg_startyear),
" or avg_endyear ", sQuote(avg_endyear),
" outside of time period covered covered by file ", filename,
" (", paste(range(timevals), collapse = "-"), ")"
)
)
}
} else {
# need to convert timevals into year values using origin and origin_unit
year_timeval <- switch(origin_unit,
years = as.double(format(origin, "%Y"))+timevals,
days = as.double(format(as.Date(origin)+timevals, "%Y")),
seconds = as.double(format(origin+timevals, "%Y")))
avg_steps <- which(year_timeval >= avg_startyear & year_timeval <= avg_endyear)
if(length(avg_steps) < 1) {
stop(paste0("Start year ", sQuote(avg_startyear), " or end year ", sQuote(avg_endyear), " outside of time period covered covered by file ", filename, " (", paste(range(year_timeval), collapse=("-")), ")"))
# Need to convert timevals into year values using origin and origin_unit
year_timeval <- switch(
origin_unit,
years = as.double(format(origin, "%Y")) + timevals,
days = as.double(format(as.Date(origin) + timevals, "%Y")),
seconds = as.double(format(origin + timevals, "%Y"))
)
avg_steps <- which(
year_timeval >= avg_startyear & year_timeval <= avg_endyear
)
if (length(avg_steps) < 1) {
stop(
paste0(
"Start year ", sQuote(avg_startyear),
" or end year ", sQuote(avg_endyear),
" outside of time period covered covered by file ", filename,
" (", paste(range(year_timeval), collapse = "-"), ")"
)
)
}
# set timevals to year values
# Set timevals to year values
timevals <- year_timeval
}
if(avg_startyear < min(timevals)) {
stop(paste("avg_startyear", avg_startyear, "is earlier than period covered by of file", filename))
if (avg_startyear < min(timevals)) {
stop(
paste(
"avg_startyear", avg_startyear,
"is earlier than period covered by of file", filename
)
)
}
if(avg_endyear > max(timevals)) {
stop(paste("avg_endyear", avg_endyear, "is later than period covered by file", filename))
if (avg_endyear > max(timevals)) {
stop(
paste(
"avg_endyear", avg_endyear,
"is later than period covered by file", filename
)
)
}
## names of variable dimensions
var_dims <- names(nc$dim)[nc$var[[avg_variable]]$dimids+1]
## index of time dimension in variable dimensions
## Names of variable dimensions
var_dims <- names(nc$dim)[nc$var[[avg_variable]]$dimids + 1]
## Index of time dimension in variable dimensions
time_dim <- which(var_dims == "time")
var_dims_has_names <- rep(FALSE, length(var_dims))
names(var_dims_has_names) <- names(var_dims)
## find dimensions for which name attributes are saved as character variables
for(namevar in names(which(apply(sapply(var_dims, grepl, x = grep("Name", names(nc$var), value = TRUE, ignore.case = TRUE), ignore.case = TRUE), 2, any)))) {
nc_var <- intersect(grep("Name", names(nc$var), value = TRUE, ignore.case = TRUE), grep(namevar, names(nc$var), value = TRUE, ignore.case = TRUE))
## Find dimensions for which name attributes are saved as character variables
for (namevar in names(
which(
apply(
sapply(
var_dims,
grepl,
x = grep("Name", names(nc$var), value = TRUE, ignore.case = TRUE),
ignore.case = TRUE
),
2,
any
)
)
)) {
nc_var <- intersect(
grep("Name", names(nc$var), value = TRUE, ignore.case = TRUE),
grep(namevar, names(nc$var), value = TRUE, ignore.case = TRUE)
)
name_values <- ncvar_get(nc, nc_var)
assign(paste0(namevar, "_names"), name_values)
var_dims_has_names[namevar] <- TRUE
}
result_array <- array(dim=nc$var[[avg_variable]]$varsize[-time_dim])
for(namevar in names(which(var_dims_has_names))) {
dimnames(result_array)[[which(var_dims[-time_dim] == namevar)]] <- get(paste0(namevar, "_names"))
result_array <- array(dim = nc$var[[avg_variable]]$varsize[-time_dim])
for (namevar in names(which(var_dims_has_names))) {
dimnames(result_array)[[which(var_dims[-time_dim] == namevar)]] <-
get(paste0(namevar, "_names"))
}
nvalid <- array(0, dim=nc$var[[avg_variable]]$varsize[-time_dim])
for(y in avg_startyear:avg_endyear) {
year_result <- array(dim=nc$var[[avg_variable]]$varsize[-time_dim])
year_nvalid <- array(0, dim=nc$var[[avg_variable]]$varsize[-time_dim])
for(yearstep in which(timevals==y)) {
filedata <- ncvar_get(nc, avg_variable, start=ifelse(var_dims == "time", yearstep, 1), count=ifelse(var_dims == "time", 1, -1))
nvalid <- array(0, dim = nc$var[[avg_variable]]$varsize[- time_dim])
for (y in avg_startyear:avg_endyear) {
year_result <- array(dim = nc$var[[avg_variable]]$varsize[- time_dim])
year_nvalid <- array(0, dim = nc$var[[avg_variable]]$varsize[- time_dim])
for (yearstep in which(timevals == y)) {
filedata <- ncvar_get(
nc,
avg_variable,
start = ifelse(var_dims == "time", yearstep, 1),
count = ifelse(var_dims == "time", 1, -1)
)
valid_index <- which(!is.na(filedata))
update_index <- intersect(which(!is.na(year_result)), valid_index)
set_index <- intersect(which(is.na(year_result)), valid_index)
if(multiple_per_year == "mean" && !is.null(multiple_weight_variable)) {
## Normally harvested area for several timesteps within the same year
## should be identical for LPJmL results.However, if specified
if (multiple_per_year == "mean" && !is.null(multiple_weight_variable)) {
## Normally harvested area for several timesteps within the same year
## should be identical for LPJmL results.However, if specified
## multiple_weight_variable is used as weight. This variable must have
## the same dimensions as avg_variable
weightdata <- ncvar_get(nc, multiple_weight_variable, start=ifelse(var_dims == "time", yearstep, 1), count=ifelse(var_dims == "time", 1, -1))
if(anyNA(weightdata[valid_index])) {
stop(paste("Inconsistency between", sQuote(avg_variable), "and", sQuote(multiple_weight_variable), "data in year", y))
weightdata <- ncvar_get(
nc,
multiple_weight_variable,
start = ifelse(var_dims == "time", yearstep, 1),
count = ifelse(var_dims == "time", 1, -1)
)
if (anyNA(weightdata[valid_index])) {
stop(
paste(
"Inconsistency between", sQuote(avg_variable),
"and", sQuote(multiple_weight_variable),
"data in year", y
)
)
}
weightdata[-valid_index] <- NA
year_nvalid[valid_index] <- year_nvalid[valid_index] + weightdata[valid_index]
weightdata[- valid_index] <- NA
year_nvalid[valid_index] <- year_nvalid[valid_index] +
weightdata[valid_index]
year_result[set_index] <- filedata[set_index] * weightdata[set_index]
year_result[update_index] <- year_result[update_index] + filedata[update_index] * weightdata[update_index]
year_result[update_index] <- year_result[update_index] +
filedata[update_index] * weightdata[update_index]
} else {
year_nvalid[valid_index] <- year_nvalid[valid_index]+1
year_nvalid[valid_index] <- year_nvalid[valid_index] + 1
year_result[set_index] <- filedata[set_index]
year_result[update_index] <- year_result[update_index] + filedata[update_index]
year_result[update_index] <- year_result[update_index] +
filedata[update_index]
}
}
if(multiple_per_year == "mean") {
year_result <- year_result/year_nvalid
if (multiple_per_year == "mean") {
year_result <- year_result / year_nvalid
year_result[which(year_nvalid == 0)] <- NA
}
valid_index <- which(!is.na(year_result))
update_index <- intersect(which(!is.na(result_array)), valid_index)
set_index <- intersect(which(is.na(result_array)), valid_index)
nvalid[valid_index] <- nvalid[valid_index]+1
nvalid[valid_index] <- nvalid[valid_index] + 1
result_array[set_index] <- year_result[set_index]
result_array[update_index] <- result_array[update_index] + year_result[update_index]
result_array[update_index] <- result_array[update_index] +
year_result[update_index]
}
result_array <- result_array/nvalid
result_array <- result_array / nvalid
result_array[which(nvalid == 0)] <- NA
nc_close(nc)
rm(nvalid, year_result, year_nvalid)
......
......@@ -3,7 +3,8 @@
#' @description This function can be used to download historical time
#' series data as well as forecast data for LPJmL
#'
#' @param runtype Type of LPJmL crop simulation. Either 'historical' or 'forecast'
#' @param runtype Type of LPJmL crop simulation. Either 'historical' or
#' 'forecast'
#' @param localname Local file name to be used for downloaded data
#' @param experiment The name of the set of crop simulations
#' @param irrigation_scenario Irrigation scenario for which to download
......@@ -13,60 +14,112 @@
#' @param sowing_scenario Sowing date scenario for which to download
#' data (if runtype == 'forecast')
#'
#' @return The name of the local file if downloaded successfully. This
#' @return The name of the local file if downloaded successfully. This
#' function will throw an error if any of the provided parameters are invalid.
#'
#' @export
download_yield_data <- function(runtype, localname, experiment = "August2021_experiment", irrigation_scenario = NULL, fertilizer_scenario = NULL, sowing_scenario = NULL, weather_year = NULL) {
if(!runtype %in% c("historical", "forecast")) {
stop(paste("Invalid runtype", sQuote(runtype), "provided. Must be 'historical' or 'forecast'"))
download_yield_data <- function(runtype,
localname,
experiment = "August2021_experiment",
irrigation_scenario = NULL,
fertilizer_scenario = NULL,
sowing_scenario = NULL,
weather_year = NULL
) {
if (!runtype %in% c("historical", "forecast")) {
stop(
paste(
"Invalid runtype", sQuote(runtype),
"provided. Must be 'historical' or 'forecast'"
)
)
}
## check for valid experiment
if((runtype == "historical" && !experiment %in% names(historical_remote_dir)) || (runtype == "forecast" && !experiment %in% names(forecast_remote_dir))) {
## Check for valid experiment
if (
(runtype == "historical" && !experiment %in% names(historical_remote_dir)) ||
(runtype == "forecast" && !experiment %in% names(forecast_remote_dir))
) {
stop(paste("Invalid experiment", sQuote(experiment)))
}
if(runtype == "historical") {
rsync_url <- paste0(rsync_baseurl, experiment, "/lpjml-results/", historical_remote_dir[experiment], "/")
if (runtype == "historical") {
rsync_url <- paste0(
rsync_baseurl, experiment,
"/lpjml-results/",
historical_remote_dir[experiment],
"/"
)
## get contents of remote directory
remote_contents <- system2(rsync_command, args=c(rsync_args, rsync_url), stdout=TRUE)
source_file <- grep(".nc$", unname(sapply(sapply(remote_contents, strsplit, split = " "), function(indata) return(indata[length(indata)]))), value = TRUE)
if(length(source_file) != 1) {
stop(paste("Cannot determine source file from contents of source directory:", toString(sQuote(unname(sapply(sapply(remote_contents, strsplit, split = " "), function(indata) return(indata[length(indata)])))))))
remote_contents <- system2(
rsync_command,
args = c(rsync_args, rsync_url),
stdout = TRUE
)
dir_entries <- unname(
sapply(
sapply(remote_contents, strsplit, split = " "),
function(indata) return(indata[length(indata)])
)
)
source_file <- grep(".nc$", dir_entries, value = TRUE)
if (length(source_file) != 1) {
stop(
paste(
"Cannot determine source file from contents of source directory:",
toString(sQuote(dir_entries))
)
)
}
} else if(runtype == "forecast") {
for(opt in c("irrigation_scenario", "fertilizer_scenario", "sowing_scenario", "weather_year")) {
if(!exists(opt) || is.null(get(opt))) {
} else if (runtype == "forecast") {
for (opt in c(
"irrigation_scenario",
"fertilizer_scenario",
"sowing_scenario",
"weather_year")
) {
if (!exists(opt) || is.null(get(opt))) {
stop(paste(sQuote(opt), "not set"))
}
}
weather_options <- get_weather_options(experiment = experiment,
remote_dir = forecast_remote_dir[experiment],
irrigation_scenario = irrigation_scenario,
fertilizer_scenario = fertilizer_scenario,
sowing_scenario = sowing_scenario,
include_filenames = TRUE)
if(!weather_year %in% weather_options$weather_options) {
weather_options <- get_weather_options(
experiment = experiment,
remote_dir = forecast_remote_dir[experiment],
irrigation_scenario = irrigation_scenario,
fertilizer_scenario = fertilizer_scenario,
sowing_scenario = sowing_scenario,
include_filenames = TRUE
)
if (!weather_year %in% weather_options$weather_options) {
stop(paste("Invalid weather_year", sQuote(weather_year)))
}
source_file <- grep(weather_year, weather_options$filenames, value = TRUE)
if(length(source_file) != 1) {
stop(paste0("Cannot determine matching filename for irrigation_scenario ",
sQuote(irrigation_scenario),
", fertilizer_scenario ",
sQuote(fertilizer_scenario),
", sowing_scenario ",
sQuote(sowing_scenario),
", weather_year ", sQuote(weather_year)))
if (length(source_file) != 1) {
stop(
paste0(
"Cannot determine matching filename for irrigation_scenario ",
sQuote(irrigation_scenario),
", fertilizer_scenario ", sQuote(fertilizer_scenario),
", sowing_scenario ", sQuote(sowing_scenario),
", weather_year ", sQuote(weather_year)
)
)
}
rsync_url <- paste0(rsync_baseurl, experiment, "/lpjml-results/", forecast_remote_dir[experiment], "/irrigation_target_", irrigation_scenario, "_n_rate_shift_", fertilizer_scenario, "_sowing_delta_", sowing_scenario, "/")
rsync_url <- paste0(
rsync_baseurl,
experiment,
"/lpjml-results/",
forecast_remote_dir[experiment],
"/irrigation_target_", irrigation_scenario,
"_n_rate_shift_", fertilizer_scenario,
"_sowing_delta_", sowing_scenario,
"/"
)
} else {
stop(paste("runtype", sQuote(runtype), "not defined"))
}
rsync_url <- paste0(rsync_url, source_file)
## Download file
system2(rsync_command, args=c(rsync_args, rsync_url, localname))
if(file.exists(localname)) {
system2(rsync_command, args = c(rsync_args, rsync_url, localname))
if (file.exists(localname)) {
return(localname)
} else {
return(FALSE)
......
......@@ -5,30 +5,67 @@
#' date scenario options available for a set of forecast simulations
#'
#' @param experiment The name of the set of forecast simulations
#' @param remote_dir Remote directory on the rsync server containing
#' @param remote_dir Remote directory on the rsync server containing
#' the forecast files
#'
#' @return A list of character vectors with elements 'irrigation_options',
#' 'fertilizer_options', 'sowing_options'
#'
#' @export
get_forecast_options <- function(experiment, remote_dir) {
if(!experiment %in% names(remote_dir)) {
if (!experiment %in% names(remote_dir)) {
stop(paste("Invalid experiment", sQuote(experiment)))
}
## parts of rsync command
rsync_url <- paste0(rsync_baseurl, experiment, "/lpjml-results/", remote_dir[experiment], "/")
rsync_url <- paste0(
rsync_baseurl,
experiment,
"/lpjml-results/",
remote_dir[experiment],
"/"
)
## get contents of remote directory
remote_contents <- system2(rsync_command, args=c(rsync_args, rsync_url), stdout=TRUE)
forecast_versions <- unname(sapply(sapply(remote_contents, strsplit, split = " "), function(indata) return(indata[length(indata)])))
remote_contents <- system2(
rsync_command,
args = c(rsync_args, rsync_url),
stdout = TRUE
)
forecast_versions <- unname(
sapply(
sapply(remote_contents, strsplit, split = " "),
function(indata) return(indata[length(indata)])
)
)
forecast_versions <- forecast_versions[grep("irrigation", forecast_versions)]
scenario_options <- regmatches(forecast_versions, regexec("^irrigation_target_(.+)_n_rate_shift_(.+)_sowing_delta_(.+)", forecast_versions))
if(any(sapply(scenario_options, length) != 4)) {
stop(paste("Invalid forecast scenario(s):", toString(sQuote(sapply(scenario_options[which(sapply(scenario_options, length) != 4)], function(indata) indata[1])))))
scenario_options <- regmatches(
forecast_versions,
regexec(
"^irrigation_target_(.+)_n_rate_shift_(.+)_sowing_delta_(.+)",
forecast_versions
)
)
invalid <- which(sapply(scenario_options, length) != 4)
if (length(invalid) > 0) {
stop(
paste(
"Invalid forecast scenario(s):",
toString(
sQuote(sapply(scenario_options[invalid], function(indata) indata[1]))
)
)
)
}
irrigation_options <- unique(sapply(scenario_options, function(indata) indata[2]))
fertilizer_options <- unique(sapply(scenario_options, function(indata) indata[3]))
sowing_options <- unique(sapply(scenario_options, function(indata) indata[4]))
return(list(irrigation_options = irrigation_options, fertilizer_options = fertilizer_options, sowing_options = sowing_options))
irrigation_options <- unique(
sapply(scenario_options, function(indata) indata[2])
)
fertilizer_options <- unique(
sapply(scenario_options, function(indata) indata[3])
)
sowing_options <- unique(
sapply(scenario_options, function(indata) indata[4])
)
list(
irrigation_options = irrigation_options,
fertilizer_options = fertilizer_options,
sowing_options = sowing_options
)
}
......@@ -4,7 +4,7 @@
#' specific set of LPJmL forecasts.
#'
#' @param experiment The name of the set of forecast simulations
#' @param remote_dir Remote directory on the rsync server containing
#' @param remote_dir Remote directory on the rsync server containing
#' the forecast files
#' @param irrigation_scenario Irrigation scenario for which to query
#' weather options
......@@ -12,7 +12,7 @@
#' weather options
#' @param sowing_scenario Sowing date scenario for which to query
#' weather options