-
Fabian Stenzel authoredFabian Stenzel authored
utils.R 8.73 KiB
# function to get file extension
get_major_file_ext <- function(path) {
# Get all files in path
all_files <- list.files(
path,
full.names = TRUE
)
# Get file extensions
all_file_types <- all_files %>%
strsplit("/") %>%
sapply(utils::tail, 1) %>%
strsplit("\\.") %>%
sapply(function(x) {
y <- x[-1]
if ("json" %in% y) {
return(paste(tail(strsplit(y, "\\."), 2), collapse = "."))
} else {
return(tail(strsplit(y, "\\."), 1))
}
}) %>%
unlist()
# Get most frequent file types
# TODO not yet working
most_frequent <- all_file_types %>%
factor() %>%
table() %>%
names() %>%
.[seq_len(5)]
# 5 exemplaric files to detect type
files_to_check <- sapply(
most_frequent,
function(x, y, z) {
y[which(z == x)[1]]
},
y = all_files,
z = all_file_types
) %>%
na.omit()
# Detect actual LPJmL data type
types <- sapply(
files_to_check,
lpjmlkit:::detect_io_type
) %>%
stats::setNames(names(.), .)
# Assign file type after ranking which is available
# first preferable: "meta", second: "clm", last: "raw"
if ("meta" %in% names(types)) {
file_type <- types["meta"]
} else if ("clm" %in% names(types)) {
file_type <- types["clm"]
} else if ("raw" %in% names(types)) {
file_type <- types["raw"]
}
return(file_type)
}
# function to get absolute file names
get_filenames <- function(path, # nolint
output_files,
diff_output_files,
input_files,
file_ext) {
file_names <- list()
# Iterate over required outputs
for (ofile in names(output_files)) {
# Get required max. temporal resolution and convert to nstep
resolution <- output_files[[ofile]]$resolution
nstep <- switch(resolution,
annual = 1,
monthly = 12,
daily = 365,
stop(paste0("Not supported time resolution: ", dQuote(nstep), "."))
)
# If input file supplied use it as first priority
if (ofile %in% names(input_files)) {
file_name <- input_files[[ofile]]
} else if (ofile %in% names(diff_output_files)) {
# If different output file should be used - as second priority
file_name <- paste0(
path, "/",
diff_output_files[[ofile]], ".",
file_ext
)
} else {
file_name <- NULL
}
if (!is.null(file_name)) {
# Check if data could be read in
meta <- lpjmlkit::read_meta(file_name)
# Then check if temporal resultion of file matches required nstep
if (nstep != meta$nstep && nstep != meta$nbands) {
stop(
paste0(
"Required temporal resolution (nstep = ", nstep, ") ",
"not supported by file ", dQuote(file_name),
" (", meta$nstep, ")"
)
)
}
# If nothing specified try to read required files from provided path
} else {
# Iterate over file name options (e.g. runoff, mrunoff, ...)
for (cfile in seq_along(output_files[[ofile]]$file_name)) {
file_name <- paste0(
path, "/",
output_files[[ofile]]$file_name[cfile], ".",
file_ext
)
# Check if file exists and if so check required temporal resolution
# else next
if (file.exists(file_name)) {
meta <- lpjmlkit::read_meta(file_name)
if (nstep <= meta$nstep || nstep == meta$nbands) {
# Matching file found, break and use current file_name
break
}
}
# At end of iteraton raise error that no matching file_name was found
if (cfile == length(output_files[[ofile]]$file_name) &&
!output_files[[ofile]]$optional) {
stop(
paste0(
"No matching output for ", dQuote(ofile),
" with required temporal resolution (nstep = ", nstep, ") ",
"found at path ", dQuote(path), "."
)
)
}
}
}
file_names[[ofile]] <- file_name
}
file_names
}
#' List required output files for given metric
#'
#' List required output files for given metric based on parameter file
#' `inst/extfiles/metric_files.yml`
#'
#' @param metric character string/list of strings. metrics to list outputs for
#' can be one of:
#' "all" - list all outputs for all metrics
#' "ecorisk" - list outputs for ecorisk metric without nitrogen
#' "ecorisk_nitrogen" - list outputs for ecorisk metric with nitrogen
#' "biocol" - list outputs for biocol metric
#' "biome" - list outputs for the biome classification
#' @param only_first_filename if several legal output names are listed, only
#' output the first of them (default: TRUE)
#'
#' @return list object with required outputs, their required temporal resolution
#' and if it is optional
#'
#' @examples
#' \dontrun{
#' list_outputs(metric = "ecorisk_nitrogen")
#' }
#'
#' @md
#' @export
list_outputs <- function(metric = "all",
only_first_filename = TRUE) {
metric <- process_metric(metric = metric)
system.file(
"extdata",
"metric_files.yml",
package = "biospheremetrics"
) %>%
yaml::read_yaml() %>%
get_outputs(metric, only_first_filename)
}
# List arguments of functions used in metrics from metric_files.yml
list_function_args <- function(metric = "all") {
metric <- process_metric(metric = metric)
system.file(
"extdata",
"metric_files.yml",
package = "boundaries"
) %>%
yaml::read_yaml() %>%
get_function_args(metric)
}
# Translate metric options into internal metric names
process_metric <- function(metric = "all") {
all_metrics <- c(
"ecorisk", "ecorisk_nitrogen", "biocol", "biome"
)
if ("all" %in% metric) {
metric <- all_metrics
}
metric <- match.arg(
arg = metric,
choices = all_metrics,
several.ok = TRUE
)
metric
}
# for input list a, all duplicate keys are unified, taking the value with
# highest temporal resolution (daily>monthly>annual)
get_outputs <- function(x, metric_name, only_first_filename) { # nolint
outputs <- list()
# Iterate over all metrics
for (metric in x$metric[metric_name]) {
# Iterate over all unique keys
for (item in names(metric$output)) {
# Check if output is already in list or if it has higher resolution
if (!item %in% names(outputs) ||
(item %in% names(outputs) &&
higher_res(
metric$output[[item]]$resolution,
outputs[[item]]$resolution
))
) {
# Assign output resolution from metric file
outputs[[item]]$resolution <- metric$output[[item]]$resolution
outputs[[item]]$optional <- metric$output[[item]]$optional
# Assign output file name from metric file
if (only_first_filename) {
outputs[[item]]$file_name <- x$file_name[[item]][1]
} else {
outputs[[item]]$file_name <- x$file_name[[item]]
}
}
}
}
outputs
}
# Get arguments of functions used in metrics
get_function_args <- function(x, metric_name) {
# List functions of metrics (metric_name)
funs <- list()
for (metric in x$metric[metric_name]) {
funs[[metric$fun_name]] <- metric$funs
}
# Get arguments of functions
funs %>%
lapply(function(x) {
unlist(
lapply(mget(x, inherits = TRUE), methods::formalArgs),
use.names = FALSE
)
})
}
# Check if resolution of x is higher than resolution of y
higher_res <- function(x, y) {
levels <- c("annual", "monthly", "daily")
resolution_x <- match(match.arg(x, levels), levels)
resolution_y <- match(match.arg(y, levels), levels)
if (resolution_x > resolution_y) {
return(TRUE)
} else {
return(FALSE)
}
}
# split calculation string for variable addition/subtraction into signs & vars
split_sign <- function(string) {
# add spaces around +- signs
string <- gsub(
"-",
" - ",
gsub("+", " + ", string, fixed = TRUE),
fixed = TRUE
)
# reduce multiple spaces to one
string <- trimws(gsub("\\s+", " ", string))
a <- strsplit(string, " ")[[1]]
if (length(a) == 1) {
outarray <- array("", dim = c(1, 2))
} else {
outarray <- array("", dim = c(round((length(a) + 1) / 2), 2))
}
for (i in seq_along(a)) {
i2 <- floor(i / 2 + 1)
if (i == 1 && !(grepl(a[1], "+-", fixed = TRUE))) outarray[1, 1] <- "+"
if (grepl(a[i], "+-", fixed = TRUE)) {
outarray[i2, 1] <- a[i]
} else {
outarray[i2, 2] <- a[i]
}
}
colnames(outarray) <- c("sign", "variable")
return(outarray)
}
DIM <- function(...) {
args <- list(...)
lapply(args, function(x) {
if (is.null(dim(x))) {
return(length(x))
}
dim(x)
})[[1]]
}
# Avoid note for "."...
utils::globalVariables(".") # nolint:undesirable_function_linter