Skip to content
Snippets Groups Projects
roc.R 11.44 KiB
calc_roc_data <- function(external_binary, #binary transgression: 0 - no,1 - yes
                          internal_continuous,
                          external_name,
                          range_internal, # range of internal data, e.g. c(0,1)
                          sampling_res = 0.01, # together with range used to create potential thresholds
                          cellArea = cellarea) {

  # check dimensions
  internal_continuous <- drop(internal_continuous)
  external_binary <- drop(external_binary)
  if (length(internal_continuous) != length(external_binary) ||
      length(internal_continuous) != length(cellArea)) {
    stop("Dimensions of external_binary (", length(external_binary),
         "), internal_continuous (", length(internal_continuous),
         "), cellArea (", length(cellArea),
         ") need to match. Aborting.")
  }

  # controls sampling distance
  potential_thresholds <- seq(range_internal[1],range_internal[2],sampling_res)

  values <- array(NA, dim=c(length(potential_thresholds),1,3))
  dimnames(values) <- list(threshold = potential_thresholds,
                           indicator = external_name, value = c("TP","FP","slope"))
  for (i in seq_along(potential_thresholds)) {
    t <- potential_thresholds[i]
    internal_cells <- which(internal_continuous > t)
    TP <- sum(cellArea[which(external_binary == 1 & internal_continuous > t)]) # True Positive
    FP <- sum(cellArea[which(!(external_binary == 1) & internal_continuous > t)]) # False Positive
    values[i,1,1] <- TP
    values[i,1,2] <- FP
  }
  #browser()
  #plotGlobalManToScreen(data = external_binary, title = "change",
  #                      brks = seq(0,1,0.05),legYes = T,palette = palette_viridis,
  #                      legendtitle = "")
  values[,1,1] <- values[,1,1]/max(values[,1,1])
  values[,1,2] <- values[,1,2]/max(values[,1,2])
  # calculate tangent angle between each pair of values
  n <- length(potential_thresholds)
  xs <- values[c(1,1:n,n),1,1]
  ys <- values[c(1,1:n,n),1,2]
  n_e <- length(xs)
  slope <- (ys[1:(n_e-2)] - ys[3:n_e]) / (xs[1:(n_e-2)] - xs[3:n_e])
  values[,1,3] <- slope

  return(values)
}
calc_roc_data_3d <- function( external_continuous,
                              internal_continuous,
                              range_internal, # range of internal data, e.g. c(0,1)
                              range_external, # range of external data, e.g. c(0,1)
                              sampling = 101, # together with range used to create potential thresholds
                              cellArea = cellarea) {
  # check dimensions
  internal_continuous <- drop(internal_continuous)
  external_continuous <- drop(external_continuous)
  if (length(internal_continuous) != length(external_continuous) ||
      length(internal_continuous) != length(cellArea)) {
    stop("Dimensions of external_binary (", length(external_continuous),
         "), internal_continuous (", length(internal_continuous),
         "), cellArea (", length(cellArea),
         ") need to match. Aborting.")
  }

  # controls sampling distance
  potential_thresholds_internal <- seq(range_internal[1],range_internal[2], length.out = sampling)
  potential_thresholds_external <- seq(range_external[1],range_external[2], length.out = sampling)

  values <- array(NA, dim=c(length(potential_thresholds_internal),length(potential_thresholds_external),3))
  dimnames(values) <- list(threshold_internal = potential_thresholds_internal,
                           threshold_external = potential_thresholds_external,
                           value = c("TP","FP","diff"))
  for (e in seq_along(potential_thresholds_internal)) {
    for (i in seq_along(potential_thresholds_internal)) {
      ti <- potential_thresholds_internal[i]
      te <- potential_thresholds_external[e]
      TP <- sum(cellArea[which(external_continuous > te & internal_continuous > ti)]) # True Positive
      FP <- sum(cellArea[which(external_continuous <= te & internal_continuous > ti)]) # False Positive
      values[i,e,1] <- TP
      values[i,e,2] <- FP
    }
  }

  #values[,,1] <- values[,,1]/max(values[,,1])
  #values[,,2] <- values[,,2]/max(values[,,2])
  values[,,3] <- values[,,1] - values[,,2]

  return(values)
}
roc_plot <- function(filename = NULL,
                     values # only one metric
) {
  if (!is.null(filename)) {
    png(filename, res = 300, width = 6, height = 7, units = "in",
        pointsize = 6, type = "cairo")
  }
  par(oma = c(0,0,0,0), mar = c(6,10,5,0), bty = "o", xpd = F,xaxs="i",yaxs="i")
  indizes <- dimnames(values)$indicator
  auc <- array(NA,dim = c(length(indizes)))
  dimnames(auc) <- list(indicator = indizes)
  best_fit <- array(NA,dim=c(length(indizes),2))
  dimnames(best_fit) <- list(indicator = indizes, type = c("max_diff","slope"))

  # Set plot layout
  #layout(mat = matrix(c(1,2,3), nrow = 1,  ncol = 3, byrow = FALSE),
  #       heights = c(2,2),    # Heights of the rows
  #       widths = c(1,0.4,0.4))     # Widths of the columns
  #layout.show(3)
  cex_perc = 2.5
  colz <- RColorBrewer::brewer.pal(12,"Set3")[-c(2)]

  plot(NA, type = "n", xlab = "FP", ylab = "TP", main = "ROC curve", cex.main = cex_perc,
       xlim = c(0,1), ylim = c(0,1), cex.lab = cex_perc, cex.axis = cex_perc, mgp = c(3, 2, 0))
  abline(a = 0, b = 1, lty = "dashed", col = "black")
  for (ind in indizes) {
    indx <- match(ind,indizes)
    lines(x = values[,ind,2], y = values[,ind,1], col = colz[indx], lwd = cex_perc)
    values[is.na(values[,ind,3]),ind,3] <- 0
    best_max_diff <- sort(abs(values[,ind,2] - values[,ind,1]),decreasing = T)[1:5]
    best_slope <- sort(abs(values[,ind,3] - 1 ),decreasing = F)[1:5]
    closest <- function(x,y){abs((x - y)/sqrt(2))}
    best_max_diff_thr <- names(sort(closest(x = values[names(best_max_diff),ind,2],
                                            y = values[names(best_max_diff),ind,1]), decreasing = T)[1])
    best_slope_thr <- names(sort(closest(x = values[names(best_slope),ind,2],
                                         y = values[names(best_slope),ind,1]), decreasing = T)[1])
    best_fit[ind,] <- c(best_max_diff_thr, best_slope_thr)
    points(x = values[best_max_diff_thr,ind,2], y = values[best_max_diff_thr,ind,1],
           col = colz[indx], cex = 4, lwd = cex_perc)
    # AUC
    n <- length(values[,ind,2])
    auc[indx] <- sum(abs(values[2:n,ind,2] - values[1:(n - 1),ind,2]) *
                       values[1:(n - 1),ind,1])
  }
  legend("bottomright",legend = paste(indizes,round(auc,2)), col = colz,
         cex = cex_perc, lwd = cex_perc)
  if (!is.null(filename)) dev.off()
  return(list(
    auc = auc,
    optimal_value = best_fit
  ))
}

roc_plot_paper <- function(filename = NULL,
                     values # both metric
) {
  if (!is.null(filename)) {
    png(filename, res = 300, width = 6, height = 7, units = "in",
        pointsize = 6, type = "cairo")
  }
  par(oma = c(0,0,0,0), mar = c(6,10,5,0), bty = "o", xpd = F,xaxs="i",yaxs="i")
  vars <- dimnames(values)$metric
  indicators <- dimnames(values)$indicator
  indizes <- seq_along(indicators)

  best_fit <- array(NA,dim=c(length(indicators),2,2))
  dimnames(best_fit) <- list(indicator = indicators,
                  metric = vars, type = c("max_diff","slope"))

  auc <- array(NA,dim = c(length(indicators),length(vars)))
  dimnames(auc) <- list(indicator = indicators, metric = vars)

  # Set plot layout
  layout(mat = matrix(c(1,2,3,4,5,6), nrow = 2,  ncol = 3, byrow = FALSE),
         heights = c(2,2),    # Heights of the rows
         widths = c(1,0.4,0.4))     # Widths of the columns
  layout.show(6)
  cex_perc = 2.5
  colz <- RColorBrewer::brewer.pal(12,"Set3")[-c(2)]

  for (var in vars){
    plot(NA, type = "n", xlab = "FP", ylab = "TP", main = "ROC curve", cex.main = cex_perc,
         xlim = c(0,1), ylim = c(0,1), cex.lab = cex_perc, cex.axis = cex_perc, mgp = c(3, 2, 0))
    if (var == "BioCol") {
      mtext(side = 3, at = -0.2, "A", xpd = NA, cex = 1.5, font = 2)
      mtext(side = 2, at = 0.5, line = 6, var, xpd = NA, cex = 2, font = 2, srt = 90)
    }else{ #var = "EcoRisk"
      mtext(side = 3, at = -0.2, "B", xpd = NA, cex = 1.5, font = 2)
      mtext(side = 2, at = 0.5, line = 6, var, xpd = NA, cex = 2, font = 2, srt = 90)
    }
    abline(a = 0, b = 1, lty = "dashed", col = "black")
    for (ind in indizes) {
      lines(x = values[,var,ind,2], y = values[,var,ind,1], col = colz[ind], lwd = cex_perc)
      #points(x = values[,ind,2], y = values[,ind,1])
      values[is.na(values[,var,ind,3]),var,ind,3] <- 0
      best_max_diff <- sort(abs(values[,var,ind,2] - values[,var,ind,1]),decreasing = T)[1:5]
      best_slope <- sort(abs(values[,var,ind,3] - 1 ),decreasing = F)[1:5]
      closest <- function(x,y){abs((x - y)/sqrt(2))}
      best_max_diff_thr <- names(sort(closest(x = values[names(best_max_diff),var,ind,2],
                                              y = values[names(best_max_diff),var,ind,1]), decreasing = T)[1])
      best_slope_thr <- names(sort(closest(x = values[names(best_slope),var,ind,2],
                                           y = values[names(best_slope),var,ind,1]), decreasing = T)[1])
      best_fit[ind,match(var,vars),] <- c(best_max_diff_thr, best_slope_thr)
      points(x = values[best_max_diff_thr,var,ind,2], y = values[best_max_diff_thr,var,ind,1],
             col = colz[ind], cex = 4, lwd = cex_perc)

      # AUC
      n <- length(values[,var,ind,2])
      auc[ind,var] <- sum(abs(values[2:n,var,ind,2] - values[1:(n - 1),var,ind,2]) *
                            values[1:(n - 1),var,ind,1])
      legend("bottomright",legend = indicators, col = colz[indizes],
             cex = cex_perc, lwd = cex_perc)
    }
  }

  # optimal thresholds boxplots
  biocol_threshold <- boxplot(as.numeric(best_fit[indizes,"BioCol",1]), ylim = c(0,1), cex.main = cex_perc,
                              main = "optimal\nthreshold", cex.lab = cex_perc, cex.axis = cex_perc)
  points(x = rep(0.7, length(indizes)), y = as.numeric(best_fit[indizes,"BioCol",1]),
         col = scales::alpha(colz[indizes],1), lwd = 2)
  text(x = 1, y = max(biocol_threshold$stats)+0.2, paste0("median:\n",median(biocol_threshold$stats)), cex = cex_perc)
  mtext(side = 3, at = 0, "C", xpd = NA, cex = 1.5, font = 2)

  ecorisk_threshold <- boxplot(as.numeric(best_fit[indizes,"EcoRisk",1]),
                               main = "optimal\nthreshold", cex.main = cex_perc,
                               ylim = c(0,1), cex.lab = cex_perc, cex.axis = cex_perc)
  points(x = rep(0.7, length(indizes)), y = as.numeric(best_fit[indizes,"EcoRisk",1]),
         col = scales::alpha(colz[indizes],1), lwd = 2)
  text(x = 1, y = max(ecorisk_threshold$stats)+0.2, paste0("median:\n", median(ecorisk_threshold$stats)), cex = cex_perc)
  mtext(side = 3, at = 0, "D", xpd = NA, cex = 1.5, font = 2)

  # AUC boxplots
  biocol_auc <- boxplot(auc[indizes,"BioCol"], ylim = c(0,1), main = "AUC", cex.main = cex_perc,
                        cex.lab = cex_perc, cex.axis = cex_perc)
  points(x = rep(0.7, length(indizes)), y = auc[indizes,"BioCol"],
         col = scales::alpha(colz[indizes],1), lwd = 2)
  mtext(side = 3, at = 0, "E", xpd = NA, cex = 1.5, font = 2)

  ecorisk_auc <- boxplot(auc[indizes,"EcoRisk"], main = "AUC", cex.main = cex_perc,
                         ylim = c(0,1), cex.lab = cex_perc, cex.axis = cex_perc)
  points(x = rep(0.7, length(indizes)), y = auc[indizes,"EcoRisk"],
         col = scales::alpha(colz[indizes],1), lwd = 2)
  mtext(side = 3, at = 0, "F", xpd = NA, cex = 1.5, font = 2)

  if (!is.null(filename)) dev.off()
  return(list(
    auc = auc,
    biocol_threshold = biocol_threshold,
    ecorisk_threshold = ecorisk_threshold
  ))
}