# ====================================================================
#
# Copyright 2025, PBL Netherlands Environmental Assessment Agency
# See the copyright notice at the end of this file.
#
# ====================================================================


# functions to evaluate the cross-validated SDMs 
#   1.  FindOptimStat: calculates the optimal TSS, AUC or MCC value. 
#       This script is modified from the bm_FindOptimStat function in the biomod2-package.
#       It is downloaded from https://rdrr.io/github/biomodhub/biomod2/src/R/bm_FindOptimStat.R on 12 October 2023

#       To the list of evaluation metrics Matthews Correlation Coefficient (MCC) is added 
#       in the functions bm_CalculateStat and get_optim_value

#       I also added the functions fun_testIf01 and fun_testIfPosNum to the script
#       (named .fun_testIf01 and .fun_testIfPosNum in biomod2), as these are needed
#       in the function bm_FindOptimStat.check.args.
#       They are downloaded from: https://rdrr.io/github/biomodhub/biomod2/src/R/biomod2_internal.R

#   2.  Evaluate_CV_SDMs: evaluate the cross validated SDM. 
#       the algorithms are weighted with a metric of choice. Either MCC, TSS or ROC
#       (you can find this function at the end of this script) 

###################################################################################################
##' @name bm_FindOptimStat
##' @aliases bm_FindOptimStat
##' @aliases bm_CalculateStat
##' @aliases get_optim_value
##' @author Damien Georges
##' 
##' @title Calculate the best score according to a given evaluation method
##'
##' @description This internal \pkg{biomod2} function allows the user to find the threshold to 
##' convert continuous values into binary ones leading to the best score for a given evaluation 
##' metric.
##'
##'
##' @param metric.eval a \code{character} corresponding to the evaluation metric to be used, must 
##' be either \code{ROC}, \code{TSS}, \code{KAPPA}, \code{ACCURACY}, \code{BIAS}, \code{POD}, 
##' \code{FAR}, \code{POFD}, \code{SR}, \code{CSI}, \code{ETS}, \code{HK}, \code{HSS}, \code{OR}, 
##' \code{ORSS}, \code{BOYCE}, \code{MPA}
##' @param obs a \code{vector} of observed values (binary, \code{0} or \code{1})
##' @param fit a \code{vector} of fitted values (continuous)
##' @param nb.thresh an \code{integer} corresponding to the number of thresholds to be 
##' tested over the range of fitted values
##' @param threshold (\emph{optional, default} \code{NULL}) \cr 
##' A \code{numeric} corresponding to the threshold used to convert the given data
##' @param boyce.bg.env (\emph{optional, default} \code{NULL}) \cr 
##' A \code{matrix}, \code{data.frame}, \code{\link[terra:vect]{SpatVector}}
##' or \code{\link[terra:rast]{SpatRaster}} object containing values of 
##' environmental variables (in columns or layers) extracted from the background 
##' (\emph{if presences are to be compared to background instead of absences or 
##' pseudo-absences selected for modeling})
##' \cr \emph{Note that old format from \pkg{raster} and \pkg{sp} are still supported such as 
##' \code{RasterStack} and \code{SpatialPointsDataFrame} objects. }
##' @param mpa.perc a \code{numeric} between \code{0} and \code{1} corresponding to the percentage 
##' of correctly classified presences for Minimal Predicted Area (see \code{ecospat.mpa()} in 
##' \pkg{ecospat})
##' @param misc a \code{matrix} corresponding to a contingency table
##'
##'
##' @return 
##' 
##' A \code{1} row x \code{5} columns \code{data.frame} containing :
##' \itemize{
##'   \item{\code{metric.eval}}{ : the chosen evaluation metric}
##'   \item{\code{cutoff}}{ : the associated cut-off used to transform the continuous values into 
##'   binary}
##'   \item{\code{sensitivity}}{ : the sensibility obtained on fitted values with this threshold}
##'   \item{\code{specificity}}{ : the specificity obtained on fitted values with this threshold}
##'   \item{\code{best.stat}}{ : the best score obtained for the chosen evaluation metric}
##' }
##' 
##'
##' @details
##' 
##' \emph{Please refer to \code{\link{BIOMOD_Modeling}} to get more information about these 
##' evaluation metrics.}
##' 
##' Note that if a value is given to \code{threshold}, no optimisation will be done, and 
##' only the score for this threshold will be returned.
##' 
##' The Boyce index returns \code{NA} values for \code{SRE} models because it can not be 
##' calculated with binary predictions. \cr This is also the reason why some \code{NA} values 
##' might appear for \code{GLM} models if they do not converge.
##'
##' 
##' @note In order to break dependency loop between packages \pkg{biomod2} and \pkg{ecospat}, 
##' code of \code{ecospat.boyce()} and \code{ecospat.mpa()} in \pkg{ecospat})
##' functions have been copied within this file from version 3.2.2 (august 2022).
## \code{\link[ecospat]{ecospat.mpa}}) # generate R CMD Check error due 
## to crossref missing ecospat package
## code of \code{\link[ecospat]{ecospat.boyce}} and \code{\link[ecospat]{ecospat.mpa}} 
## generate R CMD Check error due to crossref missing ecospat package 
##'
##'
##' @references
##' 
##' \itemize{
##'   \item Engler, R., Guisan, A., and Rechsteiner L. 2004. An improved approach for predicting 
##'   the distribution of rare and endangered species from occurrence and pseudo-absence data. 
##'   \emph{Journal of Applied Ecology}, \bold{41(2)}, 263-274.
##'   \item Hirzel, A. H., Le Lay, G., Helfer, V., Randin, C., and Guisan, A. 2006. Evaluating 
##'   the ability of habitat suitability models to predict species presences. \emph{Ecological 
##'   Modelling}, \bold{199(2)}, 142-152.
##' }
##'
##' @keywords models options evaluation
##' 
##'
##' @seealso \code{ecospat.boyce()} and \code{ecospat.mpa()} in \pkg{ecospat}, 
##' \code{\link{BIOMOD_Modeling}}, \code{\link{bm_RunModelsLoop}}, 
##' \code{\link{BIOMOD_EnsembleModeling}}
##' @family Secondary functions
##' 
##' 
##' @examples
##' 
##' ## Generate a binary vector
##' vec.a <- sample(c(0, 1), 100, replace = TRUE)
##' 
##' ## Generate a 0-1000 vector (random drawing)
##' vec.b <- runif(100, min = 0, max = 1000)
##' 
##' ## Generate a 0-1000 vector (biased drawing)
##' BiasedDrawing <- function(x, m1 = 300, sd1 = 200, m2 = 700, sd2 = 200) {
##'   return(ifelse(x < 0.5, rnorm(1, m1, sd1), rnorm(1, m2, sd2)))
##' }
##' vec.c <- sapply(vec.a, BiasedDrawing)
##' vec.c[which(vec.c < 0)] <- 0
##' vec.c[which(vec.c > 1000)] <- 1000
##' 
##' ## Find optimal threshold for a specific evaluation metric
##' bm_FindOptimStat(metric.eval = 'TSS', fit = vec.b, obs = vec.a)
##' bm_FindOptimStat(metric.eval = 'TSS', fit = vec.c, obs = vec.a, nb.thresh = 100)
##' bm_FindOptimStat(metric.eval = 'TSS', fit = vec.c, obs = vec.a, threshold = 280)
##'
##' 
##' @importFrom stats median complete.cases
##' @importFrom pROC roc coords auc
##' @importFrom PresenceAbsence presence.absence.accuracy
##' 
##' @export
##' 
##' 
###################################################################################################


## Find the threshold that maximises a given evaluation metric and return the
## corresponding scores. Modified from biomod2::bm_FindOptimStat (see the
## roxygen block above and the file header for the list of modifications:
## MCC, F and DSS metrics added).
##
## Arguments:
##   metric.eval  : character, evaluation metric ('TSS', 'ROC', 'MCC', ...)
##   obs          : vector of observed values (binary 0/1)
##   fit          : vector of fitted values (continuous)
##   nb.thresh    : number of thresholds tested over the range of fitted values
##   threshold    : optional fixed threshold; if given, no optimisation is done
##   boyce.bg.env : optional background environment data (BOYCE metric only)
##   mpa.perc     : 0-1 numeric, percentage of correctly classified presences
##                  (MPA metric only)
##
## Returns a 1-row data.frame with columns metric.eval, cutoff, sensitivity,
## specificity and best.stat, or a 1-row NA matrix when no finite obs/fit
## pairs remain after filtering.
FindOptimStat <- function(metric.eval = 'TSS',
                                      obs,
                                      fit,
                                      nb.thresh = 100,
                                      threshold = NULL,
                                      boyce.bg.env = NULL,
                                      mpa.perc = 0.9)
{
  ## 0. Check arguments ---------------------------------------------------------------------------
  args <- bm_FindOptimStat.check.args(threshold, boyce.bg.env, mpa.perc)
  for (argi in names(args)) { assign(x = argi, value = args[[argi]]) }
  rm(args)
  
  ## remove all non-finite values (NA, NaN, +/-Inf), pairwise over obs and fit
  to_keep <- (is.finite(fit) & is.finite(obs))
  obs <- obs[to_keep]
  fit <- fit[to_keep]
  
  ## check some data is still here
  ## (scalar conditions, so use `||` rather than the vectorised `|`)
  if (!length(obs) || !length(fit)) {
    cat("Non finite obs or fit available => model evaluation skipped !")
    ## NOTE(review): this early exit returns a matrix whose column order differs
    ## from the data.frame built at the end of the function -- callers should
    ## index by name, not position
    eval.out <- matrix(NA, 1, 4, dimnames = list(metric.eval, c("best.stat", "cutoff", "sensitivity", "specificity")))
    return(eval.out)
  }
  
  if (length(unique(obs)) == 1 || length(unique(fit)) == 1) {
    warning("\nObserved or fitted data contains a unique value... Be careful with this models predictions\n", immediate. = TRUE)
  }
  
  
  if (!(metric.eval %in% c('ROC', 'BOYCE', 'MPA')))
  { ## for all evaluation metrics other than ROC, BOYCE, MPA --------------------------------------
    
    ## 1. get threshold values to be tested -----------------------------------
    if (is.null(threshold)) { # test a range of threshold to get the one giving the best score
      
      ## guess fit value scale (e.g. 0-1 for a classic fit or 0-1000 for a biomod2 model fit)
      fit.scale <- guess_scale(fit)
      
      if (length(unique(fit)) == 1) {
        valToTest <- unique(fit)
        ## add 2 values to test based on mean with 0 and the guessed max of fit (1 or 1000)
        valToTest <- round(c(mean(c(fit.scale["min"], valToTest)),
                             mean(c(fit.scale["max"], valToTest))))
      } else {
        mini <- max(min(fit, na.rm = TRUE), fit.scale["min"], na.rm = TRUE)
        maxi <- min(max(fit, na.rm = TRUE), fit.scale["max"], na.rm = TRUE)
        valToTest <- try(unique(round(c(seq(mini, maxi, length.out = nb.thresh), mini, maxi), digits = 3)))
        if (inherits(valToTest, "try-error")) {
          valToTest <- seq(fit.scale["min"], fit.scale["max"], length.out = nb.thresh)
        }
        # deal with unique value to test case
        if (length(valToTest) < 3) {
          valToTest <- round(c(mean(fit.scale["min"], mini), valToTest, mean(fit.scale["max"], maxi)))
        }
      }
    } else { # test only one value
      valToTest <- threshold
    }
    
    ## 2. apply the bm_CalculateStat function ---------------------------------
    ## one contingency table (fit > threshold vs obs) per candidate threshold
    calcStat <- sapply(lapply(valToTest, function(x) {
      return(table(fit > x, obs))
    }), bm_CalculateStat, metric.eval = metric.eval)
    
    ## 3. scale obtained scores and find best value ---------------------------
    ## rescale so that 1 is always the optimum, whatever the metric's own optimum
    StatOptimum <- get_optim_value(metric.eval)
    calcStat <- 1 - abs(StatOptimum - calcStat)
    if(metric.eval == "DSS"){
      best.stat <- min(calcStat, na.rm = TRUE)
      cutoff <- median(valToTest[which(calcStat == best.stat)]) # if several values are selected
      best.stat <- NA # DSS is not a performance metric, so the optimal value is not informative
    } else {
      best.stat <- max(calcStat, na.rm = TRUE)
      cutoff <- median(valToTest[which(calcStat == best.stat)]) # if several values are selected
      }
    
    
    misc <- table(fit > cutoff, obs)
    misc <- contingency_table_check(misc)
    
    ## calculate basic classification information -------------------------------
    hits <- as.numeric(misc['TRUE', '1']) ## true positives
    misses <- as.numeric(misc['FALSE', '1']) ##  false negatives
    false_alarms <- as.numeric(misc['TRUE', '0']) ## false positives 
    correct_negatives <- as.numeric(misc['FALSE', '0']) ## true negatives
    
    ## sensitivity/specificity expressed as percentages (0-100)
    specificity <- (correct_negatives * 100) / sum(misc[, '0'])
    sensitivity <- (hits * 100) / sum(misc[, '1'])

  } else if (metric.eval == 'ROC') { ## specific procedure for ROC value --------------------------
    roc1 <- pROC::roc(obs, fit, percent = TRUE, direction = "<", levels = c(0, 1)) ## MH: I added the package of the function
    roc1.out <- pROC::coords(roc1, "best", ret = c("threshold", "sens", "spec"), transpose = TRUE) ## MH: I added the package of the function
    ## if two optimal values are returned keep only the first one
    if (!is.null(ncol(roc1.out))) { roc1.out <- roc1.out[, 1] }
    best.stat <- as.numeric(pROC::auc(roc1)) / 100
    cutoff <- as.numeric(roc1.out["threshold"])
    specificity <- as.numeric(roc1.out["specificity"])
    sensitivity <- as.numeric(roc1.out["sensitivity"])
    
    misc <- table(fit > cutoff, obs)
    misc <- contingency_table_check(misc)
    
    ## calculate basic classification information -------------------------------
    hits <- as.numeric(misc['TRUE', '1']) ## true positives
    misses <- as.numeric(misc['FALSE', '1']) ##  false negatives
    false_alarms <- as.numeric(misc['TRUE', '0']) ## false positives 
    correct_negatives <- as.numeric(misc['FALSE', '0']) ## true negatives
    

  } else { ## specific procedure for BOYCE, MPA values --------------------------------------------
    ## Prepare table to compute evaluation scores
    ## (fit / 1000: PresenceAbsence expects 0-1 predictions, biomod2 fits are 0-1000)
    DATA <- cbind(1:length(fit), obs, fit / 1000)
    DATA[is.na(DATA[, 2]), 2] <- 0
    DATA <- DATA[complete.cases(DATA), ]
    cutoff <- sensitivity <- specificity <- best.stat <- NA
    
    if (metric.eval == "BOYCE") { ## Boyce index
      boy <- ecospat.boyce(obs = fit[which(obs == 1)], fit = fit, PEplot = FALSE)
      best.stat <- boy$cor
      if (sum(boy$F.ratio < 1, na.rm = TRUE) > 0) {
        cutoff <- round(boy$HS[max(which(boy$F.ratio < 1))], 0)
      }
    } else if (metric.eval == "MPA") {
      cutoff <- ecospat.mpa(fit[which(obs == 1)], perc = mpa.perc)
    }
    
    if (!is.na(cutoff / 1000)) {
      EVAL <- presence.absence.accuracy(DATA, threshold = cutoff / 1000)
      sensitivity <- EVAL$sensitivity * 100
      specificity <- EVAL$specificity * 100
    }
  }
  
  eval.out <- data.frame(metric.eval, cutoff, sensitivity, specificity, best.stat)
  return(eval.out)
}


# Check Arguments ---------------------------------------------------------------------------------

## Check that `objValue` is a numeric in [0, 1]; stops with an error otherwise.
## `test` is a logical flag carried through the biomod2-style check chain and
## is returned unchanged when the value is valid.
## (Copied from biomod2 internals; see file header.)
fun_testIf01 <- function(test, objName, objValue) 
{
  ## first make sure the value is a positive numeric
  test <- fun_testIfPosNum(test, objName, objValue)
  if (test && objValue > 1) {
    ## stop() aborts here, so the original `test <- FALSE` that followed it
    ## was unreachable and has been removed
    stop(paste0("\n", objName, " must be a 0 to 1 numeric"))
  }
  return(test)
}


## Check that `objValue` is a positive (>= 0) numeric; stops with an error
## otherwise. `test` is a logical flag carried through the biomod2-style check
## chain and is returned unchanged when the value is valid.
## (Copied from biomod2 internals; see file header.)
fun_testIfPosNum <- function(test, objName, objValue)
{
  if (!is.numeric(objValue)) {
    ## stop() aborts here, so the original `test <- FALSE` that followed it
    ## was unreachable and has been removed (same in the branch below)
    stop(paste0("\n", objName, " must be a numeric"))
  } else if (objValue < 0) {
    stop(paste0("\n", objName, " must be a positive numeric"))
  }
  return(test)
}

## Check and normalise the optional arguments of FindOptimStat
## (modified from the biomod2 internal .bm_FindOptimStat.check.args).
##
## threshold    : fixed threshold or NULL, passed through unchanged
## boyce.bg.env : background environmental data for the Boyce index; accepted
##                in several classes and converted to a data.frame here
## mpa.perc     : numeric checked to be in [0, 1] via fun_testIf01
##
## Returns a named list(threshold, boyce.bg.env, mpa.perc).
##
## NOTE(review): fun_testIfInherits, expl.var.names and
## .categorical_stack_to_terra are not defined in this file -- presumably they
## come from biomod2 internals sourced elsewhere in the project; confirm they
## are in scope before calling this with a non-NULL boyce.bg.env.
bm_FindOptimStat.check.args <- function(threshold, boyce.bg.env = NULL, mpa.perc = 0.9)
{
  ## 1. Check boyce.bg.env -----------------------------------------------------------
  if (!is.null(boyce.bg.env)) {
    available.types.resp <- c('numeric', 'data.frame', 'matrix', 'Raster',
                              'SpatialPointsDataFrame', 'SpatVector', 'SpatRaster')
    fun_testIfInherits(TRUE, "boyce.bg.env", boyce.bg.env, available.types.resp)
    
    ## a bare numeric vector is treated as a single environmental variable
    if(is.numeric(boyce.bg.env)) {
      boyce.bg.env <- as.data.frame(boyce.bg.env)
      colnames(boyce.bg.env) <- expl.var.names[1]
    } else if (inherits(boyce.bg.env, 'Raster')) {
      ## legacy raster objects are converted to terra SpatRaster first
      if(any(raster::is.factor(boyce.bg.env))){
        boyce.bg.env <- .categorical_stack_to_terra(boyce.bg.env)
      } else {
        boyce.bg.env <- rast(boyce.bg.env)
      }
    }
    ## all remaining spatial/matrix classes collapse to a plain data.frame
    if (is.matrix(boyce.bg.env) || inherits(boyce.bg.env, 'SpatRaster') || inherits(boyce.bg.env, 'SpatVector')) {
      boyce.bg.env <- as.data.frame(boyce.bg.env)
    } else if (inherits(boyce.bg.env, 'SpatialPoints')) {
      boyce.bg.env <- as.data.frame(boyce.bg.env@data)
    }
    
    ## remove NA from background data
    if (sum(is.na(boyce.bg.env)) > 0) {
      cat("\n      ! NAs have been automatically removed from boyce.bg.env data")
      boyce.bg.env <- na.omit(boyce.bg.env)
    }
  }
  
  ## 2. Check perc -----------------------------------------------------------
  fun_testIf01(TRUE, "mpa.perc", mpa.perc)
  
  return(list(threshold = threshold
              , boyce.bg.env = boyce.bg.env
              , mpa.perc = mpa.perc))
}


###################################################################################################

##'
##' @rdname bm_FindOptimStat
##' @export
##'

## Return the best attainable score for a given evaluation metric.
## FAR and POFD are error rates (optimum 0), the odds ratio (OR) is unbounded
## and approximated by 1e6; every other supported metric peaks at 1.
## An unknown metric returns NULL (the default behaviour of switch()).
## Modified from biomod2: MCC, F and DSS added.
get_optim_value <- function(metric.eval)
{
  switch(metric.eval,
         FAR = ,
         POFD = 0,
         OR = 1000000,
         TSS = ,
         KAPPA = ,
         ACCURACY = ,
         BIAS = ,
         POD = ,
         SR = ,
         CSI = ,
         ETS = ,
         HK = ,
         HSS = ,
         ORSS = ,
         ROC = ,
         BOYCE = ,
         MPA = ,
         MCC = ,
         F = ,
         DSS = 1)
}

###################################################################################################

## Guess the scale of fitted values: a classic model fit lies in [0, 1], a
## biomod2 model fit in [0, 1000]. Any maximum above 1 is taken as the 0-1000
## scale. Returns a named numeric vector c(min = 0, max = 1 or 1000).
## NOTE(review): assumes `fit` contains at least one non-NA value -- an all-NA
## input makes max() return -Inf with a warning; confirm callers guarantee this.
guess_scale <- function(fit)
{
  ## plain if/else instead of ifelse() (scalar condition); locals renamed so
  ## they no longer shadow base::min / base::max
  upper <- if (max(fit, na.rm = TRUE) <= 1) 1 else 1000
  c(min = 0, max = upper)
}

## Ensure `misc` is a valid 2x2 contingency table with rows 'FALSE'/'TRUE'
## (prediction) and columns '0'/'1' (observation):
##  - a one-row table (all predictions identical) is padded with a zero row;
##  - a table with wrong dimensions is replaced by an all-zero 2x2 matrix;
##  - rows named '0'/'1' are renamed to 'FALSE'/'TRUE';
##  - unrecognisable row/column names raise an error.
contingency_table_check <- function(misc)
{
  ## pad a one-row table (predictions all FALSE or all TRUE) with zeros
  if (dim(misc)[1] == 1) {
    if (row.names(misc)[1] == "FALSE") {
      misc <- rbind(misc, c(0, 0))
      rownames(misc) <- c('FALSE', 'TRUE')
    } else {
      a <- misc
      misc <- c(0, 0)
      misc <- rbind(misc, a)
      rownames(misc) <- c('FALSE', 'TRUE')
    }
  }
  
  ## scalar conditions: `||` instead of vectorised `|`; `<-` instead of `=`
  if (ncol(misc) != 2 || nrow(misc) != 2) {
    misc <- matrix(0, ncol = 2, nrow = 2, dimnames = list(c('FALSE', 'TRUE'), c('0', '1')))
  }
  
  if ((sum(colnames(misc) %in% c('FALSE', 'TRUE', '0', '1')) < 2) || 
      (sum(rownames(misc) %in% c('FALSE', 'TRUE', '0', '1')) < 2) ) {
    stop("Unavailable contingency table given")
  }
  
  ## harmonise row names: some callers produce '0'/'1' instead of FALSE/TRUE
  if ('0' %in% rownames(misc)) { rownames(misc)[which(rownames(misc) == '0')] <- 'FALSE' }
  if ('1' %in% rownames(misc)) { rownames(misc)[which(rownames(misc) == '1')] <- 'TRUE' }
  
  return(misc)
}

##'
##' @rdname bm_FindOptimStat
##' @export
##'

## Compute one evaluation metric from a 2x2 contingency table `misc`
## (rows: prediction FALSE/TRUE, columns: observation 0/1).
## Modified from biomod2: MCC, F and DSS metrics added (see file header).
##
## misc        : contingency table / matrix, normalised by contingency_table_check
## metric.eval : character, one of the metric names below
##
## Returns the (scalar) metric value.
bm_CalculateStat <- function(misc, metric.eval = 'TSS')
{
  ## check contingency table
  misc <- contingency_table_check(misc)
  
  ## calculate basic classification information -------------------------------
  hits <- as.numeric(misc['TRUE', '1']) ## true positives
  misses <- as.numeric(misc['FALSE', '1']) ##  false negatives
  false_alarms <- as.numeric(misc['TRUE', '0']) ## false positives 
  correct_negatives <- as.numeric(misc['FALSE', '0']) ## true negatives
  
  total <- as.numeric(sum(misc))
  forecast_1 <- as.numeric(sum(misc['TRUE', ]))
  forecast_0 <- as.numeric(sum(misc['FALSE', ]))
  observed_1 <- as.numeric(sum(misc[, '1']))
  observed_0 <- as.numeric(sum(misc[, '0']))
  
  ## calculate chosen evaluation metric ---------------------------------------
  ## (returned directly; the original `out = switch(...)` assignment was unused)
  switch(metric.eval
         , 'TSS' = (hits / (hits + misses)) + (correct_negatives / (false_alarms + correct_negatives)) - 1
         , 'KAPPA' = {
           Po <- (1 / total) * (hits + correct_negatives)
           Pe <- ((1 / total) ^ 2) * ((forecast_1 * observed_1) + (forecast_0 * observed_0))
           return((Po - Pe) / (1 - Pe))
         }
         , 'ACCURACY' = (hits + correct_negatives) / total
         , 'BIAS' = (hits + false_alarms) / (hits + misses)
         , 'POD' =  hits / (hits + misses)
         , 'FAR' = false_alarms / (hits + false_alarms)
         , 'POFD' = false_alarms / (correct_negatives + false_alarms)
         , 'SR' = hits / (hits + false_alarms)
         , 'CSI' = hits / (hits + misses + false_alarms)
         , 'ETS' = {
           hits_rand <- ((hits + misses) * (hits + false_alarms)) / total
           return((hits - hits_rand) / (hits + misses + false_alarms - hits_rand))
         }
         , 'HK' = (hits / (hits + misses)) - (false_alarms / (false_alarms + correct_negatives))
         , 'HSS' = {
           expected_correct_rand <- (1 / total) * 
             (((hits + misses) * (hits + false_alarms)) + ((correct_negatives + misses) * (correct_negatives + false_alarms)))
           return((hits + correct_negatives - expected_correct_rand) / (total - expected_correct_rand))
         }
         , 'OR' = (hits * correct_negatives) / (misses * false_alarms)
         , 'ORSS' = (hits * correct_negatives - misses * false_alarms) / (hits * correct_negatives + misses * false_alarms)
         , 'MCC' = {
           MCC <- (hits * correct_negatives - false_alarms * misses) / 
             sqrt(as.numeric(hits + false_alarms) * as.numeric(hits + misses) * as.numeric(correct_negatives + false_alarms) * as.numeric(correct_negatives + misses) )
           
           # MCC is undefined when more than 1 value in the confusion matrix equals 0
           # for these instances a mathematical approximation can be made
           # (all counts are scalars here, so `&&` / `||` are used)
           
           # if FN and FP are 0, the prediction is perfect, MCC = 1
           if (misses == 0 && false_alarms == 0 && (hits > 0 || correct_negatives > 0)) { MCC <- 1 }
           # if TN and TP are 0, the prediction is perfectly incorrect, MCC = -1
           if (correct_negatives == 0 && hits == 0 && (false_alarms > 0 || misses > 0)) { MCC <- -1 }
           # if two other combinations of values in the confusion matrix are equal to 0, the prediction is random. MCC = 0
           if (false_alarms == 0 && correct_negatives == 0 && (hits > 0 && misses > 0)) { MCC <- 0 }
           if (misses == 0 && correct_negatives == 0 && (hits > 0 && false_alarms > 0)) { MCC <- 0 }
           if (hits == 0 && false_alarms == 0 && (misses > 0 && correct_negatives > 0)) { MCC <- 0 }
           if (hits == 0 && misses == 0 && (false_alarms > 0 && correct_negatives > 0)) { MCC <- 0 }
           
           return(MCC)
         }
         , 'F' = 2*hits / (misses + 2*hits + false_alarms)
         , 'DSS' = abs((hits / (hits + misses)) - (correct_negatives / (correct_negatives +  false_alarms)))
         
  )
}


###################################################################################################
## FROM ECOSPAT PACKAGE VERSION 3.2.2 (august 2022)

#### Calculating Boyce index as in Hirzel et al. 2006
# fit: A vector or Raster-Layer containing the predicted suitability values 
# obs: A vector containing the predicted suitability values or xy-coordinates 
# (if fit is a Raster-Layer) of the validation points (presence records)
# nclass : number of classes or vector with classes threshold. If nclass=0, Boyce index is 
# calculated with a moving window (see next parameters)
# windows.w : width of the moving window (by default 1/10 of the suitability range)
# res : resolution of the moving window (by default 100 focals)
# PEplot : if True, plot the predicted to expected ratio along the suitability class
# rm.duplicate : if TRUE, the correlation exclude successive duplicated values
# method : correlation method used to compute the boyce index


## Calculate the Boyce index (Hirzel et al. 2006); copied from ecospat 3.2.2
## (see the comment block above for the argument descriptions).
## Returns list(F.ratio = predicted/expected ratio per class, cor = Boyce
## index rounded to 3 decimals (NA if < 2 classes), HS = mean habitat
## suitability per class).
ecospat.boyce <- function(fit, obs, nclass = 0, window.w = "default", res = 100, 
                          PEplot = TRUE, rm.duplicate = TRUE, method = 'spearman')
{
  
  #### internal function calculating predicted-to-expected ratio for each class-interval
  boycei <- function(interval, obs, fit) {
    pi <- sum(as.numeric(obs >= interval[1] & obs <= interval[2])) / length(obs)
    ei <- sum(as.numeric(fit >= interval[1] & fit <= interval[2])) / length(fit)
    return(round(pi/ei,10))
  }
  
  # here, ecospat.boyce should not receive RasterLayer
  # if (inherits(fit,"RasterLayer")) {
  #   if (is.data.frame(obs) || is.matrix(obs)) {
  #     obs <- extract(fit, obs)
  #   }
  #   fit <- getValues(fit)
  #   fit <- fit[!is.na(fit)]
  # }
  
  mini <- min(fit,obs)
  maxi <- max(fit,obs)
  
  if(length(nclass)==1){
    if (nclass == 0) { #moving window
      if (window.w == "default") {window.w <- (max(fit) - min(fit))/10}
      vec.mov <- seq(from = mini, to = maxi - window.w, by = (maxi - mini - window.w)/res)
      vec.mov[res + 1] <- vec.mov[res + 1] + 1  #Trick to avoid error with closed interval in R
      interval <- cbind(vec.mov, vec.mov + window.w)
    } else{ #window based on nb of class
      vec.mov <- seq(from = mini, to = maxi, by = (maxi - mini)/nclass)
      interval <- cbind(vec.mov, c(vec.mov[-1], maxi))
    }
  } else{ #user defined window
    vec.mov <- c(mini, sort(nclass[!nclass>maxi|nclass<mini]))
    interval <- cbind(vec.mov, c(vec.mov[-1], maxi))
  }
  
  f <- apply(interval, 1, boycei, obs, fit)
  to.keep <- which(!is.nan(f))  # index to keep no NaN data (was `f != "NaN"`, a string comparison)
  f <- f[to.keep]
  if (length(f) < 2) {
    b <- NA  #at least two points are necessary to draw a correlation
  } else {
    r<-1:length(f)
    if(rm.duplicate == TRUE){
      r <- c(1:length(f))[f != c( f[-1],TRUE)]  #index to remove successive duplicates
    }
    b <- cor(f[r], vec.mov[to.keep][r], method = method)  # calculation of the correlation (i.e. Boyce index) after removing successive duplicated values
  }
  HS <- apply(interval, 1, sum)/2  # mean habitat suitability in the moving window
  ## fix: `&&` instead of `&` -- with a user-supplied nclass vector the old
  ## `length(nclass)==1 & nclass == 0` produced a length > 1 condition,
  ## which errors in R >= 4.2; `&&` short-circuits on the length check
  if (length(nclass) == 1 && nclass == 0) {
    HS[length(HS)] <- HS[length(HS)] - 1  #Correction of the 'trick' to deal with closed interval
  }
  HS <- HS[to.keep]  #exclude the NaN
  if (PEplot == TRUE) {
    plot(HS, f, xlab = "Habitat suitability", ylab = "Predicted/Expected ratio", col = "grey", cex = 0.75)
    ## NOTE(review): `r` is undefined here when length(f) < 2 -- plotting with
    ## fewer than 2 kept classes would error; confirm callers use PEplot = FALSE
    ## in that case (FindOptimStat does)
    points(HS[r], f[r], pch = 19, cex = 0.75)
  }
  return(list(F.ratio = f, cor = round(b, 3), HS = HS))
}


###################################################################################################
## FROM ECOSPAT PACKAGE VERSION 3.2.2 (august 2022)

## This function calculates the Minimal Predicted Area.
## 
## R. Patin, Nov. 2022 : the function does not return minimal predicted area, but
## rather the quantile of suitability corresponding to perc% of presence
##  correctly predicted
##  
## FUNCTION'S ARGUMENTS
## Pred:      numeric or RasterLayer .predicted suitabilities from a SDM prediction
## Sp.occ.xy: xy-coordinates of the species (if Pred is a RasterLayer)
## perc:      Percentage of Sp.occ.xy that should be encompassed by the binary map.

## Details:

## Value
# Returns the Minimal Predicted Area

## Author(s)
# Frank Breiner

## References
# Engler, R., A. Guisan, and L. Rechsteiner. 2004. An improved approach for predicting the 
# distribution of rare and endangered species from occurrence and pseudo-absence data. 
# Journal of Applied Ecology 41:263-274.


## Quantile of suitability below which (1 - perc) of the presence predictions
## fall, i.e. the threshold at which `perc` of the presences are correctly
## predicted (copied from ecospat 3.2.2; see the comment block above).
## Pred      : numeric (or RasterLayer) of predicted suitabilities
## Sp.occ.xy : optional presence coordinates; if given, suitabilities are
##             extracted from Pred at these locations
## perc      : fraction of presences to encompass (default 0.9)
## Returns the threshold rounded to 3 decimals (named, as quantile() returns).
ecospat.mpa <- function(Pred, Sp.occ.xy = NULL, perc = 0.9)
{
  if (!is.null(Sp.occ.xy)) {
    Pred <- extract(Pred, Sp.occ.xy)
  }
  ## the (1 - perc) lower quantile of the presence suitabilities
  round(quantile(Pred, probs = 1 - perc, na.rm = TRUE), 3)
}

### EXAMPLE

# obs <- (ecospat.testData$glm_Saxifraga_oppositifolia
# [which(ecospat.testData$Saxifraga_oppositifolia==1)]) ecospat.mpa(obs) ecospat.mpa(obs,perc=1) ##
# 100% of the presences encompassed


## Example script for using Ensemble Small Models ESMs according to Lomba et al. 2010 Written by
## Frank Breiner Swiss Federal Research Institute WSL, July 2014.

## References: Lomba, A., Pellissier, L., Randin, C.F., Vicente, J., Moreira, F., Honrado, J. &
## Guisan, A. (2010). Overcoming the rare species modeling paradox: A novel hierarchical framework
## applied to an Iberian endemic plant. Biological Conservation, 143, 2647-2657.



################====================================================================================
################====================================================================================
################====================================================================================
################====================================================================================


 

## Evaluate cross-validated SDMs (GLM, GAM, BRT/GBM) on the held-out test data
## and weight the algorithms with the chosen performance metric (MCC, TSS or ROC).
##
## ModelFits_CV    : list with cross-validated biomod2 model fits
##                   ($ModelFit_GAMGLM and $ModelFit_BRT)
## ObservationData : list containing $AllData_test, the held-out test records
##                   -- assumed to be a data.table with columns x, y, observed
##                   and the predictor variables (the `[, .(...)]` syntax below
##                   requires data.table); TODO confirm
## myRespName      : character, species (response) name
##
## NOTE(review): relies on several objects from the calling environment that
## are not defined in this file: VariableData, PerformanceMetric, user_dir,
## base_dir, species_out_dir, LogFile, WriteLogFile -- verify they are in
## scope before calling. If PerformanceMetric is none of "ROC"/"TSS"/"MCC",
## `weights` is never assigned and the function will error.
##
## Returns list(weights = per-algorithm weights (GLM, GAM, BRT),
##              CV_Eval_Stats = 1-row data.table with MCC/TSS/ROC per
##              algorithm and for the weighted ensemble).
Evaluate_CV_SDMs <- function(ModelFits_CV, ObservationData, myRespName){
  ## load the fitted cross-validation models, one object per algorithm
  glm_model <- BIOMOD_LoadModels(ModelFits_CV$ModelFit_GAMGLM,algo=c("GLM"))
  gam_model <- BIOMOD_LoadModels(ModelFits_CV$ModelFit_GAMGLM,algo=c("GAM"))
  gbm_model <- BIOMOD_LoadModels(ModelFits_CV$ModelFit_BRT,algo=c("GBM"))
  
  # ONLY CONTINUE MODELLING IF ALL MODELS ARE CONVERGED
  ContinueAnalysis <- if(TRUE %in% c(is.na( glm_model ),is.na( gam_model ),is.na( gbm_model ))) FALSE else  TRUE
  if(ContinueAnalysis == FALSE){
    ## log the failure; note that execution continues below with NA predictions
    WriteLogFile(paste(user_dir, base_dir, species_out_dir,LogFile,sep="/"),
                 ln=paste0("ERROR: for " ,myRespName, " not all models converged. Stop script!"))
  }
  
  # make predictions for an ensemble based on the test dataset.
  # create new x and y coordinates, with the purpose of making a raster to calculate projections
  # the stackraster is made out of the dataframe instead of the original 1km-raster stack
  ## (artificial 2-row grid: x_temp is a running index, y_temp splits the
  ## records in two halves, purely to obtain a gridded layout)
  AllData_test_temp <- ObservationData$AllData_test
  AllData_test_temp$x_temp <- seq(1,length(AllData_test_temp$x),1)
  AllData_test_temp$y_temp <- c( rep(1,ceiling(length(AllData_test_temp$y)/2)),
                                 rep(2,length(AllData_test_temp$y) - ceiling(length(AllData_test_temp$y)/2)) )
  
  ## build a raster stack with one layer per predictor variable on the
  ## artificial grid; assumes "Precipitation" is the first variable in
  ## VariableData$Variables -- TODO confirm
  for(Var in VariableData$Variables){ 
    print ( Var)
    df <- subset(  AllData_test_temp, select = c("x_temp" , "y_temp", Var))
    # create spatial points data frame
    coordinates(df) <- ~x_temp + y_temp
    ## "salt" is a categorical predictor and must be a factor layer
    if(Var == "salt") {
      df$salt <- as.factor(df$salt)
    }
    # coerce to raster
    gridded(df) <- TRUE
    raster_Var <- raster(df)
    if(Var == "Precipitation") {
      rstack_TEST <- stack(raster_Var)
    }
    else{ rstack_TEST <- stack(rstack_TEST, raster_Var) }
  }
  
  
  # project GLM and GAM on cut off raster
  if(ContinueAnalysis == TRUE){
    proj_GAMGLM_TEST <- BIOMOD_Projection(
      bm.mod = ModelFits_CV$ModelFit_GAMGLM,
      new.env = rstack_TEST,
      proj.name = "Threshold",
      models.chosen = "all",
      compress ='xz',
      build.clamping.mask = F)
    
    R_Proj_GAMGLM_TEST <- get_predictions(proj_GAMGLM_TEST)
  } else { 
    R_Proj_GAMGLM_TEST <- NA
  }
  
  ## same projection for the BRT (GBM) model
  if(ContinueAnalysis == TRUE){
    proj_BRT_TEST <- BIOMOD_Projection(
      bm.mod = ModelFits_CV$ModelFit_BRT,
      new.env = rstack_TEST,
      proj.name = "Threshold",
      models.chosen = "all",
      compress ='xz',
      build.clamping.mask = FALSE )
    
    R_Proj_BRT_TEST <- get_predictions(proj_BRT_TEST)
  } else { 
    R_Proj_BRT_TEST <- NA
  }
  
  predictions_CO <- c(R_Proj_GAMGLM_TEST, R_Proj_BRT_TEST)
  #  stack predictions of the algorithms
  R_TEST_algo <- stack(predictions_CO)
  
  # make a spatial point dataframe
  obs_spdf<-SpatialPointsDataFrame( AllData_test_temp[,.(x_temp,y_temp)], AllData_test_temp[,.(observed)])
  
  # extract the value of environmental variables at a point location and add it to the coordinates
  obs_p_algo <- data.frame( extract( R_TEST_algo, obs_spdf, method='simple', sp=TRUE))
  # make column names without species name
  colnames(obs_p_algo)[grep("GLM" ,colnames(obs_p_algo) )] <- "GLM"
  colnames(obs_p_algo)[grep("GAM" ,colnames(obs_p_algo) )] <- "GAM"
  colnames(obs_p_algo)[grep("GBM" ,colnames(obs_p_algo) )] <- "GBM"
  
  Obs <- obs_p_algo$observed
  
  # evaluate cross validated algorithm predictions
  ## (FindOptimStat is defined at the top of this file; only best.stat is kept)
  # GLM
  Fit <- obs_p_algo$GLM
  MCC.GLM.cv <- FindOptimStat(metric.eval='MCC',fit=Fit, obs=Obs, nb.thresh = 100)
  MCC.GLM.cv <- MCC.GLM.cv$best.stat
  TSS.GLM.cv <- FindOptimStat(metric.eval='TSS',fit=Fit, obs=Obs, nb.thresh = 100)
  TSS.GLM.cv <- TSS.GLM.cv$best.stat
  ROC.GLM.cv <- FindOptimStat(metric.eval='ROC',fit=Fit, obs=Obs, nb.thresh = 100)
  ROC.GLM.cv <- ROC.GLM.cv$best.stat
  # GAM
  Fit <- obs_p_algo$GAM
  MCC.GAM.cv <- FindOptimStat(metric.eval='MCC',fit=Fit, obs=Obs, nb.thresh = 100)
  MCC.GAM.cv <- MCC.GAM.cv$best.stat
  TSS.GAM.cv <- FindOptimStat(metric.eval='TSS',fit=Fit, obs=Obs, nb.thresh = 100)
  TSS.GAM.cv <- TSS.GAM.cv$best.stat
  ROC.GAM.cv <- FindOptimStat(metric.eval='ROC',fit=Fit, obs=Obs, nb.thresh = 100)
  ROC.GAM.cv <- ROC.GAM.cv$best.stat
  # GBM
  Fit <- obs_p_algo$GBM
  MCC.BRT.cv <- FindOptimStat(metric.eval='MCC',fit=Fit, obs=Obs, nb.thresh = 100)
  MCC.BRT.cv <- MCC.BRT.cv$best.stat
  TSS.BRT.cv <- FindOptimStat(metric.eval='TSS',fit=Fit, obs=Obs, nb.thresh = 100)
  TSS.BRT.cv <- TSS.BRT.cv$best.stat
  ROC.BRT.cv <- FindOptimStat(metric.eval='ROC',fit=Fit, obs=Obs, nb.thresh = 100)
  ROC.BRT.cv <- ROC.BRT.cv$best.stat
  
  # calculate the ensemble map, based on the weighted mean
  ## weights follow the global PerformanceMetric setting (order: GLM, GAM, BRT)
  if(PerformanceMetric == "ROC"){
    weights <- c(ROC.GLM.cv, ROC.GAM.cv , ROC.BRT.cv)
  }
  if(PerformanceMetric == "TSS"){
    weights <- c(TSS.GLM.cv, TSS.GAM.cv , TSS.BRT.cv)
  }
  if(PerformanceMetric == "MCC"){
    weights <- c(MCC.GLM.cv, MCC.GAM.cv , MCC.BRT.cv)
  }
  
  R_TEST_ens <- weighted.mean(R_TEST_algo, weights)
  
  # extract the value of environmental variables at a point location and add it to the coordinates
  obs_p_ens <- data.frame( extract( R_TEST_ens, obs_spdf, method='simple', sp=TRUE))
  Fit <- obs_p_ens$layer
  
  # evaluate cross validated predictions of the ensemble model
  MCC.ens.cv <- FindOptimStat(metric.eval='MCC',fit=Fit, obs=Obs, nb.thresh = 100)
  MCC.ens.cv <- MCC.ens.cv$best.stat
  TSS.ens.cv <- FindOptimStat(metric.eval='TSS',fit=Fit, obs=Obs, nb.thresh = 100)
  TSS.ens.cv <- TSS.ens.cv$best.stat
  ROC.ens.cv <- FindOptimStat(metric.eval='ROC',fit=Fit, obs=Obs, nb.thresh = 100)
  ROC.ens.cv <- ROC.ens.cv$best.stat
  
  # make a datatable with all evaluation data
  CV_Eval_Stats <- data.table()
  CV_Eval_Stats[, c("Species", "MCC.GLM.cv", "TSS.GLM.cv", "ROC.GLM.cv", "MCC.GAM.cv", "TSS.GAM.cv", "ROC.GAM.cv", "MCC.BRT.cv", "TSS.BRT.cv", "ROC.BRT.cv", "MCC.ens.cv", "TSS.ens.cv", "ROC.ens.cv") :=
       list(myRespName, MCC.GLM.cv, TSS.GLM.cv, ROC.GLM.cv, MCC.GAM.cv, TSS.GAM.cv, ROC.GAM.cv, MCC.BRT.cv, TSS.BRT.cv, ROC.BRT.cv, MCC.ens.cv, TSS.ens.cv, ROC.ens.cv)]
  
  

  return(list(
    weights = weights,
    CV_Eval_Stats = CV_Eval_Stats
  ))
  
}


# ====================================================================
#
# Copyright 2025, PBL Netherlands Environmental Assessment Agency
# 
# This source code of the BioScore model is owned by PBL Netherlands Environmental Assessment Agency. 
# It is not permitted to copy, redistribute, remix, transform, and build upon the material without written approval of PBL. 
# Permission for commercial purposes will not be granted. 
# This code is published to improve the transparency of the models used by PBL, 
# but without any warranty for fitness for any other purpose. 
# After approval of PBL to use the code, PBL will not provide any support.
#
# This copyright statement does not apply to the parts of this file that are retrieved from the biomod2-package (see the top of this file for specifications).
# 
# ====================================================================

