# ====================================================================
#
# Copyright 2025, PBL Netherlands Environmental Assessment Agency
# See the copyright notice at the end of this file.
#
# ====================================================================


# functions to project the SDMs and make a map with the ensembled probability of occurrence
# ProjectSDMs:      function to project the SDM
# MakeEnsembleMaps: function which calculates the ensemble maps for all scenarios
# CutOffValue:      function which extracts (if previously calculated) or calculates the optimal PoO cut off value for binarization.


ProjectSDMs <- function(myRespName,ScenarioName, Env_Data, myBiomodModelOut_GAMGLM_100p, myBiomodModelOut_BRT_100p,ModelFits_100p,weights){
  setwd(file.path(user_dir, base_dir, species_out_dir))
  

  # project GAM and GLM
  proj_GAMGLM_100p <- BIOMOD_Projection(
    bm.mod = myBiomodModelOut_GAMGLM_100p,
    new.env = Env_Data$df_Env_Vars ,
    proj.name = ScenarioName,
    models.chosen = 'all',
    compress ='xz',
    build.clamping.mask = FALSE )
  
  df_Proj_GAMGLM_100p <- get_predictions(proj_GAMGLM_100p)
  R_Proj_GLM <- rasterFromXYZ(data.frame(Env_Data$Coordinates,df_Proj_GAMGLM_100p[df_Proj_GAMGLM_100p$algo == "GLM","pred"]))
  R_Proj_GAM <- rasterFromXYZ(data.frame(Env_Data$Coordinates,df_Proj_GAMGLM_100p[df_Proj_GAMGLM_100p$algo == "GAM","pred"]))
  
  # remove large dataset from memory
  rm(df_Proj_GAMGLM_100p)
  rm(proj_GAMGLM_100p)
  gc()
  
  # project BRT
  proj_BRT_100p <- BIOMOD_Projection(
    bm.mod = myBiomodModelOut_BRT_100p,
    new.env = Env_Data$df_Env_Vars ,
    proj.name = ScenarioName,
    models.chosen = 'all',
    compress ='xz',
    build.clamping.mask = FALSE )
  
  df_Proj_GBM_100p <- get_predictions(proj_BRT_100p)
  R_Proj_GBM <- rasterFromXYZ(data.frame(Env_Data$Coordinates,df_Proj_GBM_100p[df_Proj_GBM_100p$algo == "GBM","pred"]))
  
  # remove large dataset from memory
  rm(df_Proj_GBM_100p)
  rm(proj_BRT_100p)
  gc()
  
  #  stack predictions of the algoritms
  R_Proj_algo <- stack(c(R_Proj_GLM, R_Proj_GAM, R_Proj_GBM))
  
  # calculate the ensemble map, based on the weighted mean
  R_Proj_ens <- weighted.mean(R_Proj_algo, weights)
  
  # save a copy of the ensemble projection
  writeRaster(R_Proj_ens, file.path(modelsetup$output.maps,ScenarioName, paste0(myRespName,".tif")), format="GTiff", options=tifoptions, overwrite=TRUE)
  
  return(R_Proj_ens)
}

MakeEnsembleMaps <- function(AllScenarioNames, ModelFits_100p,myRespName,weights){ 
  
  # for each scenario
  for(ScenarioName in AllScenarioNames){
    # write to logfile
    WriteLogFile(paste(user_dir,base_dir,species_out_dir,LogFile,sep="/"),ln=paste0("6. Start projecting range map of ", myRespName," for scenario: ", ScenarioName))
    
    # load and format environmental rasters for the current scenario
    InputVariables <- FormatInputForProjections(ScenarioName,myRespName)
    
    # project SDMs 
    EnsembledRangeMap <- ProjectSDMs(myRespName,
                                     ScenarioName, 
                                     InputVariables, 
                                     ModelFits_100p$ModelFit_GAMGLM, 
                                     ModelFits_100p$ModelFit_BRT,
                                     ModelFits_100p,
                                     weights)  
    }
}
  


CutOffValue <- function(myRespName, BioModData){
  
  cutoff_value <- NA
  
  # check whether the cut off value is already calculated before
  try({
    LocationAllMetricFile <- file.path(user_dir, base_dir, species_out_dir, "Output", "AllMetrics.csv")
    AllMetrics <- fread(LocationAllMetricFile)
    if(myRespName %in% AllMetrics$species){
      cutoff_value <- AllMetrics[species == myRespName, .(cutoff_TSS,cutoff_DSS,cutoff_MCC,cutoff_F)]  
    }
  }, silent = TRUE)
  
  # calculate the cut off value if it doesn't exist yet
  if(is.na(cutoff_value)){
    # make a spatial point dataframe of all observations
    df_alldata <- cbind(BioModData$BioModData_GAMGLM_100p@coord, BioModData$BioModData_GAMGLM_100p@data.species)
    colnames(df_alldata) <- c("x", "y", "observed")
    obs_spdf<-SpatialPointsDataFrame( df_alldata[,c("x","y")],  data = df_alldata[,c("x","y","observed")])
    
    # read the current range map from the hard disk
    R_Proj_ens <- raster(file.path(modelsetup$output.maps,"current", paste0(myRespName,".tif")))
    names(R_Proj_ens) <- "Predictions"
    
    # extract the value of environmental variables at a point location and add it to the coordinates
    obs_p_ens <- data.frame( extract( R_Proj_ens, obs_spdf, method='simple', sp=TRUE))
    obs_p_ens <- na.omit(obs_p_ens)
    # calculate the optimal cut off value for the probability of occurrence
    Fit <- obs_p_ens$Predictions
    Obs <- obs_p_ens$observed 
    
    ### calculate the cut off value with four different threshold-selection methods.
    # maximising TSS
    ens.stats.TSS <- FindOptimStat(metric.eval = "TSS", fit=Fit, obs=Obs, nb.thresh = 100)
    # extract the cut off value
    cutoff_TSS <- ens.stats.TSS$cutoff
    # minimising the difference between sensitivity and specificity
    ens.stats.DSS <- FindOptimStat(metric.eval = "DSS", fit=Fit, obs=Obs, nb.thresh = 100)
    # extract the cut off value
    cutoff_DSS <- ens.stats.DSS$cutoff
    # maximising MCC
    ens.stats.MCC <- FindOptimStat(metric.eval = "MCC", fit=Fit, obs=Obs, nb.thresh = 100)
    # extract the cut off value
    cutoff_MCC <- ens.stats.MCC$cutoff
    # maximising F
    ens.stats.F <- FindOptimStat(metric.eval = "F", fit=Fit, obs=Obs, nb.thresh = 100)
    # extract the cut off value
    cutoff_F <- ens.stats.F$cutoff

    
    cutoff_value <- data.frame(cutoff_TSS,cutoff_DSS,cutoff_MCC,cutoff_F)
  }
  
  return(cutoff_value)
}


ImageOfRangeMap <- function(AllScenarioNames, cutoff.ens, myRespName){
  for(ScenarioName in AllScenarioNames){
    
    # make a directory to store the output of the scenario
    image.output.dir <- file.path(modelsetup$ImagesOfRangeMaps.dir,ScenarioName)
    if (!file.exists(image.output.dir)) dir.create(image.output.dir)
    
    # load the PoO range map
    R_Proj_ens <- raster(file.path(modelsetup$output.maps,ScenarioName, paste0(myRespName,".tif")))
      
    # reclassify ensemble map to make a binary presence-absence map
    ens_Bin <- R_Proj_ens
    ens_Bin[ens_Bin >= 0 & ens_Bin < cutoff.ens$cutoff_TSS] <- 0
    ens_Bin[ens_Bin >= cutoff.ens$cutoff_TSS] <- 1
      
    # Store a plot of the ensemple projection for visual inspection
    ens <- stack(R_Proj_ens, ens_Bin)
    names(ens) <- c("continuous", "binarised with TSS")
    png(file = file.path(image.output.dir, paste0("Range_", myRespName,".png")),
        width = 180, height = 60, units = 'mm', pointsize = 8, res = 900)
    print (plot(ens, 
                main = paste0(myRespName, " ", ScenarioName, " distribution range","\ncontinuous and binarised with TSS")
                ) )
    dev.off()
  }
}





# ====================================================================
#
# Copyright 2025, PBL Netherlands Environmental Assessment Agency
# 
# This source code of the BioScore model is owned by PBL Netherlands Environmental Assessment Agency. 
# It is not permitted to copy, redistribute, remix, transform, and build upon the material without written approval of PBL. 
# Permission for commercial purposes will not be granted. 
# This code is published to improve the transparency of the models used by PBL, 
# but without any warranty for fitness for any other purpose. 
# After approval of PBL to use the code, PBL will not provide any support.
#
# 
# ====================================================================



