# ====================================================================
#
# Copyright 2025, PBL Netherlands Environmental Assessment Agency
# See the copyright notice at the end of this file.
#
# ====================================================================


# This script post-processes the species range maps predicted with BioScore-terrestrial or BioScore-wetlands.
# The script has several options:
#     - mask the species' predicted range maps to the realised range
#     - mask the species' predicted range maps to suitable land cover types
#     - set the probability of occurrence (PoO) values below a threshold value to zero, or binarise the PoO with the threshold value
#     - in case of a future scenario, implement a no, realistic or unlimited dispersal assumption
# Moreover, the script calculates the range sizes of the resulting range maps and the species richness.

# Start from a clean workspace: remove every object that ls() lists in the
# current environment. Objects whose names start with a dot are not listed by
# ls() with its default arguments and are therefore kept.
rm(list = ls())

#============1. START MODEL SET UP 
#===================================================================================================

library(data.table)
library(raster)
library(terra)
library(snowfall)
library(git2r)
library(sf)
library(foreach)
library(doMC)
library(parallel)
library(iterators)
library(dplyr)
library(knitr)
library(doParallel)

# --- Locations (placeholders; set these before running) -----------------------
# user_dir and base_dir are combined as file.path(user_dir, base_dir) further
# down in this script; github_dir is the directory the scripts are sourced from.
user_dir <- "File.Path"
base_dir <- "File.Path"
github_dir <- "File.Path"

# Full paths of the model and system settings scripts inside github_dir
settings_model <- file.path(github_dir, "PostProcessingSDMs/PostProcessingSDMs_model_settings.R")
settings_system <-  file.path(github_dir, "PostProcessingSDMs/PostProcessingSDMs_system_settings.R")

# Work from github_dir so that the relative script paths below resolve
setwd(github_dir)

# Source everything in a fixed order: model settings, system settings, the
# logfile helpers and finally the scripts specific to this post-processing run.
# source() is called with its default local = FALSE, so everything the scripts
# define ends up in the global environment.
invisible(lapply(
  c(
    settings_model,
    settings_system,
    "core/02_Create-logfile.R",
    "PostProcessingSDMs/01_PostProcessingSDMs.R",
    "PostProcessingSDMs/02_RemoveUnsuitableAreas.R",
    "PostProcessingSDMs/03_Binarisation.R",
    "PostProcessingSDMs/04_DispersalAssumptions.R",
    "PostProcessingSDMs/05_SpeciesRichness.R"
  ),
  source
))

# Output locations relative to file.path(user_dir, base_dir).
# IntermediateResults is used again further down in this script; result_dir is
# not, but it is kept as a global because the sourced dispersal functions
# presumably read it - TODO confirm.
IntermediateResults <- file.path(species_out_dir, "IntermediateResults")
result_dir <- file.path(species_out_dir, "IntermediateResults", "Bio_results")

# All directories this run writes to:
#   - the output directory itself
#   - a folder for all range maps, with one sub-folder per scenario
#   - the intermediate results and the dispersal range maps
# file.path() is vectorised, so the last entry expands to one path per element
# of ProjectScenarios (and to nothing when ProjectScenarios is empty).
output_dirs <- c(
  file.path(user_dir, base_dir, species_out_dir),
  file.path(user_dir, base_dir, species_out_dir, "RangeMaps"),
  file.path(user_dir, base_dir, IntermediateResults),
  file.path(user_dir, base_dir, result_dir),
  file.path(user_dir, base_dir, species_out_dir, "RangeMaps", ProjectScenarios)
)

# Create what is missing. Existing directories are skipped (no warning on a
# re-run), nested paths are created recursively, and a real failure stops the
# script here instead of surfacing later as a failed write.
for (i in output_dirs) {
  if (!dir.exists(i) && !dir.create(i, recursive = TRUE)) {
    stop("Could not create output directory: ", i, call. = FALSE)
  }
}

# Register the doMC parallel backend for foreach. CPU is not defined in this
# script; presumably it is set in the system settings file - TODO confirm.
registerDoMC(cores = CPU)


# From here on, relative paths are resolved against the project directory
setwd(file.path(user_dir,base_dir))

#============2. START CREATING LOGFILE
#===================================================================================================

# Create the logfile for this run and keep whatever OpenLogFile() returns in
# LogFile. OpenLogFile() is not defined in this script; presumably it comes
# from core/02_Create-logfile.R, which is sourced above - TODO confirm.
LogFile <- OpenLogFile()


#============3. START PREPARING INPUT DATA
#===================================================================================================

# 1. Load the table with all metrics calculated with BioScore
dt_AllMetrics <- fread(AllMetrics_file)

# Load the table with the dispersal data; it is only needed for the
# "realistic dispersal" assumption
if (DispersalAssumption == "realistic dispersal") {
  dispersal_rates <- read.csv(file.path(user_dir, base_dir, Dispersal_data_file))
}

# select all species for which all models converged and which had enough observations for model fitting
#dt_AllMetrics <- na.omit(dt_AllMetrics)

# 2. Keep only the species for which the SDM output is considered good enough.
# An unsupported EvaluationMetric used to skip this filter without any notice;
# the filter is still skipped in that case, but now with a warning.
if (EvaluationMetric == "TSS") {
  dt_AllMetrics <- dt_AllMetrics[TSS.ens.cv >= MinimalMetricValue]
} else if (EvaluationMetric == "ROC") {
  dt_AllMetrics <- dt_AllMetrics[ROC.ens.cv >= MinimalMetricValue]
} else {
  warning(
    "EvaluationMetric '", EvaluationMetric, "' is not 'TSS' or 'ROC': ",
    "no species were filtered on model quality.",
    call. = FALSE
  )
}

# 3. Select the cut-off values that belong to the chosen binarisation metric.
# Validate the setting first: an unsupported value used to leave dt_cutoffs
# undefined, which only showed up as an "object not found" error further down.
valid_binarization_metrics <- c("TSS", "DSS", "MCC", "F")
if (length(BinarizationMetric) != 1L ||
    !(BinarizationMetric %in% valid_binarization_metrics)) {
  stop(
    "BinarizationMetric must be one of: ",
    paste(valid_binarization_metrics, collapse = ", "),
    call. = FALSE
  )
}
# The cut-off columns are named cutoff_<metric> (cutoff_TSS, cutoff_DSS, ...)
cutoff_column <- paste0("cutoff_", BinarizationMetric)
dt_cutoffs <- dt_AllMetrics[, c("Species", cutoff_column), with = FALSE]
# setnames() renames by reference, the data.table way of replacing names()
setnames(dt_cutoffs, c("species", "cutoff.ens"))

# Names of the species that are post-processed below
RespNames <- dt_cutoffs$species
if (length(RespNames) == 0L) {
  warning("No species left after filtering on model quality.", call. = FALSE)
}

if (DispersalAssumption == "realistic dispersal") {
  # Create the dispersal-rate data frame in the global environment and drop
  # the rows with missing values
  dispersaldata <- data.frame(Species = dispersal_rates$Taxon, Rate = dispersal_rates$Rate)
  dispersaldata <- na.omit(dispersaldata)

  # Keep only the species that passed the selection above
  dispersaldata <- dispersaldata %>%
    filter(Species %in% RespNames)

  # Save the dispersal data to the intermediate results folder
  # (presumably read back by the dispersal functions - TODO confirm)
  write.csv(dispersaldata, file.path(user_dir,base_dir,IntermediateResults,"dispersaldata.csv"), row.names = FALSE)
}

#RespNames <- RespNames[1:2]
#============4. START POST PROCESSING THE SDM-PREDICTIONS
#===================================================================================================

# Value for the SetValuesAboveThresholdToOne argument.
# This argument used to be filled with SetValuesUnderThresholdToZero, which
# coupled the two options: binarisation could not be switched independently of
# setting low values to zero. Use the dedicated setting when it is defined and
# fall back to the previous value when the settings do not provide it.
# NOTE(review): confirm that the model settings define SetValuesAboveThresholdToOne.
set_above_threshold_to_one <- get0(
  "SetValuesAboveThresholdToOne",
  ifnotfound = SetValuesUnderThresholdToZero
)

# Post-process every selected species in parallel (%dopar% uses the registered
# foreach backend). Each iteration returns the value of F_PostProcessingSDMs()
# for one species; .combine = rbind stacks these into All_RangeSizes.
# system.time() reports how long the whole loop took.
system.time(All_RangeSizes <- foreach(i = RespNames, .combine = rbind) %dopar% {
  F_PostProcessingSDMs(
    RespName = i,
    Scenarios = ProjectScenarios,
    ClipToRealisedRange = ClipToRealisedRange,
    SetValuesUnderThresholdToZero = SetValuesUnderThresholdToZero,
    SetValuesAboveThresholdToOne = set_above_threshold_to_one,
    DispersalAssumption = DispersalAssumption
  )
})

# Save the table with range sizes to the hard disk
write.csv(All_RangeSizes, file.path(user_dir,base_dir,species_out_dir, "Range_Sizes.csv"))

# Delete the intermediate results needed for the dispersal functions.
# Make sure to close the file manager and any other files within the IntermediateResults! Otherwise the unlink function doesn't work well.
unlink(file.path(user_dir, base_dir, IntermediateResults), recursive = TRUE)

# Create the species richness tables for the processed species
dt_SpeciesRichness <- F_Species_Richness(RespNames)



# ====================================================================
#
# Copyright 2025, PBL Netherlands Environmental Assessment Agency
# 
# This source code of the BioScore model is owned by PBL Netherlands Environmental Assessment Agency. 
# It is not permitted to copy, redistribute, remix, transform, and build upon the material without written approval of PBL. 
# Permission for commercial purposes will not be granted. 
# This code is published to improve the transparency of the models used by PBL, 
# but without any warranty for fitness for any other purpose. 
# After approval of PBL to use the code, PBL will not provide any support.
#
# 
# ====================================================================


