# ====================================================================
#
# Copyright 2025, PBL Netherlands Environmental Assessment Agency
# See the copyright notice at the end of this file.
#
# ====================================================================


# 6_Format input data
# FormatInputForFitting: formats the input data for fitting the SDMs
# FormatInputForProjections: formats the input data for projecting the SDMs


# FormatInputForFitting
#
# Formats the fitting and test data for use in biomod and returns the four
# data objects created by BIOMOD_FormatingData.
#
# Arguments:
#   AllData_test         table with the test (evaluation) records
#   Vars                 character vector with the names of the explanatory
#                        variables
#   AllData_GLM_GAM_fit  table with the records used to fit the GLM and GAM
#   AllData_BRT_fit      table with the records used to fit the BRT; it
#                        differs from the GLM/GAM table in the number of
#                        absences
#   myRespName           response name handed to BIOMOD_FormatingData
#
# Each table must contain the columns "observed", "x" and "y" and every
# column named in Vars. A column named "salt", when selected through Vars,
# is converted to a factor.
#
# Returns a named list with:
#   BioModData_BRT_CV       BRT fitting data, with the test data as
#                           evaluation data
#   BioModData_GAMGLM_CV    GLM/GAM fitting data, with the test data as
#                           evaluation data
#   BioModData_GAMGLM_100p  GLM/GAM fitting data and test data combined
#   BioModData_BRT_100p     BRT fitting data and test data combined
FormatInputForFitting <- function(AllData_test, Vars, AllData_GLM_GAM_fit, AllData_BRT_fit,myRespName){

  # Split one input table into the three pieces biomod needs: the response
  # vector, the explanatory variables and the x/y coordinates.
  split_for_biomod <- function(dataset) {
    df <- subset(dataset, select = c("observed", "x", "y", Vars))
    # salt is categorical; only convert it when it is one of the selected
    # variables, otherwise the assignment would fail
    if ("salt" %in% names(df)) {
      df$salt <- as.factor(df$salt)
    }
    list(
      # [["observed"]] works for both data.frame and data.table input; the
      # unquoted form df[, observed] only works for a data.table
      resp = as.numeric(df[["observed"]]),
      expl = subset(df, select = Vars),
      # x and y coordinates of the species observations
      xy = df[, c("x", "y")]
    )
  }

  test_part <- split_for_biomod(AllData_test)
  gamglm_part <- split_for_biomod(AllData_GLM_GAM_fit)
  brt_part <- split_for_biomod(AllData_BRT_fit)

  # Fitting data with the test data supplied as evaluation data
  format_with_evaluation <- function(fit_part) {
    BIOMOD_FormatingData(resp.var = fit_part$resp,
                         expl.var = data.frame(fit_part$expl),
                         resp.xy = fit_part$xy,
                         resp.name = myRespName,
                         eval.resp.var = test_part$resp,
                         eval.expl.var = data.frame(test_part$expl),
                         eval.resp.xy = test_part$xy)
  }

  # Fitting and test data stacked together (100% of the data)
  format_all_data <- function(fit_part) {
    BIOMOD_FormatingData(resp.var = c(fit_part$resp, test_part$resp),
                         expl.var = data.frame(rbind(fit_part$expl, test_part$expl)),
                         resp.xy = rbind(fit_part$xy, test_part$xy),
                         resp.name = myRespName)
  }

  # Same call order as before: GAM/GLM, BRT, GAM/GLM 100%, BRT 100%
  BioModData_GAMGLM <- format_with_evaluation(gamglm_part)
  BioModData_BRT <- format_with_evaluation(brt_part)
  BioModData_GAMGLM_100p <- format_all_data(gamglm_part)
  BioModData_BRT_100p <- format_all_data(brt_part)

  return(list(BioModData_BRT_CV=BioModData_BRT,
              BioModData_GAMGLM_CV=BioModData_GAMGLM,
              BioModData_GAMGLM_100p=BioModData_GAMGLM_100p,
              BioModData_BRT_100p=BioModData_BRT_100p
              ))

}


# FormatInputForProjections
#
# Reads the environmental rasters of one scenario and returns them as a data
# frame (one column per variable) together with the raster cell coordinates.
#
# Arguments:
#   ScenarioName  name of the scenario; used as the name of the output
#                 directory and as the name of the column in the overview
#                 table that holds the raster file names
#   myRespName    not used in this function
#
# The function reads the following variables from the calling environment:
#   user_dir, base_dir, species_out_dir, var_fit_dir,
#   TableWithOverviewVariables, VariableData and modeltesting.
#
# Side effect: creates the directory
#   <user_dir>/<base_dir>/<species_out_dir>/RangeMaps/<ScenarioName>
# when it does not exist yet.
#
# Returns a named list with:
#   df_Env_Vars  data frame with the raster values of all variables
#   Coordinates  coordinates of the cells of the first raster
FormatInputForProjections <- function(ScenarioName, myRespName){

  # make a directory to store the output of the scenario
  # recursive = TRUE also creates missing parent directories (e.g. RangeMaps)
  scenario.output.dir <- file.path(user_dir,base_dir,species_out_dir,"RangeMaps",ScenarioName)
  if (!dir.exists(scenario.output.dir)) {
    dir.create(scenario.output.dir, recursive = TRUE)
  }

  # read table with all file paths for all variables
  OverviewVariables <- fread(file.path(user_dir, base_dir,var_fit_dir,TableWithOverviewVariables))

  # read all data: one raster per variable, named after the variable
  EnvVar <- lapply(VariableData$Variables, FUN = function(x){
    raster_name <- unlist(OverviewVariables[SharedNameVariable == x,..ScenarioName], use.names = FALSE)
    if (length(raster_name) == 0L) {
      stop("No raster file listed for variable '", x, "' in scenario '",
           ScenarioName, "'", call. = FALSE)
    }
    raster_file <- raster(file.path(user_dir, base_dir,var_fit_dir,raster_name))
    names(raster_file) <- x
    return(raster_file)
  })

  # When testing the model, restrict the rasters to a small fixed extent
  # (xmin, xmax, ymin, ymax) before converting them, so that the full-extent
  # data frame is never built only to be thrown away.
  if(modeltesting == TRUE){
    EnvVar <- lapply(EnvVar, function(x){crop(x, extent(4000000,5000000,2000000,3000000))})
  }

  # make a dataframe out of the rasters
  EnvVar2 <- lapply(EnvVar, function(x){raster::as.data.frame(x)})
  EnvVar2 <- data.frame(EnvVar2)

  # Convert the salt variable to factor (only when it is one of the variables)
  if ("salt" %in% names(EnvVar2)) {
    EnvVar2$salt <- as.factor(EnvVar2$salt)
  }

  # make a dataframe with the coordinates from the raster cells
  EnvVar_xy <- coordinates(EnvVar[[1]])

  return(list(df_Env_Vars = EnvVar2,
              Coordinates = EnvVar_xy
  ))
}

# ====================================================================
#
# Copyright 2025, PBL Netherlands Environmental Assessment Agency
# 
# This source code of the BioScore model is owned by PBL Netherlands Environmental Assessment Agency. 
# It is not permitted to copy, redistribute, remix, transform, and build upon the material without written approval of PBL. 
# Permission for commercial purposes will not be granted. 
# This code is published to improve the transparency of the models used by PBL, 
# but without any warranty for fitness for any other purpose. 
# After approval of PBL to use the code, PBL will not provide any support.
# 
# ====================================================================

