# ====================================================================
#
# Copyright 2025, PBL Netherlands Environmental Assessment Agency
# See the copyright notice at the end of this file.
#
# ====================================================================


# Functions for selecting the species for which SDMs are fitted

#' Select the species for which SDMs still have to be fitted
#'
#' Reads the species observation table and the species/habitat table from
#' disk, restricts the candidate species to the wanted habitat types and
#' removes species for which a range map (.tif) already exists.
#'
#' Relies on objects that are not defined in this function and must exist in
#' the calling environment: `user_dir`, `base_dir`, `species_in_dir`,
#' `species_special`, `species_out_dir`, `LogFile` and `WriteLogFile()`.
#'
#' @param taxo_group Single taxon group name. When it occurs in the
#'   `Taxon group` column of the observation data, only those records are
#'   kept; when it does not occur, all records are kept.
#' @param Drophabitatspecies Vector of patterns (used as regular
#'   expressions, combined with "|"). Habitat codes matching any of them are
#'   excluded. `NULL`, an empty vector, `NA` and "" entries exclude nothing.
#' @return A list with two elements: `RespNames`, the species names still to
#'   be processed, and `dt_species_data`, the observation data.table with
#'   columns `PlotID`, `Species` and `Taxon group`.
Selectspecies <- function (taxo_group, Drophabitatspecies){

  # NOTE(review): the log path is built without user_dir/base_dir, whereas the
  # RangeMaps folder below is built with them - confirm this is intended.
  log_path <- paste(species_out_dir,LogFile,sep="/")

  # load species observation data
  dt_species_data <- fread(file.path(user_dir,base_dir, species_in_dir), quote = "", sep = "\t")

  # select for taxon group, but only when that group is present in the data;
  # %in% never yields NA, so missing taxon groups cannot break the if()
  if (any(dt_species_data[["Taxon group"]] %in% taxo_group)) {
    dt_species_data <- dt_species_data[`Taxon group` == taxo_group]
  }

  # keep and rename the columns that are needed downstream
  dt_species_data <- dt_species_data[, .(PlotID = `PlotObservationID`,
                                         Species = `Matched concept`,
                                         `Taxon group` = `Taxon group`)]
  SpeciesNames <- unique(dt_species_data$Species)
  WriteLogFile(log_path,
               ln = paste0("Total number of species in database for ", taxo_group, ": ", length(SpeciesNames)))

  # define the species of interest
  RespNames <- fread(file.path(user_dir,base_dir,species_special))
  AllTypes <- unique(RespNames[["Habitat code"]])

  # make selection of species in preferred habitat type;
  # an empty pattern would match every habitat code, so unusable entries are
  # dropped first and no pattern at all means nothing is removed
  drop_patterns <- Drophabitatspecies[!is.na(Drophabitatspecies) & nzchar(Drophabitatspecies)]
  if (length(drop_patterns) > 0) {
    Remove_Habitats <- AllTypes[grep(paste(drop_patterns, collapse = "|"), AllTypes)]
  } else {
    Remove_Habitats <- AllTypes[0]
  }
  AllTypes <- AllTypes[!AllTypes %in% c(Remove_Habitats, "", "?")]

  # only species in a retained habitat that also occur in the observation data
  RespNames <- RespNames[`Habitat code` %in% AllTypes & Species %in% SpeciesNames]
  RespNames <- unique(RespNames$Species)
  WriteLogFile(log_path,
               ln = paste0("Number of species selected for fitting SDMs for ", taxo_group, ": ", length(RespNames)))

  # check whether there are species for which output is already calculated
  # and remove these from the list of species names
  # (the dot is escaped so only a real ".tif" extension counts)
  files <- list.files(file.path(user_dir,base_dir,species_out_dir,"RangeMaps"),
                      pattern = "\\.tif$")
  species_done <- sub("\\.tif$", "", files)
  RespNames <- RespNames[!RespNames %in% species_done]

  list(RespNames = RespNames, dt_species_data = dt_species_data)

}


# ====================================================================
#
# Copyright 2025, PBL Netherlands Environmental Assessment Agency
# 
# This source code of the BioScore model is owned by PBL Netherlands Environmental Assessment Agency. 
# It is not permitted to copy, redistribute, remix, transform, and build upon the material without written approval of PBL. 
# Permission for commercial purposes will not be granted. 
# This code is published to improve the transparency of the models used by PBL, 
# but without any warranty for fitness for any other purpose. 
# After approval of PBL to use the code, PBL will not provide any support.
# 
# ====================================================================

