# discriminante virus
library (MASS)
library(caret)
library(ggplot2)
library(ROCR)
library(grid)
library(gridExtra)

## funciÃ³n de ploteo 
decisionplot_ggplot <- function(model, 
                                vars, 
                                data, 
                                resolution = 100,
                                showgrid = TRUE, 
                                ...,
                                modes.means) {
  if(missing(model) || missing(vars) || missing(data))
    stop('model, vars or data is missing')
  if(!(is.character(vars) && length(vars) == 2) && !('formula' %in% class(vars) && length(vars <- all.vars(vars)) == 2))
    stop('vars should be either a formula or a character vector oflength 2.')
  if(!is.data.frame(data))
    stop('data does not seem to comform with standard types.')
  
  t <- terms(model)
  if(!all((other.vars <- attr(t, 'term.labels')) %in% colnames(data)))
    stop('data is missing one or more variables in model.')
  lhs <- as.character(t[[2]])
  # Set up data for prediction, for the data in vars
  prd.vars <- lapply(data[, vars], function(x){
    if(is.character(x) || is.factor(x)){
      unique(x)
    }else{
      r <- range(x)
      seq(r[1], r[2], length.out = resolution)
    }
  })
  names(prd.vars) <- vars
  # set up data for prediction for the remaining data
  if(missing(modes.means)){
    other.vars <- other.vars[!other.vars %in% vars]
    if(length(other.vars)){
      modes.means <- lapply(data[, other.vars], function(x){
        if(is.character(x)){
          unique(x)[1]
        }else if(is.factor(x)){
          levels(x)[1]
        }else{
          mean(x)
        }
      }) 
      names(modes.means) <- other.vars
    }else
      modes.means <- NULL
  }else{
    if(is.null(other.vars))
      stop('other.vars is null but modes.means was provided. Please leave this missing.')
    if(!all(other.vars %in% names(modes.means)))
      stop('modes.means are lacking one or more variables.')
    modes.means <- as.list(modes.means)
    if(any(lengths(modes.means) > 1))
      stop('modes.means should only contain a single values for all variables.')
  }
  prd.data <- expand.grid(c(prd.vars, modes.means))
  p <- predict(model, prd.data, ...)
  prd.data$nm <- if(is.list(p)) 
    p$class 
  else 
    p
  names(prd.data)[ncol(prd.data)] <- lhs
  # Create the final plot.
  gg <- ggplot(aes_string(vars[1], vars[2]), data = data) + 
    geom_point(aes_string(col = lhs), size = 2, alpha = 0.5) + 
    geom_contour(aes_string(vars[1], vars[2], z = paste0('as.integer(', lhs, ') + 1L')), 
                 data = prd.data, inherit.aes = FALSE)
  if(showgrid)
    gg <- gg + 
    geom_point(aes_string(vars[1], vars[2], col = lhs), data = prd.data, 
               shape = 20, size = 0.1, alpha = 0.2)
  gg
}

###########
dat <- read.csv("discrim.csv", header = T, stringsAsFactors = T)
dat <- na.omit(dat)
dat[, -1] <- scale(dat[, -1])
summary(dat)

## lda
lda1 <- lda(Clade ~ MFE + Nc_index + GC3_Bias + AT3_Bias, data = dat)
lda1
pre <- predict(lda1, dat[, 2:5])
confusionMatrix(pre$class, dat$Clade)

lda_plot <- cbind(dat, predict(lda1)$x)
ggplot(lda_plot, aes(LD1, LD2)) +
  geom_point(aes(color = Clade))


### qda
qda1 <- qda(Clade ~ MFE + Nc_index + GC3_Bias + AT3_Bias, data = dat)
qda1

pre <- predict(qda1, dat[, 2:5])
confusionMatrix(pre$class, dat$Clade)

## plot de qda
q1 <- decisionplot_ggplot(qda1, MFE ~ Nc_index, data = dat, showgrid = TRUE, resolution = 150)+
  theme_bw() + guides(color = "none")
q2 <- decisionplot_ggplot(qda1, MFE ~ GC3_Bias, data = dat, showgrid = TRUE, resolution = 150)+
  theme_bw() + guides(color = "none")
q3 <- decisionplot_ggplot(qda1, MFE ~ AT3_Bias, data = dat, showgrid = TRUE, resolution = 150)+
  theme_bw() + guides(color = "none")
q4 <- decisionplot_ggplot(qda1, Nc_index ~ GC3_Bias, data = dat, showgrid = TRUE, resolution = 150)+
  theme_bw() + guides(color = "none")
q5 <- decisionplot_ggplot(qda1, Nc_index ~ AT3_Bias, data = dat, showgrid = TRUE, resolution = 150)+
  theme_bw() + guides(color = "none")
q6 <- decisionplot_ggplot(qda1, GC3_Bias ~ AT3_Bias, data = dat, showgrid = TRUE, resolution = 150)+
  theme_bw() + guides(color = "none")
q0 <- decisionplot_ggplot(qda1, MFE ~ Nc_index, data = dat, showgrid = TRUE, resolution = 150)+
  theme_bw()
legend <- cowplot::get_legend(q0)

ng <- nullGrob()

pdf("qda_clasif.pdf", width = 11, height = 8)
grid.arrange(q1, ng, legend,
             q2, q3, ng,
             q4, q5, q6)
dev.off()

## plot de lda
l1 <- decisionplot_ggplot(lda1, MFE ~ Nc_index, data = dat, showgrid = TRUE, resolution = 150)+
  theme_bw() + guides(color = "none")
l2 <- decisionplot_ggplot(lda1, MFE ~ GC3_Bias, data = dat, showgrid = TRUE, resolution = 150)+
  theme_bw() + guides(color = "none")
l3 <- decisionplot_ggplot(lda1, MFE ~ AT3_Bias, data = dat, showgrid = TRUE, resolution = 150)+
  theme_bw() + guides(color = "none")
l4 <- decisionplot_ggplot(lda1, Nc_index ~ GC3_Bias, data = dat, showgrid = TRUE, resolution = 150)+
  theme_bw() + guides(color = "none")
l5 <- decisionplot_ggplot(lda1, Nc_index ~ AT3_Bias, data = dat, showgrid = TRUE, resolution = 150)+
  theme_bw() + guides(color = "none")
l6 <- decisionplot_ggplot(lda1, GC3_Bias ~ AT3_Bias, data = dat, showgrid = TRUE, resolution = 150)+
  theme_bw() + guides(color = "none")
l0 <- decisionplot_ggplot(lda1, MFE ~ Nc_index, data = dat, showgrid = TRUE, resolution = 150)+
  theme_bw()
legend <- cowplot::get_legend(l0)

ng <- nullGrob()

pdf("lda_clasif.pdf", width = 11, height = 8)
grid.arrange(l1, ng, legend,
             l2, l3, ng,
             l4, l5,l6)
dev.off()