# discriminante virus library (MASS) library(caret) library(ggplot2) library(ROCR) library(grid) library(gridExtra) ## función de ploteo decisionplot_ggplot <- function(model, vars, data, resolution = 100, showgrid = TRUE, ..., modes.means) { if(missing(model) || missing(vars) || missing(data)) stop('model, vars or data is missing') if(!(is.character(vars) && length(vars) == 2) && !('formula' %in% class(vars) && length(vars <- all.vars(vars)) == 2)) stop('vars should be either a formula or a character vector oflength 2.') if(!is.data.frame(data)) stop('data does not seem to comform with standard types.') t <- terms(model) if(!all((other.vars <- attr(t, 'term.labels')) %in% colnames(data))) stop('data is missing one or more variables in model.') lhs <- as.character(t[[2]]) # Set up data for prediction, for the data in vars prd.vars <- lapply(data[, vars], function(x){ if(is.character(x) || is.factor(x)){ unique(x) }else{ r <- range(x) seq(r[1], r[2], length.out = resolution) } }) names(prd.vars) <- vars # set up data for prediction for the remaining data if(missing(modes.means)){ other.vars <- other.vars[!other.vars %in% vars] if(length(other.vars)){ modes.means <- lapply(data[, other.vars], function(x){ if(is.character(x)){ unique(x)[1] }else if(is.factor(x)){ levels(x)[1] }else{ mean(x) } }) names(modes.means) <- other.vars }else modes.means <- NULL }else{ if(is.null(other.vars)) stop('other.vars is null but modes.means was provided. Please leave this missing.') if(!all(other.vars %in% names(modes.means))) stop('modes.means are lacking one or more variables.') modes.means <- as.list(modes.means) if(any(lengths(modes.means) > 1)) stop('modes.means should only contain a single values for all variables.') } prd.data <- expand.grid(c(prd.vars, modes.means)) p <- predict(model, prd.data, ...) prd.data$nm <- if(is.list(p)) p$class else p names(prd.data)[ncol(prd.data)] <- lhs # Create the final plot. gg <- ggplot(aes_string(vars[1], vars[2]), data = data) + geom_point(aes_string(col = lhs), size = 2, alpha = 0.5) + geom_contour(aes_string(vars[1], vars[2], z = paste0('as.integer(', lhs, ') + 1L')), data = prd.data, inherit.aes = FALSE) if(showgrid) gg <- gg + geom_point(aes_string(vars[1], vars[2], col = lhs), data = prd.data, shape = 20, size = 0.1, alpha = 0.2) gg } ########### dat <- read.csv("discrim.csv", header = T, stringsAsFactors = T) dat <- na.omit(dat) dat[, -1] <- scale(dat[, -1]) summary(dat) ## lda lda1 <- lda(Clade ~ MFE + Nc_index + GC3_Bias + AT3_Bias, data = dat) lda1 pre <- predict(lda1, dat[, 2:5]) confusionMatrix(pre$class, dat$Clade) lda_plot <- cbind(dat, predict(lda1)$x) ggplot(lda_plot, aes(LD1, LD2)) + geom_point(aes(color = Clade)) ### qda qda1 <- qda(Clade ~ MFE + Nc_index + GC3_Bias + AT3_Bias, data = dat) qda1 pre <- predict(qda1, dat[, 2:5]) confusionMatrix(pre$class, dat$Clade) ## plot de qda q1 <- decisionplot_ggplot(qda1, MFE ~ Nc_index, data = dat, showgrid = TRUE, resolution = 150)+ theme_bw() + guides(color = "none") q2 <- decisionplot_ggplot(qda1, MFE ~ GC3_Bias, data = dat, showgrid = TRUE, resolution = 150)+ theme_bw() + guides(color = "none") q3 <- decisionplot_ggplot(qda1, MFE ~ AT3_Bias, data = dat, showgrid = TRUE, resolution = 150)+ theme_bw() + guides(color = "none") q4 <- decisionplot_ggplot(qda1, Nc_index ~ GC3_Bias, data = dat, showgrid = TRUE, resolution = 150)+ theme_bw() + guides(color = "none") q5 <- decisionplot_ggplot(qda1, Nc_index ~ AT3_Bias, data = dat, showgrid = TRUE, resolution = 150)+ theme_bw() + guides(color = "none") q6 <- decisionplot_ggplot(qda1, GC3_Bias ~ AT3_Bias, data = dat, showgrid = TRUE, resolution = 150)+ theme_bw() + guides(color = "none") q0 <- decisionplot_ggplot(qda1, MFE ~ Nc_index, data = dat, showgrid = TRUE, resolution = 150)+ theme_bw() legend <- cowplot::get_legend(q0) ng <- nullGrob() pdf("qda_clasif.pdf", width = 11, height = 8) grid.arrange(q1, ng, legend, q2, q3, ng, q4, q5, q6) dev.off() ## plot de lda l1 <- decisionplot_ggplot(lda1, MFE ~ Nc_index, data = dat, showgrid = TRUE, resolution = 150)+ theme_bw() + guides(color = "none") l2 <- decisionplot_ggplot(lda1, MFE ~ GC3_Bias, data = dat, showgrid = TRUE, resolution = 150)+ theme_bw() + guides(color = "none") l3 <- decisionplot_ggplot(lda1, MFE ~ AT3_Bias, data = dat, showgrid = TRUE, resolution = 150)+ theme_bw() + guides(color = "none") l4 <- decisionplot_ggplot(lda1, Nc_index ~ GC3_Bias, data = dat, showgrid = TRUE, resolution = 150)+ theme_bw() + guides(color = "none") l5 <- decisionplot_ggplot(lda1, Nc_index ~ AT3_Bias, data = dat, showgrid = TRUE, resolution = 150)+ theme_bw() + guides(color = "none") l6 <- decisionplot_ggplot(lda1, GC3_Bias ~ AT3_Bias, data = dat, showgrid = TRUE, resolution = 150)+ theme_bw() + guides(color = "none") l0 <- decisionplot_ggplot(lda1, MFE ~ Nc_index, data = dat, showgrid = TRUE, resolution = 150)+ theme_bw() legend <- cowplot::get_legend(l0) ng <- nullGrob() pdf("lda_clasif.pdf", width = 11, height = 8) grid.arrange(l1, ng, legend, l2, l3, ng, l4, l5,l6) dev.off()