# The MIT License
# Copyright (c) 2007 The GGobi Foundation
# http://www.ggobi.org/book/code-license.txt

# Random-forest classification of the olive oils data, with the predictions
# and per-class vote proportions sent to GGobi for interactive inspection.
# Part 1 classifies the regions; part 2 classifies the areas within region 1.

# Read data ----
d.olive <- read.csv("olive.csv", row.names = 1)
d.olive.sub <- subset(d.olive, select = c(region, palmitic:eicosenoic))

# Load libraries ----
library(randomForest)
library(rggobi)

# Fitting forest to three large regions ----
olive.rf <- randomForest(
  as.factor(region) ~ .,
  data = d.olive.sub,
  importance = TRUE,
  proximity = TRUE,
  mtry = 4
)

# Rank the variables by the fifth column of the importance matrix.
# NOTE(review): with importance = TRUE the matrix presumably holds one column
# per class followed by MeanDecreaseAccuracy and MeanDecreaseGini, so column 5
# would be MeanDecreaseGini for this 3-class fit -- confirm against
# colnames(olive.rf$importance).
order(olive.rf$importance[, 5], decreasing = TRUE)

# Out-of-bag predicted class as an integer code, and the confusion table of
# the first column of d.olive against the prediction.
pred <- as.numeric(olive.rf$predicted)
table(d.olive[, 1], olive.rf$predicted)

# Per-class vote matrix. The component is spelled out in full as `votes`
# rather than relying on `$` partial matching.
margin <- olive.rf$votes
colnames(margin) <- c("Vote1", "Vote2", "Vote3")

d.olive.rf <- cbind(pred, margin, d.olive)
gd <- ggobi(d.olive.rf)[1]

# Colour each point by its region instead of hard-coding the group sizes and
# relying on the rows being sorted by region.
# NOTE(review): assumes region is coded 1, 2, 3 -- confirm against olive.csv.
glyph_color(gd) <- c(6, 5, 1)[d.olive$region]

# Subset areas in the southern region ----
d.olive.sth <- subset(d.olive, region == 1, select = area:eicosenoic)

# Checking for reasonable input choices ----
# Refit with mtry = 1..5 and print, for each fit, the column means and
# standard deviations of the error-rate matrix and the variable ranking.
# olive.rf is overwritten on every pass, so the last fit (mtry = 5) is the one
# used by the plot below.
for (i in 1:5) {
  olive.rf <- randomForest(
    as.factor(area) ~ .,
    data = d.olive.sth,
    importance = TRUE,
    proximity = TRUE,
    mtry = i,
    ntree = 1500
  )
  cat(apply(olive.rf$err.rate, 2, mean), "\n")
  cat(apply(olive.rf$err.rate, 2, sd), "\n")
  cat(order(olive.rf$importance[, 5], decreasing = TRUE), "\n")
}

# Sorted out-of-bag counts per case; the x axis follows the number of cases
# in the fit instead of a hard-coded 323.
oob.sorted <- sort(olive.rf$oob.times)
plot(
  seq_along(oob.sorted),
  oob.sorted,
  ylim = c(0, max(olive.rf$oob.times))
)

# Fitting the forest to areas of south ----
olive.rf <- randomForest(
  as.factor(area) ~ .,
  data = d.olive.sth,
  importance = TRUE,
  proximity = TRUE,
  mtry = 2,
  ntree = 1500
)
apply(olive.rf$err.rate, 2, mean)
apply(olive.rf$err.rate, 2, sd)

# NOTE(review): for a 4-class fit, column 5 of the importance matrix is
# presumably MeanDecreaseAccuracy rather than MeanDecreaseGini, i.e. not the
# same measure as in the 3-class fit above -- confirm which one is intended.
order(olive.rf$importance[, 5], decreasing = TRUE)

pred <- as.numeric(olive.rf$predicted)
# Column 1 of d.olive.sth is `area` (first column selected by area:eicosenoic).
table(d.olive.sth[, 1], olive.rf$predicted)

margin <- olive.rf$votes
colnames(margin) <- c("Vote1", "Vote2", "Vote3", "Vote4")

d.olive.rf <- cbind(pred, margin, d.olive.sth)
gd <- ggobi(d.olive.rf)[1]

# One colour per area, indexed by the area code.
glyph_color(gd) <- c(6, 3, 2, 9)[d.olive.rf$area]