# The MIT License
# Copyright (c) 2007 The GGobi Foundation
# http://www.ggobi.org/book/code-license.txt


# Load the olive oil data; the first column of the CSV supplies the row names.
d.olive <- read.csv(file = "olive.csv", row.names = 1)

# Keep the region column plus the contiguous run of columns that starts at
# palmitic and ends at eicosenoic.
d.olive.sub <- local({
  col.names <- names(d.olive)
  first.acid <- match("palmitic", col.names)
  last.acid <- match("eicosenoic", col.names)
  d.olive[, c(match("region", col.names), first.acid:last.acid), drop = FALSE]
})

# load libraries
library(randomForest)
library(rggobi)

# Fitting forest to three large regions: region (as a factor) is the
# response and every other column of d.olive.sub is a predictor.
olive.rf <- randomForest(as.factor(region) ~ .,
  data = d.olive.sub, importance = TRUE, proximity = TRUE, mtry = 4)

# Rank the predictors, most important first.  With three classes the
# importance matrix has five columns and the fifth is MeanDecreaseGini,
# so it is selected by name instead of by position.
# NOTE(review): the four-class fits later in this file also rank by their
# fifth importance column, which for them is MeanDecreaseAccuracy --
# confirm which measure was intended.
order(olive.rf$importance[, "MeanDecreaseGini"], decreasing = TRUE)

# Predicted class for each case, stored as factor level codes.
pred <- as.numeric(olive.rf$predicted)

# Cross-tabulate the first column of d.olive (presumably region -- confirm)
# against the forest's predictions.
table(d.olive[, 1], olive.rf$predicted)

# Per-class vote matrix.  The component name is spelled out in full
# rather than relying on partial matching of `$vote`.
margin <- olive.rf$votes
colnames(margin) <- c("Vote1", "Vote2", "Vote3")

# Combine predictions, votes and the full data, then open it in GGobi and
# keep a handle on the first dataset.
d.olive.rf <- cbind(pred, margin, d.olive)
gd <- ggobi(d.olive.rf)[1]

# Colour every case by its region code (1 -> 6, 2 -> 5, 3 -> 1).  Looking
# the colour up per row gives the same result as repeating each colour
# 323/98/151 times when the rows are sorted by region with those counts,
# but does not depend on the row order or on the group sizes.
# Assumes region is coded as the integers 1, 2, 3 -- TODO confirm.
glyph_color(gd) <- c(6, 5, 1)[d.olive.rf$region]

# Southern region only: keep the cases whose region code is 1, and the
# contiguous run of columns from area through eicosenoic.
d.olive.sth <- local({
  col.names <- names(d.olive)
  # which() drops cases where the comparison is NA, as subset() does.
  south.rows <- which(d.olive[["region"]] == 1)
  wanted <- match("area", col.names):match("eicosenoic", col.names)
  d.olive[south.rows, wanted, drop = FALSE]
})

# Checking for reasonable input choices: refit the forest for mtry = 1..5
# (1500 trees each) and print, for every fit, one line each of
#   - the column means of the error-rate matrix,
#   - the column standard deviations of the error-rate matrix,
#   - the predictor ranking, most important first.
for (i in 1:5) {
  olive.rf <- randomForest(as.factor(area) ~ ., data = d.olive.sth,
    importance = TRUE, proximity = TRUE, mtry = i, ntree = 1500)
  cat(apply(olive.rf$err.rate, 2, mean), "\n")
  cat(apply(olive.rf$err.rate, 2, sd), "\n")
  # With four area classes the fifth importance column is
  # MeanDecreaseAccuracy; select it by name instead of by position.
  cat(order(olive.rf$importance[, "MeanDecreaseAccuracy"],
    decreasing = TRUE), "\n")
}

# olive.rf now holds the last fit (mtry = 5).  Plot its sorted oob.times
# values; the x positions are derived from the data instead of a
# hard-coded case count, so the plot still works if the subset size changes.
plot(seq_along(olive.rf$oob.times), sort(olive.rf$oob.times),
  ylim = c(0, max(olive.rf$oob.times)),
  xlab = "Case (sorted by oob.times)", ylab = "oob.times")

# Fitting the forest to areas of south: area (as a factor) is the response
# and every other column of d.olive.sth is a predictor.
olive.rf <- randomForest(as.factor(area) ~ ., data = d.olive.sth,
  importance = TRUE, proximity = TRUE, mtry = 2, ntree = 1500)

# Mean and standard deviation of each error-rate column across the trees.
apply(olive.rf$err.rate, 2, mean)
apply(olive.rf$err.rate, 2, sd)

# Rank the predictors, most important first.  With four classes the fifth
# importance column is MeanDecreaseAccuracy; select it by name instead of
# by position.
order(olive.rf$importance[, "MeanDecreaseAccuracy"], decreasing = TRUE)

# Predicted class for each case, stored as factor level codes.
pred <- as.numeric(olive.rf$predicted)

# Cross-tabulate the observed area (the first column of d.olive.sth)
# against the forest's predictions.
table(d.olive.sth$area, olive.rf$predicted)

# Per-class vote matrix.  The component name is spelled out in full
# rather than relying on partial matching of `$vote`.
margin <- olive.rf$votes
colnames(margin) <- c("Vote1", "Vote2", "Vote3", "Vote4")

# Combine predictions, votes and the southern data, then open it in GGobi
# and keep a handle on the first dataset.
d.olive.rf <- cbind(pred, margin, d.olive.sth)
gd <- ggobi(d.olive.rf)[1]

# Colour every case by its area code (1 -> 6, 2 -> 3, 3 -> 2, 4 -> 9).
# Assumes area is coded as the integers 1..4 in this subset -- TODO confirm.
glyph_color(gd) <- c(6, 3, 2, 9)[d.olive.rf$area]



