# The MIT License # Copyright (c) 2007 The GGobi Foundation # http://www.ggobi.org/book/code-license.txt # Read in data d.tao <- read.csv("tao.csv",row.names=1) attach(d.tao) # Load libraries library(rggobi) library(norm) # Preliminary statistics d.tao.nm<-prelim.norm(as.matrix(d.tao[,4:8])) d.tao.nm$nmis d.tao.nm$r # Load data into ggobi and set up colors/glyphs gg <- ggobi(d.tao) g <- gg[1] # Initialize glyph/color glyph_type(g) <- 6 glyph_size(g) <- 2 # Color by Year gcolor = c(rep(9,368), rep(3,368)) glyph_color(g) = gcolor # Preliminary statistics by year d.tao.nm.97<-prelim.norm(as.matrix(d.tao[Year==1997,4:8])) d.tao.nm.97$nmis d.tao.nm.93<-prelim.norm(as.matrix(d.tao[Year==1993,4:8])) d.tao.nm.93$nmis # Find rows which are missing, color a darker shade of the same color gcolor[Year==1993] <- 3; gcolor[Year==1997] <- 9 glyph_color(g) = gcolor ismis <- apply(d.tao[,4:8],MAR=1,function(x) { return(sum(is.na(x))>0)} ) gcolor[which(ismis)]<-gcolor[which(ismis)]+1 glyph_color(g) = gcolor # Start imputation rngseed(1234567) theta.97<-em.norm(d.tao.nm.97, showits=TRUE) theta.93<-em.norm(d.tao.nm.93,showits=TRUE) # Loop to do multiple imputations, and watch results for (i in 1:10) { d.tao.impute.97 <- imp.norm(d.tao.nm.97, theta.97, as.matrix(d.tao[Year==1997,4:8])) d.tao.impute.93 <- imp.norm(d.tao.nm.93, theta.93, as.matrix(d.tao[Year==1993,4:8])) # Set the values of missings to be the imputed values. g[,"Sea.Surface.Temp"] = c(d.tao.impute.97[,"Sea.Surface.Temp"], d.tao.impute.93[,"Sea.Surface.Temp"]) g[,"Air.Temp"] = c(d.tao.impute.97[,"Air.Temp"], d.tao.impute.93[,"Air.Temp"]) g[,"Humidity"] = c(d.tao.impute.97[,"Humidity"], d.tao.impute.93[,"Humidity"]) readline("Ready to continue? Press return ") } # An alternative library library(Hmisc) tmp1<-impute(Humidity[Year==1993],mean) tmp2<-impute(Air.Temp[Year==1993],mean) plot(tmp1,tmp2) tmp1<-impute(Sea.Surface.Temp[Year==1997],mean) tmp2<-impute(Air.Temp[Year==1997],mean) plot(tmp1,tmp2) tmp1<-c(impute(Sea.Surface.Temp[Year==1997],mean), impute(Sea.Surface.Temp[Year==1993],mean)) tmp2<-c(impute(Air.Temp[Year==1997],mean),impute(Air.Temp[Year==1993],mean)) tmp3<-c(Humidity[Year==1997],impute(Humidity[Year==1993],mean)) pairs(cbind(tmp1,tmp2,tmp3)) tmp1<-c(impute(Sea.Surface.Temp[Year==1997],"random"), impute(Sea.Surface.Temp[Year==1993],"random")) tmp2<-c(impute(Air.Temp[Year==1997],"random"),impute(Air.Temp[Year==1993],"random")) tmp3<-c(Humidity[Year==1997],impute(Humidity[Year==1993],"random")) pairs(cbind(tmp1,tmp2,tmp3)) # Needs to be done separately for Year, but this could also be another MI example. tmp<-aregImpute(~Sea.Surface.Temp+Air.Temp+Humidity,n.impute=1,data=d.tao) tmp1<-Sea.Surface.Temp tmp1[tmp$na$Sea.Surface.Temp]<-tmp$imputed$Sea.Surface.Temp tmp2<-Air.Temp tmp2[tmp$na$Air.Temp]<-tmp$imputed$Air.Temp tmp3<-Humidity tmp3[tmp$na$Humidity]<-tmp$imputed$Humidity pairs(cbind(tmp1,tmp2,tmp3))