# The MIT License # Copyright (c) 2007 The GGobi Foundation # http://www.ggobi.org/book/code-license.txt # Read in data d.tao <- read.csv("tao.csv",row.names=1) attach(d.tao) # Load libraries library(rggobi) library(norm) # Preliminary statistics d.tao.nm<-prelim.norm(as.matrix(d.tao[,4:8])) d.tao.nm$nmis d.tao.nm$r # Load data into ggobi and set up colors/glyphs gg <- ggobi(d.tao) g <- gg[1] # Initialize glyph/color glyph_type(g) <- 6 glyph_size(g) <- 2 # Color by Year gcolor = c(rep(9,368), rep(3,368)) glyph_color(g) = gcolor # Preliminary statistics by year d.tao.nm.97<-prelim.norm(as.matrix(d.tao[year==1997,4:8])) d.tao.nm.97$nmis d.tao.nm.93<-prelim.norm(as.matrix(d.tao[year==1993,4:8])) d.tao.nm.93$nmis # Find rows which are missing, color a darker shade of the same color gcolor[year==1993] <- 3; gcolor[year==1997] <- 9 glyph_color(g) = gcolor ismis <- apply(d.tao[,4:8],MAR=1,function(x) { return(sum(is.na(x))>0)} ) gcolor[which(ismis)]<-gcolor[which(ismis)]+1 glyph_color(g) = gcolor # Start imputation rngseed(1234567) theta.97<-em.norm(d.tao.nm.97, showits=TRUE) theta.93<-em.norm(d.tao.nm.93,showits=TRUE) # Loop to do multiple imputations, and watch results for (i in 1:10) { d.tao.impute.97 <- imp.norm(d.tao.nm.97, theta.97, as.matrix(d.tao[year==1997,4:8])) d.tao.impute.93 <- imp.norm(d.tao.nm.93, theta.93, as.matrix(d.tao[year==1993,4:8])) # Set the values of missings to be the imputed values. g[,"sea.surface.temp"] = c(d.tao.impute.97[,"sea.surface.temp"], d.tao.impute.93[,"sea.surface.temp"]) g[,"air.temp"] = c(d.tao.impute.97[,"air.temp"], d.tao.impute.93[,"air.temp"]) g[,"humidity"] = c(d.tao.impute.97[,"humidity"], d.tao.impute.93[,"humidity"]) readline("Ready to continue? Press return ") } # An alternative library library(Hmisc) tmp1<-impute(humidity[year==1993],mean) tmp2<-impute(air.temp[year==1993],mean) plot(tmp1,tmp2) tmp1<-impute(sea.surface.temp[year==1997],mean) tmp2<-impute(air.temp[year==1997],mean) plot(tmp1,tmp2) tmp1<-c(impute(sea.surface.temp[year==1997],mean), impute(sea.surface.temp[year==1993],mean)) tmp2<-c(impute(air.temp[year==1997],mean),impute(air.temp[year==1993],mean)) tmp3<-c(humidity[year==1997],impute(humidity[year==1993],mean)) pairs(cbind(tmp1,tmp2,tmp3)) tmp1<-c(impute(sea.surface.temp[year==1997],"random"), impute(sea.surface.temp[year==1993],"random")) tmp2<-c(impute(air.temp[year==1997],"random"),impute(air.temp[year==1993],"random")) tmp3<-c(humidity[year==1997],impute(humidity[year==1993],"random")) pairs(cbind(tmp1,tmp2,tmp3)) # Needs to be done separately for year, but this could also be another MI example. tmp<-aregImpute(~sea.surface.temp+air.temp+humidity,n.impute=1,data=d.tao) tmp1<-sea.surface.temp tmp1[tmp$na$sea.surface.temp]<-tmp$imputed$sea.surface.temp tmp2<-air.temp tmp2[tmp$na$air.temp]<-tmp$imputed$air.temp tmp3<-humidity tmp3[tmp$na$humidity]<-tmp$imputed$humidity pairs(cbind(tmp1,tmp2,tmp3))