# The MIT License
# Copyright (c) 2007 The GGobi Foundation
# http://www.ggobi.org/book/code-license.txt


# Read in data
d.tao <- read.csv("tao.csv",row.names=1)
attach(d.tao)

# Load libraries
library(rggobi)
library(norm)

# Preliminary statistics
d.tao.nm<-prelim.norm(as.matrix(d.tao[,4:8]))
d.tao.nm$nmis
d.tao.nm$r

# Load data into ggobi and set up colors/glyphs
gg <- ggobi(d.tao) 
g <- gg[1]

# Initialize glyph/color
glyph_type(g) <- 6
glyph_size(g) <- 2

# Color by Year
gcolor = c(rep(9,368), rep(3,368))
glyph_color(g) = gcolor

# Preliminary statistics by year
d.tao.nm.97<-prelim.norm(as.matrix(d.tao[year==1997,4:8]))
d.tao.nm.97$nmis
d.tao.nm.93<-prelim.norm(as.matrix(d.tao[year==1993,4:8]))
d.tao.nm.93$nmis

# Find rows which are missing, color a darker shade of the same color
gcolor[year==1993] <- 3; gcolor[year==1997] <- 9
glyph_color(g) = gcolor
ismis <- apply(d.tao[,4:8],MAR=1,function(x) { return(sum(is.na(x))>0)} )
gcolor[which(ismis)]<-gcolor[which(ismis)]+1
glyph_color(g) = gcolor

# Start imputation
rngseed(1234567) 
theta.97<-em.norm(d.tao.nm.97, showits=TRUE)
theta.93<-em.norm(d.tao.nm.93,showits=TRUE)

# Loop to do multiple imputations, and watch results
for (i in 1:10) { 
d.tao.impute.97 <- imp.norm(d.tao.nm.97, theta.97,
    as.matrix(d.tao[year==1997,4:8]))
d.tao.impute.93 <- imp.norm(d.tao.nm.93, theta.93,
    as.matrix(d.tao[year==1993,4:8]))

# Set the values of missings to be the imputed values.
g[,"sea.surface.temp"] = c(d.tao.impute.97[,"sea.surface.temp"],
    d.tao.impute.93[,"sea.surface.temp"])
g[,"air.temp"] = c(d.tao.impute.97[,"air.temp"],
    d.tao.impute.93[,"air.temp"])
g[,"humidity"] = c(d.tao.impute.97[,"humidity"],
    d.tao.impute.93[,"humidity"])

readline("Ready to continue? Press return ")
}

# An alternative library
library(Hmisc)
tmp1<-impute(humidity[year==1993],mean)
tmp2<-impute(air.temp[year==1993],mean)
plot(tmp1,tmp2)
tmp1<-impute(sea.surface.temp[year==1997],mean)
tmp2<-impute(air.temp[year==1997],mean)
plot(tmp1,tmp2)
tmp1<-c(impute(sea.surface.temp[year==1997],mean),
  impute(sea.surface.temp[year==1993],mean))
tmp2<-c(impute(air.temp[year==1997],mean),impute(air.temp[year==1993],mean))
tmp3<-c(humidity[year==1997],impute(humidity[year==1993],mean))
pairs(cbind(tmp1,tmp2,tmp3))
tmp1<-c(impute(sea.surface.temp[year==1997],"random"),
  impute(sea.surface.temp[year==1993],"random"))
tmp2<-c(impute(air.temp[year==1997],"random"),impute(air.temp[year==1993],"random"))
tmp3<-c(humidity[year==1997],impute(humidity[year==1993],"random"))
pairs(cbind(tmp1,tmp2,tmp3))

# Needs to be done separately for year, but this could also be another MI example.
tmp<-aregImpute(~sea.surface.temp+air.temp+humidity,n.impute=1,data=d.tao)
tmp1<-sea.surface.temp
tmp1[tmp$na$sea.surface.temp]<-tmp$imputed$sea.surface.temp
tmp2<-air.temp
tmp2[tmp$na$air.temp]<-tmp$imputed$air.temp
tmp3<-humidity
tmp3[tmp$na$humidity]<-tmp$imputed$humidity
pairs(cbind(tmp1,tmp2,tmp3))

