# The MIT License
# Copyright (c) 2007 The GGobi Foundation
# http://www.ggobi.org/book/code-license.txt


# Interfacing with ggobi
library(rggobi)
# Start a GGobi session. ggobi() is called without a file argument, so
# wages.xml presumably has to be opened by hand in the GGobi window
# before the lines below are run -- confirm against your setup.
gg <- ggobi() # Open wages.xml

# Get data into R
# The three datasets of the session are fetched by position; the first two
# get consecutive integers as row names (6402 and 5514 rows respectively).
d.wages1 <- gg[1]
row.names(d.wages1) <- seq_len(6402)
d.wages2 <- gg[2]
row.names(d.wages2) <- seq_len(5514)
d.wages3 <- gg[3]

# Animate a selection of 50 subjects
# Draw 50 of the 888 subject positions at random. On each pass, every row of
# d.wages1 gets glyph type/colour 1 except the rows whose first column
# matches the i-th entry of d.wages3's first column, which get 6.
smp <- sample(seq_len(888), size = 50)
plain <- rep(1, 6402)
for (i in smp) {
  is_subject <- d.wages1[[1]] == d.wages3[[1]][i]
  gtype <- plain
  gcolor <- plain
  gtype[is_subject] <- 6
  gcolor[is_subject] <- 6
  glyph_type(d.wages1) <- gtype
  glyph_color(d.wages1) <- gcolor
  glyph_size(d.wages1) <- 5
  # Progress trace: one subject position per line
  cat(i, "\n")
}

# Restore the uniform glyph type and colour once the animation is done
gtype <- plain
gcolor <- plain
glyph_type(d.wages1) <- gtype
glyph_color(d.wages1) <- gcolor

# Characterize profiles
# Linear model fit
# wages.linear[i] holds the second coefficient (the exper slope) of
# lm(lnw ~ exper) fitted to the rows of subject i. Subjects with two or
# fewer rows keep the initial value 0.
wages.linear <- numeric(888)
for (i in seq_len(888)) {
  subject_rows <- d.wages1[[1]] == d.wages3[[1]][i]
  # Keep the subject's rows, then columns 3 and 2; the formula below needs
  # lnw and exper to be among them.
  x <- d.wages1[subject_rows][3:2]
  if (nrow(x) > 2) {
    fit <- lm(lnw ~ exper, data = x)
    wages.linear[i] <- coef(fit)[2]
  }
  cat(i, " ")
}

# Robust linear model fit
# Two-stage fit per subject: an ordinary lm(lnw ~ exper) supplies residuals,
# and a second lm is weighted by 1 / (residual^10 + 1) so that rows with
# large residuals contribute very little. wages.rlin[i] is the second
# coefficient of the weighted fit; subjects with three or fewer rows keep 0.
wages.rlin <- numeric(888)
for (i in seq_len(888)) {
  subject_rows <- d.wages1[[1]] == d.wages3[[1]][i]
  x <- d.wages1[subject_rows][3:2]
  if (nrow(x) > 3) {
    ols_resid <- residuals(lm(lnw ~ exper, data = x))
    weighted_fit <- lm(lnw ~ exper, data = x,
      weights = 1 / (ols_resid^10 + 1))
    wages.rlin[i] <- coef(weighted_fit)[2]
  }
  cat(i, " ")
}

# Number of observations for each subject
# summary() is applied to the first column of d.wages1 with maxsum = 888.
# NOTE(review): this yields per-subject counts only if that column is a
# factor of subject ids -- TODO confirm.
subject_ids <- d.wages1[[1]]
wages.count <- summary(subject_ids, maxsum = 888)

# This measures volatility of each subject's wages
# For every subject position, take the standard deviation of the second
# column of d.wages1 over that subject's rows. The position is printed as a
# progress trace.
wages.sd <- vapply(seq_len(888), function(i) {
  subject_rows <- d.wages1[[1]] == d.wages3[[1]][i]
  spread <- sd(d.wages1[[2]][subject_rows])
  cat(i, " ")
  spread
}, numeric(1))

# This measures the sd of each subject's successive wage differences
# For every subject position, collect the subject's values from the second
# column of d.wages1 and take the standard deviation of their first
# differences.
#
# The differences are taken between consecutive observations of the SAME
# subject (diff() over the subject's own values). Subtracting the value in
# the preceding row of the dataset would pick up another subject's wage
# whenever a subject's rows are not stored contiguously.
#
# Subjects with a single observation get the placeholder c(0, 0, 0), i.e.
# an sd of 0. Subjects with exactly two observations have one difference,
# for which sd() returns NA.
wages.sddif <- vapply(seq_len(888), function(i) {
  subject_wages <- d.wages1[[2]][d.wages1[[1]] == d.wages3[[1]][i]]
  difs <- if (length(subject_wages) > 1) {
    diff(subject_wages)
  } else {
    c(0, 0, 0)
  }
  cat(i, " ")
  sd(difs)
}, numeric(1))

# This measures smoothly upward trend
# For every subject position, compute the sd of the subject's successive
# wage differences and attach a sign: negative when decreases outnumber
# increases (sum of the signs of the differences below zero), positive
# otherwise.
#
# The differences are taken between consecutive observations of the SAME
# subject (diff() over the subject's own values). Subtracting the value in
# the preceding row of the dataset would pick up another subject's wage
# whenever a subject's rows are not stored contiguously.
#
# Subjects with a single observation get the placeholder c(0, 0, 0), i.e.
# a score of 0.
wages.up <- vapply(seq_len(888), function(i) {
  subject_wages <- d.wages1[[2]][d.wages1[[1]] == d.wages3[[1]][i]]
  chngsgn <- 1
  if (length(subject_wages) > 1) {
    difs <- diff(subject_wages)
    if (sum(sign(difs)) < 0) {
      chngsgn <- -1
    }
  } else {
    difs <- c(0, 0, 0)
  }
  cat(i, " ")
  sd(difs) * chngsgn
}, numeric(1))

# Cycle over individuals in an ordered manner
# Pick one criterion at a time: leave exactly one assignment to cycle.order
# active. The alternatives are kept as comments; stacking them as
# consecutive `for` headers would nest the loops instead of choosing one.
cycle.order <- order(wages.linear, decreasing = TRUE)[1:150]
# cycle.order <- order(wages.up, decreasing = TRUE)[1:150]
# cycle.order <- order(wages.up)[1:150]
# cycle.order <- order(wages.sd)[1:50]
# cycle.order <- order(wages.rlin)[1:50]
# cycle.order <- order(wages.rlin, decreasing = TRUE)[1:50]

# Highlight each selected subject in turn: its rows in d.wages1 get glyph
# type/colour 6, all other rows get 1.
for (i in cycle.order) {
  # Optional filter on the number of observations:
  #  if (wages.count[i] > 5) {
  is_subject <- d.wages1[[1]] == d.wages3[[1]][i]
  gtype <- rep(1, 6402)
  gcolor <- rep(1, 6402)
  gtype[is_subject] <- 6
  gcolor[is_subject] <- 6
  glyph_type(d.wages1) <- gtype
  glyph_color(d.wages1) <- gcolor
  glyph_size(d.wages1) <- 4
  cat(i, " ")
  #  }
}

# Add profile descriptors to the data in ggobi
# One row per subject: id plus the descriptors computed above, stored in the
# GGobi session under the name "descriptors".
# NOTE(review): wages.count comes back in the order summary() reports it;
# confirm that this order matches d.wages3$id.
d.wages4 <- data.frame(
  id = d.wages3$id,
  count = wages.count,
  sd = wages.sd,
  sddif = wages.sddif,
  linear = wages.linear,
  rlin = wages.rlin
)
gg["descriptors"] <- d.wages4
