======Simulation study ====== ===== Algorithm =====

#################################################################################################
#
#	PAPER COMPANION: SIMULATION STUDY
#	Authors:	Ernesto Jardim  
#			Paulo Ribeiro Jr. 
#	Date: 20081013
#	Objective: 	Test paper methodology against design-based estimators regarding
# 			association between age compositions and the age aggregated abundance.
#			Tree options are simulated: (i) no association, (ii) week association, 
#			(iii) strong association.
#	Note:		The association is induced by considering correlated multivariate gaussian 
#			processes as the basis for building the age aggregated abundance and age 
#			compositions. 
#
#################################################################################################

# R libraries
library(geoR)
library(lattice)
library(MASS)
library(RandomFields)
library(compositions)

#==============================================================================================
# SIMULATING THE PROCESSES
#==============================================================================================
# grid
gs <- expand.grid((0:40)/40, (0:40)/40)
# size of processes
n <- nrow(gs)
# number of replicates
nsim <- 250

#----------------------------------------------------------------------------------------------
# STEP 1 : gaussian processes to build abundance and compositions
#----------------------------------------------------------------------------------------------

# object
arr0 <- array(NA, dim=c(n,7,nsim), dimnames=list(loc=1:n, age=c("all","1i","2i","1w","2w","1s","2s"), nsim=1:nsim))
# variance-covariance matrix
s2 <- 0.5
Sig <- diag(c(1,1,1,1,1,1,1))
Sig[1,4] <- 0.45; Sig[4,1] <- 0.45; Sig[1,6] <- 0.9; Sig[6,1] <- 0.9; Sig[6,4] <- 0.5; Sig[4,6] <- 0.5
Sig <- Sig*s2
# simulation
set.seed(111)
for(i in 1:nsim){
	arr0[,,i] <- mvrnorm(nrow(gs), c(1,0,0,0,0,0,0), Sig)
}
 
#----------------------------------------------------------------------------------------------
# STEP 2: generate 250 replicates of a log-gaussian spatial process (Diggle and Ribeiro Jr, 2007)
#         with  mu=1, phi=0.2, sigma2=0.5, tau2=0.5
#----------------------------------------------------------------------------------------------

# Generate a spatial Gaussian process Z
phi <- 0.2
sigmasq <- 0.5
set.seed(222)
Z <- grf(grid=gs, cov.pars=c(sigmasq, phi), nsim=nsim)
# build a log gausian process Y
Ysim <- Z
Ysim$data <- exp(Z$data+arr0[,1,])
# Y characteristics
Ysim.lmean <- apply(log(Ysim$data), 2, mean)
Ysim.lvar <- apply(log(Ysim$data), 2, var)
Ysim.lnhat <- exp(Ysim.lmean+Ysim.lvar/2)

#----------------------------------------------------------------------------------------------
# STEP 3: build compositions
#----------------------------------------------------------------------------------------------

# objects
arr00 <- array(1, dim=c(n,3,nsim), dimnames=list(loc=1:n, age=1:3, nsim=1:nsim))
Psim <- array(NA, dim=c(n,3,nsim,3), dimnames=list(loc=1:n, age=1:3, nsim=1:nsim, mucor=c("indep", "dep045","dep090")))
# option 1: no association between compositions and age aggregated abundance
arr00[,1:2,] <- exp(arr0[,2:3,])
Psim[,,,1] <- aperm(apply(arr00,c(1,3),function(x) x/sum(x)),c(2,1,3))
# option 2: week association between compositions and age aggregated abundance
arr00[,1:2,] <- exp(arr0[,4:5,])
Psim[,,,2] <- aperm(apply(arr00,c(1,3),function(x) x/sum(x)),c(2,1,3))
# option 3: strong association between compositions and age aggregated abundance
arr00[,1:2,] <- exp(arr0[,6:7,])
Psim[,,,3] <- aperm(apply(arr00,c(1,3),function(x) x/sum(x)),c(2,1,3))
# P characteristics

#----------------------------------------------------------------------------------------------
# STEP 4: build abundance at age 
#----------------------------------------------------------------------------------------------

# objects
Isim.ln <- array(NA, dim=c(n,3,nsim,3), dimnames=list(loc=1:n, age=1:3, nsim=1:nsim, mucor=c("indep", "dep045","dep090")))
# sim
for(i in 1:3){
	for(j in 1:3){
		Isim.ln[,i,,j] <- Psim[,i,,j]*Ysim$data
	}
}
# I characteristics
Isim.lnmean <- apply(log(Isim.ln), c(2,3,4), mean)
Isim.lnvar <- apply(log(Isim.ln), c(2,3,4), var)
Isim.lnhat <- exp(Isim.lnmean+Isim.lnvar/2)

#==============================================================================================
# ESTIMATION
#==============================================================================================
# number of samples to be drawn from each of the 250 replicate
ns <- 2

#----------------------------------------------------------------------------------------------
# STEP 5: estimation with methodology proposed by the paper
#----------------------------------------------------------------------------------------------

# objects
Yres <- array(NA, dim=c(ns,4,nsim), dimnames=list(samp=1:ns, stat=c("Ybar", "Ybarvar", "Yhat", "Yhatvar"), nsim=1:nsim))
Ihat <- array(NA, dim=c(ns,3,nsim,3), dimnames=list(samp=1:ns, age=1:3, nsim=1:nsim, mucor=c("indep", "dep045","dep090")))
xd <- expand.grid(dimnames(Ihat)[-c(1,2)])
# estimation
set.seed(222)
for(j in 1:ns){
cat("\n",j ,":", date(),"-", sep="")
	samp <- sample(1:n, 100)
	for(i in 1:nrow(xd)){
		x <- xd[i,]
		cat(".", sep="")
		# building the sample
		Isamp <- Isim.ln[samp,,x[[1]],x[[2]]]
		locsamp <- gs[samp,]
		Ysamp <- apply(Isamp,1,sum)
		# estimation of age aggregated abundance
		lf <- likfit(as.geodata(cbind(locsamp, Ysamp)), lambda=0, ini.cov.pars=c(1,0.2), messages=FALSE)
		Yhat <- exp(lf$beta+lf$tausq/2+lf$sigmasq/2)
		Yhatvar <- exp(2*lf$beta+lf$tausq+lf$sigmasq)*(exp(lf$tausq+lf$sigmasq)-1)
		Yres[j,c("Yhat", "Yhatvar"),x[[1]]] <- c(Yhat, Yhatvar)	
		# estimation of abundance-at-age
		prop <- acomp(Isamp)
		Ihat[j,,x[[1]],x[[2]]] <- Yhat*c(mean(prop))
	}
}

#----------------------------------------------------------------------------------------------
# STEP 6: estimation with design-based estimators
#----------------------------------------------------------------------------------------------

# objects
Ibar <- array(NA, dim=c(ns,3,nsim,3), dimnames=list(samp=1:ns, page=1:3, nsim=1:nsim, mucor=c("indep", "dep045","dep090")))
# estimation
set.seed(222)
for(j in 1:ns){
cat("\n",j ,":", date(),"-", sep="")
	samp <- sample(1:n, 100)
	for(i in 1:nrow(xd)){
		x <- xd[i,]
		cat(".", sep="")
		# building the sample
		Isamp <- Isim.ln[samp,,x[[1]],x[[2]]]
		Ysamp <- apply(Isamp,1,sum)
		# store means
		Yres[j,c("Ybar", "Ybarvar"),x[[1]]] <- c(mean(Ysamp), var(Ysamp))	
		# estimation of abundance-at-age
		Ibar[j,,x[[1]],x[[2]]] <- apply(Isamp,2,mean)
	}
}

#==============================================================================================
# RESULTS
#==============================================================================================

#----------------------------------------------------------------------------------------------
# STEP 7: summary statistics
#----------------------------------------------------------------------------------------------

# objects
sim.res <- array(NA, dim=c(nsim,3,3,2,3), dimnames=list(iter=1:nsim, age=1:3, mucor=c("indep", "dep045","dep090"), meth=c("geo","samp"), stats=c("bias", "mse", "ICcov")))
# computation
for(i in 1:nsim){
	lmusim <- Isim.lnhat[,i,]
	# geo 
	m <- Ihat[,,i,]
	q025 <- apply(m,c(2,3), quantile, probs=0.025, na.rm=T)
	q975 <- apply(m,c(2,3), quantile, probs=0.975, na.rm=T)
	sim.res[i,,,"geo","ICcov"] <- q025<=lmusim & q975>=lmusim
	for(j in 1:ns){
		m[j,,] <- m[j,,]-lmusim 
	}
	sim.res[i,,,"geo","bias"] <- apply(m, c(2,3), mean)
	sim.res[i,,,"geo","mse"] <- apply(m, c(2,3), var)

	# samp
	m <- Ibar[,,i,]
	q025 <- apply(m,c(2,3), quantile, probs=0.025, na.rm=T)
	q975 <- apply(m,c(2,3), quantile, probs=0.975, na.rm=T)
	sim.res[i,,,"samp","ICcov"] <- q025<=lmusim & q975>=lmusim
	for(j in 1:ns){
		m[j,,] <- m[j,,]-lmusim 
	}
	sim.res[i,,,"samp","bias"] <- apply(m, c(2,3), mean)
	sim.res[i,,,"samp","mse"] <- apply(m, c(2,3), var)
}
# table
tab.res <-  apply(sim.res, c(2,3,4,5), mean)

#################################################################################################
# THE END (or the beginning ...) 
#################################################################################################
===== Results =====