Experimento em arranjo fatorial

Walmes Zeviani


Definições da sessão

##-----------------------------------------------------------------------------
## Definições da sessão, pacotes a serem usados.

pkg <- c("lattice", "latticeExtra", "reshape",
         "doBy", "multcomp", "nlme", "rootSolve",
         "plyr", "wzRfun")
sapply(pkg, library, character.only=TRUE, logical.return=TRUE)
##      lattice latticeExtra      reshape         doBy     multcomp         nlme 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##    rootSolve         plyr       wzRfun 
##         TRUE         TRUE         TRUE

source("lattice_setup.R")

##-----------------------------------------------------------------------------
## Informações sobre as versões dos pacotes.

sessionInfo()
## R version 3.1.1 (2014-07-10)
## Platform: i686-pc-linux-gnu (32-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=pt_BR.UTF-8       
##  [4] LC_COLLATE=en_US.UTF-8     LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
## [10] LC_TELEPHONE=C             LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] splines   stats     graphics  grDevices utils     datasets  base     
## 
## other attached packages:
##  [1] wzRfun_0.1          plyr_1.8.1          rootSolve_1.6.5     nlme_3.1-117       
##  [5] multcomp_1.3-3      TH.data_1.0-3       mvtnorm_0.9-99992   doBy_4.5-10        
##  [9] MASS_7.3-33         survival_2.37-7     reshape_0.8.5       latticeExtra_0.6-26
## [13] RColorBrewer_1.0-5  lattice_0.20-29     knitr_1.5          
## 
## loaded via a namespace (and not attached):
##  [1] evaluate_0.5.5      formatR_0.10        grid_3.1.1          lme4_1.1-6         
##  [5] Matrix_1.1-4        methods_3.1.1       minqa_1.2.3         Rcpp_0.11.1        
##  [9] RcppEigen_0.3.2.1.2 sandwich_2.3-0      stringr_0.6.2       tools_3.1.1        
## [13] zoo_1.7-11

## obs: Para instalar um pacote faça:
## install.packages("nome_do_pacote", dependencies=TRUE)

Análise dos dados

##-----------------------------------------------------------------------------
## Dados de experimento com soja sob 3 níveis de umidade do solo (massa
## de água/massa de solo) e níveis de potássio fornecidos na adubação. A
## hipótese sob estudo é que presença de potássio dá suporte para a
## cultura superar a ocorrência de períodos sem chuva. Experimento
## conduzido em casa de vegetação com 5 blocos, 2 plantas por
## u.e. (vaso).

## Para acessar o artigo (pdf online).
## browseURL("http://www.scielo.br/pdf/rca/v43n2/a03v43n2.pdf")

link <- "http://www.leg.ufpr.br/~walmes/data/soja.txt"
da <- read.table(link, header=TRUE, sep="\t", dec=",")
da <- subset(da, select=c(agua, potassio, rengrao, bloco))
da <- da[-74,]
da$AG <- factor(da$agua)
names(da)[c(2:3)] <- c("x","y")

## Informa que bloco é de efeito aleatório.
db <- groupedData(y~x|bloco, data=da, order=FALSE)
str(db)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame':   74 obs. of  5 variables:
##  $ agua : num  37.5 37.5 37.5 37.5 37.5 50 50 50 50 50 ...
##  $ x    : int  0 30 60 120 180 0 30 60 120 180 ...
##  $ y    : num  14.6 21.5 24.6 21.9 28.1 ...
##  $ bloco: Factor w/ 5 levels "I","II","III",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ AG   : Factor w/ 3 levels "37.5","50","62.5": 1 1 1 1 1 2 2 2 2 2 ...
##  - attr(*, "formula")=Class 'formula' length 3 y ~ x | bloco
##   .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##  - attr(*, "FUN")=function (x)  
##  - attr(*, "order.groups")= logi FALSE

##-----------------------------------------------------------------------------
## Visualiza os dados.

p0 <-
    xyplot(y~x|AG, data=db, layout=c(3,1), col=1,
           xlab=expression("Conteúdo de potássio no solo"~(mg~dm^{-3})),
           ylab=expression("Rendimento de grãos"~(g~vaso^{-1})))
print(p0)

plot of chunk unnamed-chunk-3

##-----------------------------------------------------------------------------
## Parametrizações do modelo quadrático-platô.

## Original
##  b0: intercepto
##  b1: coeficiente angular ou taxa na origem
##  b2: coeficiente de concavidade/curvatura ou grau de especificidade

fx0 <- function(panel){
    with(panel, {
        xb <- -b1/(2*b2)
        curve(b0+(b1*x+b2*x^2)*(x<=xb)+
              (b1*xb+b2*xb^2)*(x>xb), xlim[1], xlim[2], ylim=ylim)
        abline(a=b0, b=b1, lty=2)
        abline(v=xb, lty=2)
    })
    panel
}

panel <- rp.control(title="Original",
                    xlim=c(0,10), ylim=c(0,10))
rp.slider(panel, b0, 0, 10, initval=2, showvalue=TRUE, action=fx0)
rp.slider(panel, b1, 0, 15, initval=0, showvalue=TRUE, action=fx0)
rp.slider(panel, b2, -5, 0, initval=0, showvalue=TRUE, action=fx0)

## Canônica (ver Tese Walmes)
##  yb: valor crítico em y e máximo/mínimo da função
##  xb: valor crítico em x e ponto de troca/união
##  b2: coeficiente de concavidade/curvatura ou grau de especificidade

fx1 <- function(panel){
    with(panel, {
        curve(yb+(x<=xb)*b2*(x-xb)^2, xlim[1], xlim[2], ylim=ylim)
        abline(v=xb-0:1, h=yb-c(0,-b2), lty=2)
    })
    panel
}

panel <- rp.control(title="Canônica",
                    xlim=c(0,10), ylim=c(0,10))
rp.slider(panel, yb, 0, 10, initval=5, showvalue=TRUE, action=fx1)
rp.slider(panel, xb, 0, 10, initval=5, showvalue=TRUE, action=fx1)
rp.slider(panel, b2, -5, 0, initval=0, showvalue=TRUE, action=fx1)

## Mistura original e canônica (feito para o Curso)
##  b0: intercepto
##  b1: coeficiente angular ou taxa na origem
##  xb: valor crítico em x e ponto de troca/união

fx2 <- function(panel){
    with(panel, {
        curve(b0+(x<xb)*(b1*x-0.5*b1*x^2/xb)+(x>=xb)*(0.5*b1*xb),
              xlim[1], xlim[2], ylim=ylim)
        abline(v=xb-0:1, lty=2)
        abline(a=b0, b=b1, lty=2)
    })
    panel
}

panel <- rp.control(title="Mistura",
                    xlim=c(0,10), ylim=c(0,10))
rp.slider(panel, b0, 0, 10, initval=2, showvalue=TRUE, action=fx2)
rp.slider(panel, b1, 0, 10, initval=0, showvalue=TRUE, action=fx2)
rp.slider(panel, xb, 0, 10, initval=5, showvalue=TRUE, action=fx2)

##-----------------------------------------------------------------------------
## Ajuste do modelo linear-platô.

## Modelo linear plato.
n1.lps <- nlme(y~th0+th1*x*(x<=thb)+th1*thb*(x>thb),
               data=db, method="ML",
               fixed=th0+th1+thb~AG,
               random=th0~1,
               start=c(
                   15,0,0,
                   0.22,0,0,
                   40,20,50))

anova(n1.lps, type="marginal")
##                 numDF denDF F-value p-value
## th0.(Intercept)     1    61  158.11  <.0001
## th0.AG              2    61    1.61  0.2086
## th1.(Intercept)     1    61   22.97  <.0001
## th1.AG              2    61    0.08  0.9278
## thb.(Intercept)     1    61   35.90  <.0001
## thb.AG              2    61    8.26  0.0007

## Modelo linear plato reduzido.
n1.lpr <- update(n1.lps,
                 fixed=list(th0+th1~1, thb~AG),
                 start=c(
                     15,
                     0.22,
                     40,20,50))

anova(n1.lps, n1.lpr)
##        Model df   AIC   BIC logLik   Test L.Ratio p-value
## n1.lps     1 11 348.0 373.3 -163.0                       
## n1.lpr     2  7 346.3 362.5 -166.2 1 vs 2   6.356  0.1741

##-----------------------------------------------------------------------------
## Predição.

pred <- expand.grid(x=0:180, AG=levels(da$AG))
pred$y1s <- predict(n1.lps, newdata=pred, level=0)
pred$y1r <- predict(n1.lpr, newdata=pred, level=0)

print(p0)+
    as.layer(xyplot(y1s+y1r~x|AG, data=pred, type="l"))

plot of chunk unnamed-chunk-5plot of chunk unnamed-chunk-5


##-----------------------------------------------------------------------------
## Predição com bandas.

numF1 <- function(theta, xi, AG){
    theta[1]+
        (xi<=AG%*%theta[3:5])*(theta[2]*xi)+
            (xi>AG%*%theta[3:5])*(theta[2]*AG%*%theta[3:5])
}

##-----------------------------------------------------------------------------
## Faz predição com bandas.

pred1 <- expand.grid(x=seq(0,180,2), AG=levels(da$AG))
c1 <- fixef(n1.lpr)
F1 <- gradient(numF1, x=c1, xi=pred1$x,
               AG=model.matrix(~AG, pred1))
dim(F1)
## [1] 273   5
colnames(F1)==names(c1)
## [1] TRUE TRUE TRUE TRUE TRUE
head(F1)
##      th0 th1 thb.(Intercept) thb.AG50 thb.AG62.5
## [1,]   1   0               0        0          0
## [2,]   1   2               0        0          0
## [3,]   1   4               0        0          0
## [4,]   1   6               0        0          0
## [5,]   1   8               0        0          0
## [6,]   1  10               0        0          0

U1 <- chol(vcov(n1.lpr))
pred1$se <- sqrt(apply(F1%*%t(U1), 1, function(x) sum(x^2)))
zval <- qnorm(p=c(lwr=0.025, fit=0.5, upr=0.975))
me <- outer(pred1$se, zval, "*")
fit <- predict(n1.lpr, newdata=pred1, level=0)
pred1 <- cbind(pred1, sweep(me, 1, fit, "+"))
str(pred1)
## 'data.frame':    273 obs. of  6 variables:
##  $ x  : num  0 2 4 6 8 10 12 14 16 18 ...
##  $ AG : Factor w/ 3 levels "37.5","50","62.5": 1 1 1 1 1 1 1 1 1 1 ...
##  $ se : num  0.63 0.612 0.595 0.579 0.564 ...
##  $ lwr: num  13.6 14.1 14.6 15.1 15.6 ...
##  $ fit: num  14.8 15.3 15.8 16.2 16.7 ...
##  $ upr: num  16.1 16.5 16.9 17.4 17.8 ...

print(p0)+
    as.layer(xyplot(fit~x|AG, data=pred1, type="l", col=2,
                    prepanel=prepanel.cbH, cty="bands",
                    ly=pred1$lwr, uy=pred1$upr,
                    panel=panel.cbH
                    ))

plot of chunk unnamed-chunk-5plot of chunk unnamed-chunk-5


##-----------------------------------------------------------------------------
## Ajusta modelo quadrático-platô reparametrizado.

## Modelo quadrático platô mistura de parametrizações.
n3.qms <- nlme(y~b0+(x<xb)*(b1*x-0.5*b1*x^2/xb)+(x>=xb)*(0.5*b1*xb),
               data=db, method="ML",
               fixed=b0+b1+xb~AG,
               random=b0~1,
               start=c(
                   15,0,0,
                   0.22,0,0,
                   88,20,40))

anova(n3.qms, type="marginal")
##                numDF denDF F-value p-value
## b0.(Intercept)     1    61  161.33  <.0001
## b0.AG              2    61    1.11  0.3365
## b1.(Intercept)     1    61   24.94  <.0001
## b1.AG              2    61    0.24  0.7871
## xb.(Intercept)     1    61   30.87  <.0001
## xb.AG              2    61    5.98  0.0042

## Modelo quadrático platô mistura de parametrizações reduzido.
n3.qmr <- update(n3.qms,
                 fixed=list(b0+b1~1, xb~AG),
                 start=c(
                     15,
                     0.22,
                     88,20,40))

anova(n3.qms, n3.qmr)
##        Model df   AIC   BIC logLik   Test L.Ratio p-value
## n3.qms     1 11 350.5 375.8 -164.2                       
## n3.qmr     2  7 349.7 365.8 -167.8 1 vs 2   7.195   0.126

##-----------------------------------------------------------------------------
## Predição.

pred$y3s <- predict(n3.qms, newdata=pred, level=0)
pred$y3r <- predict(n3.qmr, newdata=pred, level=0)

print(p0)+
    as.layer(xyplot(y3s+y3r~x|AG, data=pred, type="l"))

plot of chunk unnamed-chunk-5plot of chunk unnamed-chunk-5


##-----------------------------------------------------------------------------
## Ajusta na restrição de estimativas por casela.

n3.qmc <- update(n3.qms,
                 fixed=list(b0+b1~1, xb~AG-1),
                 start=c(
                     15,
                     0.22,
                     88,90,140))

summary(n3.qmc)$tTable
##              Value Std.Error DF t-value   p-value
## b0         14.7323   0.67482 65   21.83 1.246e-31
## b1          0.2975   0.02499 65   11.90 5.586e-18
## xb.AG37.5  68.3697   6.28133 65   10.88 2.747e-16
## xb.AG50    99.6447   8.42337 65   11.83 7.339e-18
## xb.AG62.5 151.5082  13.68609 65   11.07 1.337e-16
intervals(n3.qmc)
## Approximate 95% confidence intervals
## 
##  Fixed effects:
##              lower     est.    upper
## b0         13.4310  14.7323  16.0337
## b1          0.2493   0.2975   0.3457
## xb.AG37.5  56.2563  68.3697  80.4832
## xb.AG50    83.4003  99.6447 115.8890
## xb.AG62.5 125.1148 151.5082 177.9017
## attr(,"label")
## [1] "Fixed effects:"
## 
##  Random Effects:
##   Level: bloco 
##         lower   est. upper
## sd(b0) 0.2938 0.7802 2.072
## 
##  Within-group standard error:
## lower  est. upper 
## 1.911 2.258 2.668

##-----------------------------------------------------------------------------
## Matriz gradiente, \frac{\partial \eta(x_i, \theta)}{\partial \theta_j}.

## Baseado em derivadas numéricas.
numF <- function(theta, xi, AG){
    ## b0+(x<xb)*(b1*x-0.5*b1*x^2/xb)+(x>=xb)*(0.5*b1*xb)
    theta[1]+
        (xi<AG%*%theta[3:5])*(theta[2]*xi-0.5*theta[2]*xi^2/AG%*%theta[3:5])+
            (xi>=AG%*%theta[3:5])*(0.5*theta[2]*AG%*%theta[3:5])
}

c3 <- fixef(n3.qmc)
s <- seq(0,180,30)
gradient(numF, x=c3, xi=s, AG=cbind(rep(1,length(s)),0,0))[,1:3]
##      b0    b1 xb.AG37.5
## [1,]  1  0.00   0.00000
## [2,]  1 23.42   0.02864
## [3,]  1 33.67   0.11455
## [4,]  1 34.18   0.14873
## [5,]  1 34.18   0.14873
## [6,]  1 34.18   0.14873
## [7,]  1 34.18   0.14873

##-----------------------------------------------------------------------------
## Faz predição com bandas.

pred3 <- expand.grid(x=seq(0,180,2), AG=levels(da$AG))

F3 <- gradient(numF, x=c3, xi=pred3$x,
               AG=model.matrix(~-1+AG, pred3))
dim(F3)
## [1] 273   5
colnames(F3)==names(c3)
## [1] TRUE TRUE TRUE TRUE TRUE
head(F3)
##      b0    b1 xb.AG37.5 xb.AG50 xb.AG62.5
## [1,]  1 0.000 0.0000000       0         0
## [2,]  1 1.971 0.0001273       0         0
## [3,]  1 3.883 0.0005091       0         0
## [4,]  1 5.737 0.0011455       0         0
## [5,]  1 7.532 0.0020364       0         0
## [6,]  1 9.269 0.0031819       0         0

U3 <- chol(vcov(n3.qmc))
pred3$se <- sqrt(apply(F3%*%t(U3), 1, function(x) sum(x^2)))
zval <- qnorm(p=c(lwr=0.025, fit=0.5, upr=0.975))
me <- outer(pred3$se, zval, "*")

fit <- predict(n3.qmc, newdata=pred3, level=0)
pred3 <- cbind(pred3, sweep(me, 1, fit, "+"))
str(pred3)
## 'data.frame':    273 obs. of  6 variables:
##  $ x  : num  0 2 4 6 8 10 12 14 16 18 ...
##  $ AG : Factor w/ 3 levels "37.5","50","62.5": 1 1 1 1 1 1 1 1 1 1 ...
##  $ se : num  0.652 0.625 0.601 0.581 0.563 ...
##  $ lwr: num  13.5 14.1 14.7 15.3 15.9 ...
##  $ fit: num  14.7 15.3 15.9 16.4 17 ...
##  $ upr: num  16 16.5 17.1 17.6 18.1 ...

print(p0)+
    as.layer(xyplot(fit~x|AG, data=pred3, type="l", col=2,
                    prepanel=prepanel.cbH, cty="bands",
                    ly=pred3$lwr, uy=pred3$upr,
                    panel=panel.cbH
                    ))

plot of chunk unnamed-chunk-5plot of chunk unnamed-chunk-5


##-----------------------------------------------------------------------------
## Só os valores preditos.

xyplot(fit~x, groups=AG, data=pred1, type="l",
       prepanel=prepanel.cbH, cty="bands", fill=1, alpha=0.2,
       ly=pred1$lwr, uy=pred1$upr,
       panel.groups=panel.cbH, panel=panel.superpose)

plot of chunk unnamed-chunk-5


xyplot(fit~x, groups=AG, data=pred3, type="l",
       prepanel=prepanel.cbH, cty="bands", fill=1, alpha=0.2,
       ly=pred3$lwr, uy=pred3$upr,
       panel.groups=panel.cbH, panel=panel.superpose)

plot of chunk unnamed-chunk-5


##-----------------------------------------------------------------------------
## Compara os modelos pelas bandas.

update(p0, strip=strip.custom(bg="gray90"))+
    as.layer(xyplot(fit~x|AG, data=pred1, type="l", col=2,
                    prepanel=prepanel.cbH, cty="bands",
                    ly=pred1$lwr, uy=pred1$upr, fill="red",
                    panel=panel.cbH
                    ))+
    as.layer(xyplot(fit~x|AG, data=pred3, type="l", col=4,
                    prepanel=prepanel.cbH, cty="bands",
                    ly=pred3$lwr, uy=pred3$upr, fill="blue",
                    panel=panel.cbH
                    ))+
    layer(panel.abline(v=c(c1[3], c3[3]), col=c(2,4)), packets=1)+
    layer(panel.abline(v=c(sum(c1[3:4]), c3[4]), col=c(2,4)), packets=2)+
    layer(panel.abline(v=c(sum(c1[c(3,5)]), c3[5]), col=c(2,4)), packets=3)

plot of chunk unnamed-chunk-5


##-----------------------------------------------------------------------------
## Qual é o melhor modelo afinal?
## Compara as log-verossimilhanças dos modelos finais.

logLik(n1.lpr)
## 'log Lik.' -166.2 (df=7)
logLik(n3.qmr)
## 'log Lik.' -167.8 (df=7)

AIC(n1.lpr)
##       
## 346.3
AIC(n3.qmr)
##       
## 349.7

##-----------------------------------------------------------------------------

r <- residuals(n3.qmr)
f <- fitted(n3.qmr)
plot(r~f)

plot of chunk unnamed-chunk-5

qqnorm(r)

plot of chunk unnamed-chunk-5