Análise dos dados
##-----------------------------------------------------------------------------
## Dados de experimento com soja sob 3 níveis de umidade do solo (massa
## de água/massa de solo) e níveis de potássio fornecidos na adubação. A
## hipótese sob estudo é que presença de potássio dá suporte para a
## cultura superar a ocorrência de períodos sem chuva. Experimento
## conduzido em casa de vegetação com 5 blocos, 2 plantas por
## u.e. (vaso).
## Para acessar o artigo (pdf online).
## browseURL("http://www.scielo.br/pdf/rca/v43n2/a03v43n2.pdf")
link <- "http://www.leg.ufpr.br/~walmes/data/soja.txt"
da <- read.table(link, header=TRUE, sep="\t", dec=",")
names(da) <- substr(names(da), 1, 4)
da <- subset(da[-74,], select=c("agua", "pota", "reng", "bloc"))
da$AG <- factor(da$agua)
names(da)[c(2:3)] <- c("x","y")
##-----------------------------------------------------------------------------
## Informa que bloco é de efeito aleatório.
db <- groupedData(y~x|bloc, data=da, order=FALSE)
str(db)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 74 obs. of 5 variables:
## $ agua: num 37.5 37.5 37.5 37.5 37.5 50 50 50 50 50 ...
## $ x : int 0 30 60 120 180 0 30 60 120 180 ...
## $ y : num 14.6 21.5 24.6 21.9 28.1 ...
## $ bloc: Factor w/ 5 levels "I","II","III",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ AG : Factor w/ 3 levels "37.5","50","62.5": 1 1 1 1 1 2 2 2 2 2 ...
## - attr(*, "formula")=Class 'formula' length 3 y ~ x | bloc
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## - attr(*, "FUN")=function (x)
## - attr(*, "order.groups")= logi FALSE
##-----------------------------------------------------------------------------
## Visualiza os dados.
p0 <-
xyplot(y~x|AG, data=db, layout=c(3,1), col=1,
xlab=expression("Conteúdo de potássio no solo"~(mg~dm^{-3})),
ylab=expression("Rendimento de grãos"~(g~vaso^{-1})))
print(p0)
##-----------------------------------------------------------------------------
## Parametrizações do modelo quadrático-platô.
require(rpanel)
## Original
## b0: intercepto.
## b1: coeficiente angular ou taxa na origem.
## b2: coeficiente de concavidade/curvatura ou grau de especificidade.
fx0 <- function(panel){
with(panel, {
xb <- -b1/(2*b2)
curve(b0+(b1*x+b2*x^2)*(x<=xb)+
(b1*xb+b2*xb^2)*(x>xb), xlim[1], xlim[2], ylim=ylim)
abline(a=b0, b=b1, lty=2)
abline(v=xb, lty=2)
})
panel
}
panel <- rp.control(title="Original",
xlim=c(0,10), ylim=c(0,10))
rp.slider(panel, variable=b0, from=0, to=10, initval=2,
showvalue=TRUE, action=fx0, title="b0")
rp.slider(panel, variable=b1, from=0, to=10, initval=0,
showvalue=TRUE, action=fx0, title="b1")
rp.slider(panel, variable=b2, from=-2, to=0, initval=0,
showvalue=TRUE, action=fx0, title="b2")
## Canônica (ver Tese Walmes).
## yb: valor crítico em y e máximo/mínimo da função.
## xb: valor crítico em x e ponto de troca/união.
## b2: coeficiente de concavidade/curvatura ou grau de especificidade.
fx1 <- function(panel){
with(panel, {
curve(yb+(x<=xb)*b2*(x-xb)^2, xlim[1], xlim[2], ylim=ylim)
abline(v=xb-0:1, h=yb-c(0,-b2), lty=2)
})
panel
}
panel <- rp.control(title="Canônica",
xlim=c(0,10), ylim=c(0,10))
rp.slider(panel, variable=yb, from=0, to=10, initval=5,
showvalue=TRUE, action=fx1, title="yb")
rp.slider(panel, variable=xb, from=0, to=10, initval=5,
showvalue=TRUE, action=fx1, title="xb")
rp.slider(panel, variable=b2, from=-5, to=0, initval=0,
showvalue=TRUE, action=fx1, title="b2")
## Mistura original e canônica (feito para o Curso).
## b0: intercepto.
## b1: coeficiente angular ou taxa na origem.
## xb: valor crítico em x e ponto de troca/união.
fx2 <- function(panel){
with(panel, {
curve(b0+(x<xb)*(b1*x-0.5*b1*x^2/xb)+(x>=xb)*(0.5*b1*xb),
xlim[1], xlim[2], ylim=ylim)
abline(v=xb-0:1, lty=2)
abline(a=b0, b=b1, lty=2)
})
panel
}
panel <- rp.control(title="Mistura",
xlim=c(0,10), ylim=c(0,10))
rp.slider(panel, variable=b0, from=0, to=10, initval=2,
showvalue=TRUE, action=fx2, title="b0")
rp.slider(panel, variable=b1, from=0, to=10, initval=0,
showvalue=TRUE, action=fx2, title="b1")
rp.slider(panel, variable=xb, from=0, to=10, initval=5,
showvalue=TRUE, action=fx2, title="xb")
##-----------------------------------------------------------------------------
## Ajuste do modelo linear-platô.
## Modelo linear plato.
n1.lps <- nlme(y~th0+th1*x*(x<=thb)+th1*thb*(x>thb),
data=db, method="ML",
fixed=th0+th1+thb~AG,
random=th0~1,
start=c(
15,0,0,
0.22,0,0,
40,20,50))
anova(n1.lps, type="marginal")
## numDF denDF F-value p-value
## th0.(Intercept) 1 61 158.10695 <.0001
## th0.AG 2 61 1.60851 0.2086
## th1.(Intercept) 1 61 22.97344 <.0001
## th1.AG 2 61 0.07505 0.9278
## thb.(Intercept) 1 61 35.89719 <.0001
## thb.AG 2 61 8.25653 0.0007
## Modelo linear plato reduzido.
n1.lpr <- update(n1.lps,
fixed=list(th0+th1~1, thb~AG),
start=c(
15,
0.22,
40,20,50))
anova(n1.lps, n1.lpr)
## Model df AIC BIC logLik Test L.Ratio p-value
## n1.lps 1 11 347.9881 373.3328 -162.994
## n1.lpr 2 7 346.3440 362.4724 -166.172 1 vs 2 6.355875 0.1741
##-----------------------------------------------------------------------------
## Predição.
pred <- expand.grid(x=0:180, AG=levels(da$AG))
pred$y1s <- predict(n1.lps, newdata=pred, level=0)
pred$y1r <- predict(n1.lpr, newdata=pred, level=0)
print(p0)+
as.layer(xyplot(y1s+y1r~x|AG, data=pred, type="l"))
##-----------------------------------------------------------------------------
## Predição com bandas.
numF1 <- function(theta, xi, AG){
theta[1]+
(xi<=AG%*%theta[3:5])*(theta[2]*xi)+
(xi>AG%*%theta[3:5])*(theta[2]*AG%*%theta[3:5])
}
##-----------------------------------------------------------------------------
## Faz predição com bandas.
pred1 <- expand.grid(x=seq(0,180,2), AG=levels(da$AG))
c1 <- fixef(n1.lpr)
F1 <- gradient(numF1, x=c1, xi=pred1$x,
AG=model.matrix(~AG, pred1))
dim(F1)
## [1] 273 5
colnames(F1)==names(c1)
## [1] TRUE TRUE TRUE TRUE TRUE
head(F1)
## th0 th1 thb.(Intercept) thb.AG50 thb.AG62.5
## [1,] 1 0 0 0 0
## [2,] 1 2 0 0 0
## [3,] 1 4 0 0 0
## [4,] 1 6 0 0 0
## [5,] 1 8 0 0 0
## [6,] 1 10 0 0 0
U1 <- chol(vcov(n1.lpr))
pred1$se <- sqrt(apply(F1%*%t(U1), 1, function(x) sum(x^2)))
zval <- qnorm(p=c(lwr=0.025, fit=0.5, upr=0.975))
me <- outer(pred1$se, zval, "*")
fit <- predict(n1.lpr, newdata=pred1, level=0)
pred1 <- cbind(pred1, sweep(me, 1, fit, "+"))
str(pred1)
## 'data.frame': 273 obs. of 6 variables:
## $ x : num 0 2 4 6 8 10 12 14 16 18 ...
## $ AG : Factor w/ 3 levels "37.5","50","62.5": 1 1 1 1 1 1 1 1 1 1 ...
## $ se : num 0.63 0.612 0.595 0.579 0.564 ...
## $ lwr: num 13.6 14.1 14.6 15.1 15.6 ...
## $ fit: num 14.8 15.3 15.8 16.2 16.7 ...
## $ upr: num 16.1 16.5 16.9 17.4 17.8 ...
print(p0)+
as.layer(xyplot(fit~x|AG, data=pred1, type="l", col=2,
prepanel=prepanel.cbH, cty="bands",
ly=pred1$lwr, uy=pred1$upr,
panel=panel.cbH
))
##-----------------------------------------------------------------------------
## Ajusta modelo quadrático-platô reparametrizado.
## Modelo quadrático platô mistura de parametrizações.
n3.qms <- nlme(y~b0+(x<xb)*(b1*x-0.5*b1*x^2/xb)+(x>=xb)*(0.5*b1*xb),
data=db, method="ML",
fixed=b0+b1+xb~AG,
random=b0~1,
start=c(
15,0,0,
0.22,0,0,
88,20,40))
anova(n3.qms, type="marginal")
## numDF denDF F-value p-value
## b0.(Intercept) 1 61 161.33157 <.0001
## b0.AG 2 61 1.10876 0.3365
## b1.(Intercept) 1 61 24.94174 <.0001
## b1.AG 2 61 0.24038 0.7871
## xb.(Intercept) 1 61 30.87004 <.0001
## xb.AG 2 61 5.98419 0.0042
## Modelo quadrático platô mistura de parametrizações reduzido.
n3.qmr <- update(n3.qms,
fixed=list(b0+b1~1, xb~AG),
start=c(
15,
0.22,
88,20,40))
anova(n3.qms, n3.qmr)
## Model df AIC BIC logLik Test L.Ratio p-value
## n3.qms 1 11 350.4609 375.8057 -164.2305
## n3.qmr 2 7 349.6555 365.7840 -167.8278 1 vs 2 7.194554 0.126
##-----------------------------------------------------------------------------
## Predição.
pred$y3s <- predict(n3.qms, newdata=pred, level=0)
pred$y3r <- predict(n3.qmr, newdata=pred, level=0)
print(p0)+
as.layer(xyplot(y3s+y3r~x|AG, data=pred, type="l"))
##-----------------------------------------------------------------------------
## Ajusta na restrição de estimativas por casela.
n3.qmc <- update(n3.qms,
fixed=list(b0+b1~1, xb~AG-1),
start=c(
15,
0.22,
88,90,140))
summary(n3.qmc)$tTable
## Value Std.Error DF t-value p-value
## b0 14.7323486 0.6748211 65 21.83149 1.245836e-31
## b1 0.2974695 0.0249935 65 11.90187 5.586195e-18
## xb.AG37.5 68.3697470 6.2813311 65 10.88460 2.746503e-16
## xb.AG50 99.6446545 8.4233734 65 11.82954 7.339089e-18
## xb.AG62.5 151.5082133 13.6860878 65 11.07024 1.337246e-16
intervals(n3.qmc)
## Approximate 95% confidence intervals
##
## Fixed effects:
## lower est. upper
## b0 13.4309649 14.7323486 16.0337324
## b1 0.2492698 0.2974695 0.3456691
## xb.AG37.5 56.2562808 68.3697470 80.4832131
## xb.AG50 83.4002874 99.6446545 115.8890216
## xb.AG62.5 125.1147694 151.5082133 177.9016571
## attr(,"label")
## [1] "Fixed effects:"
##
## Random Effects:
## Level: bloc
## lower est. upper
## sd(b0) 0.2937738 0.7802222 2.072162
##
## Within-group standard error:
## lower est. upper
## 1.911293 2.258342 2.668409
##-----------------------------------------------------------------------------
## Matriz gradiente, \frac{\partial \eta(x_i, \theta)}{\partial \theta_j}.
## Baseado em derivadas numéricas.
numF <- function(theta, xi, AG){
## b0+(x<xb)*(b1*x-0.5*b1*x^2/xb)+(x>=xb)*(0.5*b1*xb)
theta[1]+
(xi<AG%*%theta[3:5])*(theta[2]*xi-0.5*theta[2]*xi^2/AG%*%theta[3:5])+
(xi>=AG%*%theta[3:5])*(0.5*theta[2]*AG%*%theta[3:5])
}
c3 <- fixef(n3.qmc)
s <- seq(0,180,30)
gradient(numF, x=c3, xi=s, AG=cbind(rep(1,length(s)),0,0))[,1:3]
## b0 b1 xb.AG37.5
## [1,] 1 0.00000 0.00000000
## [2,] 1 23.41814 0.02863696
## [3,] 1 33.67257 0.11454785
## [4,] 1 34.18487 0.14873473
## [5,] 1 34.18487 0.14873473
## [6,] 1 34.18487 0.14873473
## [7,] 1 34.18487 0.14873473
##-----------------------------------------------------------------------------
## Faz predição com bandas.
pred3 <- expand.grid(x=seq(0,180,2), AG=levels(da$AG))
F3 <- gradient(numF, x=c3, xi=pred3$x,
AG=model.matrix(~-1+AG, pred3))
dim(F3)
## [1] 273 5
colnames(F3)==names(c3)
## [1] TRUE TRUE TRUE TRUE TRUE
head(F3)
## b0 b1 xb.AG37.5 xb.AG50 xb.AG62.5
## [1,] 1 0.000000 0.0000000000 0 0
## [2,] 1 1.970747 0.0001272736 0 0
## [3,] 1 3.882989 0.0005091021 0 0
## [4,] 1 5.736726 0.0011454777 0 0
## [5,] 1 7.531957 0.0020364083 0 0
## [6,] 1 9.268683 0.0031818860 0 0
U3 <- chol(vcov(n3.qmc))
pred3$se <- sqrt(apply(F3%*%t(U3), 1, function(x) sum(x^2)))
zval <- qnorm(p=c(lwr=0.025, fit=0.5, upr=0.975))
me <- outer(pred3$se, zval, "*")
fit <- predict(n3.qmc, newdata=pred3, level=0)
pred3 <- cbind(pred3, sweep(me, 1, fit, "+"))
str(pred3)
## 'data.frame': 273 obs. of 6 variables:
## $ x : num 0 2 4 6 8 10 12 14 16 18 ...
## $ AG : Factor w/ 3 levels "37.5","50","62.5": 1 1 1 1 1 1 1 1 1 1 ...
## $ se : num 0.652 0.625 0.601 0.581 0.563 ...
## $ lwr: num 13.5 14.1 14.7 15.3 15.9 ...
## $ fit: num 14.7 15.3 15.9 16.4 17 ...
## $ upr: num 16 16.5 17.1 17.6 18.1 ...
print(p0)+
as.layer(xyplot(fit~x|AG, data=pred3, type="l", col=2,
prepanel=prepanel.cbH, cty="bands",
ly=pred3$lwr, uy=pred3$upr,
panel=panel.cbH
))
##-----------------------------------------------------------------------------
## Só os valores preditos.
xyplot(fit~x, groups=AG, data=pred1, type="l",
prepanel=prepanel.cbH, cty="bands", fill=1, alpha=0.2,
ly=pred1$lwr, uy=pred1$upr,
panel.groups=panel.cbH, panel=panel.superpose)
xyplot(fit~x, groups=AG, data=pred3, type="l",
prepanel=prepanel.cbH, cty="bands", fill=1, alpha=0.2,
ly=pred3$lwr, uy=pred3$upr,
panel.groups=panel.cbH, panel=panel.superpose)
##-----------------------------------------------------------------------------
## Compara os modelos pelas bandas.
update(p0, strip=strip.custom(bg="gray90"))+
as.layer(xyplot(fit~x|AG, data=pred1, type="l", col=2,
prepanel=prepanel.cbH, cty="bands",
ly=pred1$lwr, uy=pred1$upr, fill="red",
panel=panel.cbH
))+
as.layer(xyplot(fit~x|AG, data=pred3, type="l", col=4,
prepanel=prepanel.cbH, cty="bands",
ly=pred3$lwr, uy=pred3$upr, fill="blue",
panel=panel.cbH
))+
layer(panel.abline(v=c(c1[3], c3[3]), col=c(2,4)), packets=1)+
layer(panel.abline(v=c(sum(c1[3:4]), c3[4]), col=c(2,4)), packets=2)+
layer(panel.abline(v=c(sum(c1[c(3,5)]), c3[5]), col=c(2,4)), packets=3)
##-----------------------------------------------------------------------------
## Qual é o melhor modelo afinal?
## Compara as log-verossimilhanças dos modelos finais.
logLik(n1.lpr)
## 'log Lik.' -166.172 (df=7)
logLik(n3.qmr)
## 'log Lik.' -167.8278 (df=7)
AIC(n1.lpr)
##
## 346.344
AIC(n3.qmr)
##
## 349.6555
##-----------------------------------------------------------------------------
## Diagnóstico.
r <- residuals(n3.qmr)
f <- fitted(n3.qmr)
plot(r~f)
qqnorm(r)