|
Modelos de Regressão para Dados de Contagem com o R
|
Walmes M. Zeviani, Eduardo E. Ribeiro Jr & Cesar A. Taconeli
# Definições da sessão.
library(lattice)
library(latticeExtra)
library(bbmle)
library(corrplot)
library(plyr)
library(car)
library(doBy)
library(multcomp)
library(MRDCr)
Se uma variável aleatória \(Y\) tem distribuição de probabilidades Poisson generalizada, então sua função de probabilidade, na parametrização de média, é
\[ f(y) = \left( \dfrac{\lambda}{1+\alpha\lambda} \right)^{y} \frac{(1+\alpha y)^{y-1}}{y!} \exp\left\{-\lambda \frac{(1+\alpha y)}{(1+\alpha \lambda)}\right\}, \] em que \(\lambda\) é a média da distribuição e \(\alpha\) é a o parâmetro de dispersão. Nessa parametrização, tem-se
A função de log-verossimilhança é \[ \ell(y; \lambda, \alpha) = \sum_{i=1}^{n} y_{i}\ln(\lambda)- \ln(1+\alpha\lambda)+ (y_{i}-1)\ln(1+\alpha y)- \lambda\frac{(1+\alpha y_{i})}{(1+\alpha\lambda)}- \ln(y_{i}!). \]
grid <- expand.grid(lambda = c(2, 8, 15),
alpha = c(-0.05, 0, 0.05))
y <- 0:35
py <- mapply(FUN = dpgnz,
lambda = grid$lambda,
alpha = grid$alpha,
MoreArgs = list(y = y), SIMPLIFY = FALSE)
grid <- cbind(grid[rep(1:nrow(grid), each = length(y)), ],
y = y,
py = unlist(py))
useOuterStrips(xyplot(py ~ y | factor(lambda) + factor(alpha),
ylab = expression(f(y)),
xlab = expression(y),
data = grid, type = "h",
panel = function(x, y, ...) {
m <- sum(x * y)
panel.xyplot(x, y, ...)
panel.abline(v = m, lty = 2)
}),
strip = strip.custom(
strip.names = TRUE,
var.name = expression(lambda == ""),
sep = ""),
strip.left = strip.custom(
strip.names = TRUE,
var.name = expression(alpha == ""),
sep = ""))
#-----------------------------------------------------------------------
# Relação média variância.
curve(lambda * (1 + 0 * lambda)^2,
from = 0, to = 10, xname = "lambda",
ylab = expression(lambda %.% (1 + alpha %.% lambda)^2),
xlab = expression(lambda))
alpha <- seq(-0.25, 0.25, by = 0.025)
col <- brewer.pal(n = 5, name = "Spectral")
col <- colorRampPalette(colors = col)(length(alpha))
for (a in seq_along(alpha)) {
curve(lambda * (1 + alpha[a] * lambda)^2,
add = TRUE, xname = "lambda", col = col[a], lwd = 2)
}
#-----------------------------------------------------------------------
# Gráfico do espaço paramétrico de lambda x alpha.
y <- 0:400
fun <- Vectorize(vectorize.args = c("lambda", "alpha"),
FUN = function(lambda, alpha) {
sum(dpgnz(y = y, lambda = lambda, alpha = alpha))
})
grid <- list(lambda = seq(0.2, 50, by = 0.2),
alpha = seq(-0.98, 0.98, by = 0.02))
grid$sum <- with(grid, outer(lambda, alpha, fun))
grid <- with(grid,
cbind(expand.grid(lambda = lambda, alpha = alpha),
data.frame(sum = c(sum))))
levelplot(sum ~ lambda + alpha,
data = subset(grid, round(sum, 3) == 1),
col.regions = gray.colors) +
layer(panel.abline(h = 0, lty = 2)) +
layer(panel.curve(-1/x))
# Já que lambda * alpha > -1, então alpha = -1/lambda dá a fronteira.
# Função de log-Verossimilhança da Poisson Generalizada na
# parametrização de modelo de regressão.
MRDCr::llpgnz
## function(params, y, X, offset = NULL) {
## # params: vetor de parâmetros;
## # params[1]: parâmetro de dispersão (alpha);
## # params[-1]: parâmetro de locação (lambda);
## # y: variável resposta (contagem);
## # X: matriz do modelo linear;
## # offset: tamanho do domínio onde y foi medido (exposição);
## #----------------------------------------
## if (is.null(offset)) {
## offset <- 1L
## }
## alpha <- params[1]
## lambda <- offset * exp(X %*% params[-1])
## z <- 1 + alpha * lambda
## w <- 1 + alpha * y
## ll <- y * (log(lambda) - log(z)) +
## (y - 1) * log(w) -
## lambda * (w/z) -
## lfactorial(y)
## # Negativo da log-likelihood.
## -sum(ll)
## }
## <environment: namespace:MRDCr>
#-----------------------------------------------------------------------
# Gerando uma amostra aleatória da Poisson, para teste.
# Offset = 2, lambda = 3.
y <- rpois(100, lambda = 2 * 3)
L <- list(y = y,
offset = rep(2, length(y)),
X = cbind(rep(1, length(y))))
start <- c(alpha = 0, lambda = 1)
parnames(llpgnz) <- names(start)
# Como \alpha foi fixado em 1, essa ll corresponde à Poisson.
n0 <- mle2(minuslogl = llpgnz,
start = start, data = L,
fixed = list(alpha = 0), vecpar = TRUE)
# Para conferir.
c(coef(n0)["lambda"],
coef(glm(y ~ offset(log(L$offset)), family = poisson)))
## lambda (Intercept)
## 1.061256 1.061257
# Estimando o \alpha.
n1 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)
coef(n1)
## alpha lambda
## 0.01299528 1.06125158
# Perfil de verossimilhança dos parâmetros.
plot(profile(n1))
# Covariância.
V <- cov2cor(vcov(n1))
corrplot.mixed(V, lower = "number", upper = "ellipse",
diag = "l", tl.pos = "lt", tl.col = "black",
tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])
dev.off()
## null device
## 1
Resultados de um experimento fatorial (3 x 5), em delineamento de blocos casualizados, que estudou a influência de níveis de potássio na adubação de soja em combinação com irrigação em casa de vegetação. As variáveis de contagem registradas nesse experimento foram o número de vagens viáveis (e não viáveis) e o número total de sementes por parcela com duas plantas de soja.
#-----------------------------------------------------------------------
# Carregando e explorando os dados.
data(soja, package = "MRDCr")
str(soja)
## 'data.frame': 75 obs. of 5 variables:
## $ K : int 0 30 60 120 180 0 30 60 120 180 ...
## $ umid: Factor w/ 3 levels "37,5","50","62,5": 1 1 1 1 1 2 2 2 2 2 ...
## $ bloc: Factor w/ 5 levels "I","II","III",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ngra: int 136 159 156 171 190 140 193 200 208 237 ...
## $ nvag: int 56 62 66 68 82 63 86 94 86 97 ...
# A observação 74 é um outlier.
soja <- soja[-74, ]
xyplot(nvag ~ K | umid, data = soja, layout = c(NA, 1),
type = c("p", "smooth"),
ylab = "Número de vagens por parcela",
xlab = expression("Dose de potássio aplicada"~(mg ~ dm^{3})),
strip = strip.custom(strip.names = TRUE, var.name = "Umidade"))
soja <- transform(soja, K = factor(K))
#-----------------------------------------------------------------------
# Modelo Poisson.
m0 <- glm(nvag ~ bloc + umid * K, data = soja, family = poisson)
m1 <- update(m0, family = quasipoisson)
# Diagnóstico.
par(mfrow = c(2, 2))
plot(m0); layout(1)
# Medidas de decisão.
# anova(m0, test = "Chisq")
anova(m1, test = "F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: nvag
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 73 322.68
## bloc 4 14.284 69 308.40 2.9785 0.02684 *
## umid 2 92.911 67 215.49 38.7485 3.157e-11 ***
## K 4 136.060 63 79.43 28.3719 8.316e-13 ***
## umid:K 8 14.150 55 65.28 1.4754 0.18750
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
##
## Call:
## glm(formula = nvag ~ bloc + umid * K, family = quasipoisson,
## data = soja)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9681 -0.6393 -0.0195 0.4854 2.3306
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.95369 0.07547 52.389 < 2e-16 ***
## blocII -0.02928 0.04479 -0.654 0.516036
## blocIII -0.07265 0.04529 -1.604 0.114420
## blocIV -0.12544 0.04592 -2.732 0.008457 **
## blocV -0.10795 0.04705 -2.294 0.025606 *
## umid50 0.13404 0.09597 1.397 0.168117
## umid62,5 0.21656 0.09418 2.299 0.025303 *
## K30 0.27427 0.09300 2.949 0.004670 **
## K60 0.30797 0.09233 3.336 0.001530 **
## K120 0.32883 0.09192 3.577 0.000734 ***
## K180 0.25540 0.09338 2.735 0.008376 **
## umid50:K30 0.06322 0.12654 0.500 0.619324
## umid62,5:K30 -0.10747 0.12631 -0.851 0.398532
## umid50:K60 0.16561 0.12449 1.330 0.188897
## umid62,5:K60 0.10735 0.12286 0.874 0.386028
## umid50:K120 0.14920 0.12414 1.202 0.234563
## umid62,5:K120 0.11839 0.12487 0.948 0.347229
## umid50:K180 0.30370 0.12439 2.441 0.017873 *
## umid62,5:K180 0.19838 0.12325 1.610 0.113208
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 1.198902)
##
## Null deviance: 322.684 on 73 degrees of freedom
## Residual deviance: 65.278 on 55 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
#-----------------------------------------------------------------------
# Modelo Poisson Generalizado.
L <- with(soja,
list(y = nvag, offset = 1, X = model.matrix(m0)))
# Usa as estimativas do Poisson como valores iniciais para a PGen.
start <- c(alpha = 0, coef(m0))
parnames(llpgnz) <- names(start)
# Com alpha fixo em 0 corresponde à Poisson.
m2 <- mle2(llpgnz, start = start, data = L,
fixed = list(alpha = 0), vecpar = TRUE)
# Mesma medida de ajuste e estimativas.
c(logLik(m2), logLik(m0))
## [1] -259.6165 -259.6165
cbind(coef(m2)[-1], coef(m0))
## [,1] [,2]
## (Intercept) 3.95368723 3.95368724
## blocII -0.02927855 -0.02927854
## blocIII -0.07265048 -0.07265048
## blocIV -0.12543798 -0.12543798
## blocV -0.10794857 -0.10794857
## umid50 0.13404356 0.13404356
## umid62,5 0.21656458 0.21656458
## K30 0.27427290 0.27427290
## K60 0.30796674 0.30796674
## K120 0.32883188 0.32883188
## K180 0.25540441 0.25540441
## umid50:K30 0.06322288 0.06322288
## umid62,5:K30 -0.10747272 -0.10747272
## umid50:K60 0.16561471 0.16561471
## umid62,5:K60 0.10735066 0.10735066
## umid50:K120 0.14920392 0.14920392
## umid62,5:K120 0.11838578 0.11838578
## umid50:K180 0.30369921 0.30369921
## umid62,5:K180 0.19837927 0.19837927
# Poisson Generalizada.
m3 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)
# Teste para nulinidade do parâmetro de dispersão (H_0: alpha == 0).
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+blocII+blocIII+blocIV+
## blocV+umid50+umid62,5+K30+K60+K120+K180+
## umid50:K30+umid62,5:K30+umid50:K60+umid62,5:K60+
## umid50:K120+umid62,5:K120+umid50:K180+umid62,5:K180
## Model 2: m2, [llpgnz]: (Intercept)+blocII+blocIII+blocIV+blocV+
## umid50+umid62,5+K30+K60+K120+K180+umid50:K30+
## umid62,5:K30+umid50:K60+umid62,5:K60+umid50:K120+
## umid62,5:K120+umid50:K180+umid62,5:K180
## Tot Df Deviance Chisq Df Pr(>Chisq)
## 1 20 518.19
## 2 19 519.23 1.0402 1 0.3078
# Estimativas dos coeficientes.
cbind("PoissonGLM" = c(NA, coef(m0)),
"PoissonML" = coef(m2),
"PGeneraliz" = coef(m3))
## PoissonGLM PoissonML PGeneraliz
## NA 0.00000000 -0.001042601
## (Intercept) 3.95368724 3.95368723 3.953181451
## blocII -0.02927854 -0.02927855 -0.027923784
## blocIII -0.07265048 -0.07265048 -0.072056920
## blocIV -0.12543798 -0.12543798 -0.127982907
## blocV -0.10794857 -0.10794857 -0.105466353
## umid50 0.13404356 0.13404356 0.134139707
## umid62,5 0.21656458 0.21656458 0.216194659
## K30 0.27427290 0.27427290 0.273827867
## K60 0.30796674 0.30796674 0.307818797
## K120 0.32883188 0.32883188 0.328229190
## K180 0.25540441 0.25540441 0.255998709
## umid50:K30 0.06322288 0.06322288 0.064744844
## umid62,5:K30 -0.10747272 -0.10747272 -0.106284339
## umid50:K60 0.16561471 0.16561471 0.166265663
## umid62,5:K60 0.10735066 0.10735066 0.108202657
## umid50:K120 0.14920392 0.14920392 0.149064097
## umid62,5:K120 0.11838578 0.11838578 0.120194391
## umid50:K180 0.30369921 0.30369921 0.302905099
## umid62,5:K180 0.19837927 0.19837927 0.198624027
# Perfil para o parâmetro de dispersão.
plot(profile(m3, which = "alpha"))
abline(v = 0, lty = 2)
V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
diag = "l", tl.pos = "lt", tl.col = "black",
tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])
dev.off()
## null device
## 1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
## sum mean max
## 0.30139729 0.01586302 0.06988971
#-----------------------------------------------------------------------
# Testes de hipótese.
# Teste de Wald para a interação.
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a == max(a)
L <- t(replicate(sum(ai), rbind(coef(m3) * 0), simplify = "matrix"))
L[, ai] <- diag(sum(ai))
# Cálculo da estatística Chi-quadrado.
# t(L %*% coef(m3)) %*%
# solve(L %*% vcov(m3) %*% t(L)) %*%
# (L %*% coef(m3))
crossprod(L %*% coef(m3),
solve(L %*% vcov(m3) %*% t(L),
L %*% coef(m3)))
## [,1]
## [1,] 16.21861
# Teste de Wald para interação (poderia ser LRT, claro).
# É necessário passar um objeto glm mesmo fornecendo o restante a parte.
linearHypothesis(model = m0,
hypothesis.matrix = L,
vcov. = vcov(m3),
coef. = coef(m3))
## Linear hypothesis test
##
## Hypothesis:
## umid50:K30 = 0
## umid62,5:K30 = 0
## umid50:K60 = 0
## umid62,5:K60 = 0
## umid50:K120 = 0
## umid62,5:K120 = 0
## umid50:K180 = 0
## umid62,5:K180 = 0
##
## Model 1: restricted model
## Model 2: nvag ~ bloc + umid * K
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 63
## 2 55 8 16.219 0.03936 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Predição com bandas de confiança.
X <- LSmatrix(m0, effect = c("umid", "K"))
pred <- attr(X, "grid")
pred <- transform(pred,
K = as.integer(K),
umid = factor(umid))
pred <- list(pois = pred, pgen = pred)
# Preditos pela Poisson.
# aux <- predict(m0, newdata = pred$pois, se.fit = TRUE)
# aux <- exp(aux$fit + outer(aux$se.fit, qn, FUN = "*"))
# pred$pois <- cbind(pred$pois, aux)
aux <- confint(glht(m0, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))
str(pred$pois)
## 'data.frame': 15 obs. of 5 variables:
## $ umid: Factor w/ 3 levels "37,5","50","62,5": 1 2 3 1 2 3 1 2 3 1 ...
## $ K : int 0 0 0 30 30 30 60 60 60 120 ...
## $ fit : num 48.7 55.7 60.5 64.1 78.1 ...
## $ lwr : num 43 49.6 54.1 57.5 70.7 ...
## $ upr : num 55.3 62.7 67.7 71.5 86.3 ...
# Predito para a Poisson Generalizada.
aux <- predict(m3, newdata = X,
interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])
pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, umid, K, modelo)
key <- list(type = "o", divide = 1,
lines = list(pch = 1:nlevels(pred$modelo),
lty = 1, col = 1),
text = list(c("Poisson", "Poisson Generelizada")))
xyplot(fit ~ K | umid, data = pred,
layout = c(NA, 1), as.table = TRUE,
xlim = extendrange(range(pred$K), f = 0.1),
key = key, pch = pred$modelo,
xlab = expression("Dose de potássio"~(mg~dm^{-3})),
ylab = "Número de vagens por parcela",
ly = pred$lwr, uy = pred$upr, cty = "bars", length = 0,
prepanel = prepanel.cbH,
desloc = 8 * scale(as.integer(pred$modelo), scale = FALSE),
panel = panel.cbH)
Análise do número de grãos por pacela do experimento com soja.
#-----------------------------------------------------------------------
xyplot(ngra ~ K | umid, data = soja, layout = c(NA, 1),
type = c("p", "smooth"),
ylab = "Número de grãos por parcela",
xlab = expression("Dose de potássio aplicada"~(mg ~ dm^{3})),
strip = strip.custom(strip.names = TRUE, var.name = "Umidade"))
#-----------------------------------------------------------------------
# Modelo Poisson.
m0 <- glm(ngra ~ bloc + umid * K, data = soja, family = poisson)
m1 <- update(m0, family = quasipoisson)
# Diagnóstico.
par(mfrow = c(2, 2))
plot(m0); layout(1)
# Medidas de decisão.
# anova(m0, test = "Chisq")
anova(m1, test = "Chisq")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: ngra
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 73 761.43
## bloc 4 28.34 69 733.09 0.01471 *
## umid 2 185.06 67 548.03 < 2e-16 ***
## K 4 380.32 63 167.71 < 2e-16 ***
## umid:K 8 42.99 55 124.72 0.01606 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
##
## Call:
## glm(formula = ngra ~ bloc + umid * K, family = quasipoisson,
## data = soja)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6297 -1.0707 -0.0873 0.7428 3.2810
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.80196 0.06773 70.898 < 2e-16 ***
## blocII -0.01939 0.04017 -0.483 0.631230
## blocIII -0.03663 0.04035 -0.908 0.367964
## blocIV -0.10559 0.04107 -2.571 0.012884 *
## blocV -0.09313 0.04218 -2.208 0.031433 *
## umid50 0.13245 0.08611 1.538 0.129739
## umid62,5 0.18548 0.08506 2.180 0.033517 *
## K30 0.29799 0.08299 3.591 0.000703 ***
## K60 0.34434 0.08218 4.190 0.000102 ***
## K120 0.36493 0.08183 4.459 4.1e-05 ***
## K180 0.29542 0.08303 3.558 0.000778 ***
## umid50:K30 0.04344 0.11318 0.384 0.702600
## umid62,5:K30 -0.13669 0.11386 -1.201 0.235081
## umid50:K60 0.11559 0.11136 1.038 0.303817
## umid62,5:K60 0.09174 0.11027 0.832 0.409039
## umid50:K120 0.11860 0.11088 1.070 0.289461
## umid62,5:K120 0.16266 0.11145 1.460 0.150096
## umid50:K180 0.28832 0.11085 2.601 0.011917 *
## umid62,5:K180 0.21569 0.11021 1.957 0.055430 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 2.28854)
##
## Null deviance: 761.43 on 73 degrees of freedom
## Residual deviance: 124.72 on 55 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
#-----------------------------------------------------------------------
# Modelo Poisson Generalizado.
L <- with(soja,
list(y = ngra, offset = 1, X = model.matrix(m0)))
# Usa as estimativas do Poisson como valores iniciais.
start <- c(alpha = 0, coef(m0))
parnames(llpgnz) <- names(start)
# Com alpha fixo em 0 corresponde à Poisson.
m2 <- mle2(llpgnz, start = start, data = L,
fixed = list(alpha = 0), vecpar = TRUE)
# Mesma medida de ajuste e estimativas.
c(logLik(m2), logLik(m0))
## [1] -321.6692 -321.6692
# Poisson Generalizada.
m3 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)
# Teste para nulinidade do parâmetro de dispersão (H_0: alpha == 0).
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+blocII+blocIII+blocIV+
## blocV+umid50+umid62,5+K30+K60+K120+K180+
## umid50:K30+umid62,5:K30+umid50:K60+umid62,5:K60+
## umid50:K120+umid62,5:K120+umid50:K180+umid62,5:K180
## Model 2: m2, [llpgnz]: (Intercept)+blocII+blocIII+blocIV+blocV+
## umid50+umid62,5+K30+K60+K120+K180+umid50:K30+
## umid62,5:K30+umid50:K60+umid62,5:K60+umid50:K120+
## umid62,5:K120+umid50:K180+umid62,5:K180
## Tot Df Deviance Chisq Df Pr(>Chisq)
## 1 20 630.79
## 2 19 643.34 12.544 1 0.0003975 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Estimaitvas dos parâmetros.
cbind("PoissonGLM" = c(NA, coef(m0)),
"PoissonML" = coef(m2),
"PGeneraliz" = coef(m3))
## PoissonGLM PoissonML PGeneraliz
## NA 0.00000000 0.001778618
## (Intercept) 4.80195973 4.80195972 4.802081183
## blocII -0.01939070 -0.01939070 -0.022097221
## blocIII -0.03662632 -0.03662632 -0.037831855
## blocIV -0.10559332 -0.10559332 -0.099023604
## blocV -0.09313375 -0.09313375 -0.098406090
## umid50 0.13245136 0.13245136 0.133402155
## umid62,5 0.18548293 0.18548293 0.187347770
## K30 0.29799144 0.29799144 0.299365942
## K60 0.34433662 0.34433662 0.344738743
## K120 0.36493092 0.36493092 0.366432817
## K180 0.29542405 0.29542405 0.294825457
## umid50:K30 0.04343930 0.04343930 0.039402786
## umid62,5:K30 -0.13669277 -0.13669277 -0.139668174
## umid50:K60 0.11559375 0.11559375 0.114460635
## umid62,5:K60 0.09174072 0.09174072 0.089168997
## umid50:K120 0.11859658 0.11859658 0.118496463
## umid62,5:K120 0.16266223 0.16266223 0.158280560
## umid50:K180 0.28832017 0.28832017 0.288513923
## umid62,5:K180 0.21568848 0.21568848 0.213872602
# Perfil para o parâmetro de dispersão.
plot(profile(m3, which = "alpha"))
abline(v = 0, lty = 2)
V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
diag = "l", tl.pos = "lt", tl.col = "black",
tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])
dev.off()
## null device
## 1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
## sum mean max
## 0.19898168 0.01047272 0.04733996
# Teste de Wald para a interação.
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a == max(a)
L <- t(replicate(sum(ai), rbind(coef(m3) * 0), simplify = "matrix"))
L[, ai] <- diag(sum(ai))
# Cáclculo da estatística Chi-quadrado.
crossprod(L %*% coef(m3),
solve(L %*% vcov(m3) %*% t(L),
L %*% coef(m3)))
## [,1]
## [1,] 25.04696
linearHypothesis(model = m0,
hypothesis.matrix = L,
vcov. = vcov(m3),
coef. = coef(m3))
## Linear hypothesis test
##
## Hypothesis:
## umid50:K30 = 0
## umid62,5:K30 = 0
## umid50:K60 = 0
## umid62,5:K60 = 0
## umid50:K120 = 0
## umid62,5:K120 = 0
## umid50:K180 = 0
## umid62,5:K180 = 0
##
## Model 1: restricted model
## Model 2: ngra ~ bloc + umid * K
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 63
## 2 55 8 25.047 0.001526 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Predição com bandas de confiança.
X <- LSmatrix(m0, effect = c("umid", "K"))
pred <- attr(X, "grid")
pred <- transform(pred,
K = as.integer(K),
umid = factor(umid))
pred <- list(pois = pred, quasi = pred, pgen = pred)
# Preditos pela Poisson.
aux <- confint(glht(m0, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))
# Preditos pela Quasi-Poisson.
aux <- confint(glht(m1, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))
# Preditos pela Poisson Generalizada.
aux <- predict(m3, newdata = X,
interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])
# Junta o resultado dos 3 modelos.
pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, umid, K, modelo)
str(pred)
## 'data.frame': 45 obs. of 6 variables:
## $ modelo: Factor w/ 3 levels "pois","quasi",..: 1 2 3 1 2 3 1 2 3 1 ...
## $ umid : Factor w/ 3 levels "37,5","50","62,5": 1 1 1 1 1 1 1 1 1 1 ...
## $ K : int 0 0 0 30 30 30 60 60 60 120 ...
## $ fit : num 116 116 116 156 156 ...
## $ lwr : num 107 102 105 145 140 ...
## $ upr : num 126 131 128 167 173 ...
key <- list(type = "o", divide = 1,
lines = list(pch = 1:nlevels(pred$modelo),
lty = 1, col = 1),
text = list(c("Poisson", "Quasi-Poisson",
"Poisson Generelizada")))
xyplot(fit ~ K | umid, data = pred,
layout = c(NA, 1), as.table = TRUE,
xlim = extendrange(range(pred$K), f = 0.1),
key = key, pch = pred$modelo,
xlab = expression("Dose de potássio"~(mg~dm^{-3})),
ylab = "Número de grãos por parcela",
ly = pred$lwr, uy = pred$upr, cty = "bars", length = 0,
prepanel = prepanel.cbH,
desloc = 8 * scale(as.integer(pred$modelo), scale = FALSE),
panel = panel.cbH)
#-----------------------------------------------------------------------
# Número de grãos por vagem (offset).
xyplot(ngra/nvag ~ K | umid, data = soja, layout = c(NA, 1),
type = c("p", "smooth"),
xlab = expression("Dose de potássio"~(mg~dm^{-3})),
ylab = "Número de grãos por vagem",
strip = strip.custom(strip.names = TRUE, var.name = "Umidade"))
#-----------------------------------------------------------------------
# Modelo Poisson.
m0 <- glm(ngra ~ offset(log(nvag)) + bloc + umid * K,
data = soja, family = poisson)
m1 <- update(m0, family = quasipoisson)
# Diagnóstico.
par(mfrow = c(2, 2))
plot(m0); layout(1)
# Medidas de decisão.
# anova(m0, test = "Chisq")
anova(m1, test = "F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: ngra
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 73 34.238
## bloc 4 2.0264 69 32.212 1.1637 0.33686
## umid 2 1.7457 67 30.466 2.0050 0.14439
## K 4 3.8324 63 26.634 2.2009 0.08082 .
## umid:K 8 2.6490 55 23.984 0.7606 0.63838
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
##
## Call:
## glm(formula = ngra ~ offset(log(nvag)) + bloc + umid * K, family = quasipoisson,
## data = soja)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.52741 -0.46025 0.06782 0.45274 1.05490
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.848160 0.029477 28.774 <2e-16 ***
## blocII 0.011313 0.017535 0.645 0.5215
## blocIII 0.036250 0.017615 2.058 0.0444 *
## blocIV 0.019384 0.017940 1.080 0.2846
## blocV 0.016100 0.018448 0.873 0.3866
## umid50 -0.001908 0.037574 -0.051 0.9597
## umid62,5 -0.031173 0.037124 -0.840 0.4047
## K30 0.022867 0.036200 0.632 0.5302
## K60 0.034499 0.035862 0.962 0.3403
## K120 0.035322 0.035703 0.989 0.3268
## K180 0.040586 0.036221 1.121 0.2674
## umid50:K30 -0.018692 0.049385 -0.378 0.7065
## umid62,5:K30 -0.028459 0.049680 -0.573 0.5691
## umid50:K60 -0.048182 0.048583 -0.992 0.3257
## umid62,5:K60 -0.014035 0.048103 -0.292 0.7716
## umid50:K120 -0.030438 0.048367 -0.629 0.5317
## umid62,5:K120 0.045524 0.048714 0.935 0.3541
## umid50:K180 -0.016302 0.048354 -0.337 0.7373
## umid62,5:K180 0.016911 0.048076 0.352 0.7264
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.4353299)
##
## Null deviance: 34.238 on 73 degrees of freedom
## Residual deviance: 23.984 on 55 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 3
# Declara um modelo aditivo.
m0 <- glm(ngra ~ offset(log(nvag)) + bloc + umid + K,
data = soja, family = poisson)
m1 <- update(m0, family = quasipoisson)
anova(m1, test = "F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: ngra
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 73 34.238
## bloc 4 2.0264 69 32.212 1.2021 0.31880
## umid 2 1.7457 67 30.466 2.0711 0.13455
## K 4 3.8324 63 26.634 2.2734 0.07114 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Modelo Poisson Generalizado.
L <- with(soja,
list(y = ngra, offset = nvag, X = model.matrix(m0)))
# De acordo com a estimativa de phi da Quasi-Poisson, esse dado é
# subdisperso. Já que na verossimilhaça (1 + alpha * y) > -1 quando
# alpha < 0, então o menor valor possível para gamma para ter uma
# log-verossimilhança avaliável é
-1/max(soja$ngra)
## [1] -0.003690037
# Mesmo com esse lower bound, o valor chute para alpha foi definido por
# tentativa erro. O valor -0.003 dá Error e o valor -0.002 na hora de
# perfilhar encontra um mínimo melhor. Então, por tentativa erro
# chegou-se no -0.0026.
start <- c(alpha = -0.0026, coef(m0))
parnames(llpgnz) <- names(start)
# Com alpha fixo em 0 corresponde à Poisson.
m2 <- mle2(llpgnz, start = start, data = L,
fixed = list(alpha = 0), vecpar = TRUE)
# Mesma medida de ajuste e estimativas.
c(logLik(m2), logLik(m0))
## [1] -272.6263 -272.6263
# Poisson Generalizada.
m3 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)
# Teste para nulinidade do parâmetro de dispersão (H_0: alpha == 0).
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+blocII+blocIII+blocIV+
## blocV+umid50+umid62,5+K30+K60+K120+K180
## Model 2: m2, [llpgnz]: (Intercept)+blocII+blocIII+blocIV+blocV+
## umid50+umid62,5+K30+K60+K120+K180
## Tot Df Deviance Chisq Df Pr(>Chisq)
## 1 12 516.81
## 2 11 545.25 28.448 1 9.627e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cbind("PoissonGLM" = c(NA, coef(m0)),
"PoissonML" = coef(m2),
"PGeneraliz" = coef(m3))
## PoissonGLM PoissonML PGeneraliz
## NA 0.000000000 -0.002108602
## (Intercept) 0.855552235 0.855552234 0.851442205
## blocII 0.011126118 0.011126118 0.008402336
## blocIII 0.036245340 0.036245340 0.035298311
## blocIV 0.018895822 0.018895822 0.017120556
## blocV 0.012659290 0.012659290 0.007123349
## umid50 -0.026631767 -0.026631767 -0.026018732
## umid62,5 -0.026250165 -0.026250165 -0.020361875
## K30 0.007448488 0.007448488 0.008477443
## K60 0.012551823 0.012551823 0.012559113
## K120 0.039757260 0.039757260 0.045813881
## K180 0.041632187 0.041632187 0.046369545
# Perfil para o parâmetro de dispersão.
plot(profile(m3, which = "alpha"))
V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
diag = "l", tl.pos = "lt", tl.col = "black",
tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])
dev.off()
## null device
## 1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
## sum mean max
## 0.49953096 0.04541191 0.11695102
# Teste de Wald para a interação.
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a == max(a)
L <- t(replicate(sum(ai), rbind(coef(m3) * 0), simplify = "matrix"))
L[, ai] <- diag(sum(ai))
# Cáclculo da estatística Chi-quadrado.
crossprod(L %*% coef(m3),
solve(L %*% vcov(m3) %*% t(L),
L %*% coef(m3)))
## [,1]
## [1,] 14.26048
linearHypothesis(model = m0,
hypothesis.matrix = L,
vcov. = vcov(m3),
coef. = coef(m3))
## Linear hypothesis test
##
## Hypothesis:
## K30 = 0
## K60 = 0
## K120 = 0
## K180 = 0
##
## Model 1: restricted model
## Model 2: ngra ~ offset(log(nvag)) + bloc + umid + K
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 67
## 2 63 4 14.261 0.006508 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Predição com bandas de confiança.
# Por causa do offset, não tem como usar a LSmatrix.
pred <- unique(subset(soja, select = c("umid", "K")))
X <- model.matrix(formula(m0)[-2],
data = cbind(nvag = 1, bloc = soja$bloc[1], pred))
i <- grep(x = colnames(X), pattern = "^bloc")
X[, i] <- X[, i] * 0 + 1/(length(i) + 1)
head(X)
## (Intercept) blocII blocIII blocIV blocV umid50 umid62,5 K30 K60
## 1 1 0.2 0.2 0.2 0.2 0 0 0 0
## 2 1 0.2 0.2 0.2 0.2 0 0 1 0
## 3 1 0.2 0.2 0.2 0.2 0 0 0 1
## 4 1 0.2 0.2 0.2 0.2 0 0 0 0
## 5 1 0.2 0.2 0.2 0.2 0 0 0 0
## 6 1 0.2 0.2 0.2 0.2 1 0 0 0
## K120 K180
## 1 0 0
## 2 0 0
## 3 0 0
## 4 1 0
## 5 0 1
## 6 0 0
pred <- list(pois = pred, quasi = pred, pgen = pred)
# Preditos pela Poisson.
aux <- confint(glht(m0, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))
# Preditos pela Quasi-Poisson.
aux <- confint(glht(m1, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))
# Preditos pela Poisson Generalizada.
aux <- predict(m3, newdata = X,
interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])
# Junta o resultado dos 3 modelos.
pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, umid, K, modelo)
pred$K <- as.numeric(as.character(pred$K))
key <- list(type = "o", divide = 1,
lines = list(pch = 1:nlevels(pred$modelo),
lty = 1, col = 1),
text = list(c("Poisson", "Quasi-Poisson",
"Poisson Generelizada")))
xyplot(fit ~ K | umid, data = pred,
layout = c(NA, 1), as.table = TRUE,
xlim = extendrange(range(pred$K), f = 0.2),
key = key, pch = pred$modelo,
xlab = expression("Dose de potássio"~(mg~dm^{-3})),
ylab = "Número de grãos por parcela",
ly = pred$lwr, uy = pred$upr, cty = "bars", length = 0,
prepanel = prepanel.cbH,
desloc = 8 * scale(as.integer(pred$modelo), scale = FALSE),
panel = panel.cbH)
Experimento conduzido em casa de vegetação para avaliar o efeito da desfolha, em diferentes fases de desenvolvimento do algodão, sobre a produção da cultura. As parcelas foram vasos com duas plantas que tiveram a área das folhas removidas com uma tesoura, simulando o ataque de insetos desfolhadores, nos níveis de 0, 25, 50, 75 e 100% de remoção de área foliar. Em cada fase de desenvolvimento (de 5), 5 parcelas sofreram desfolha nos níveis mencionados. O número de capulhos ao final do experimento foi contado.
#-----------------------------------------------------------------------
# Número de capulhos em função do nível de desfolha artificial e fase
# fenológica do algodoeiro.
data(capdesfo, package = "MRDCr")
str(capdesfo)
## 'data.frame': 125 obs. of 4 variables:
## $ est : Factor w/ 5 levels "vegetativo","botão floral",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ des : num 0 0 0 0 0 0.25 0.25 0.25 0.25 0.25 ...
## $ rept: int 1 2 3 4 5 1 2 3 4 5 ...
## $ ncap: int 10 9 8 8 10 11 9 10 10 10 ...
p1 <- xyplot(ncap ~ des | est, data = capdesfo,
col = 1, type = c("p", "smooth"), col.line = "gray50",
layout = c(2, 3), as.table = TRUE,
xlim = extendrange(c(0:1), f = 0.15),
xlab = "Nível de desfolhas artificial",
ylab = "Número de capulho produzidos",
spread = 0.035, panel = panel.beeswarm)
p1
#-----------------------------------------------------------------------
# Modelo Poisson.
m0 <- glm(ncap ~ est * (des + I(des^2)),
data = capdesfo, family = poisson)
m1 <- update(m0, family = quasipoisson)
par(mfrow = c(2, 2))
plot(m0); layout(1)
anova(m0, test = "Chisq")
## Analysis of Deviance Table
##
## Model: poisson, link: log
##
## Response: ncap
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 124 75.514
## est 4 19.9582 120 55.556 0.000509 ***
## des 1 15.8643 119 39.692 6.805e-05 ***
## I(des^2) 1 1.2926 118 38.399 0.255566
## est:des 4 6.7085 114 31.691 0.152119
## est:I(des^2) 4 6.3592 110 25.331 0.173883
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1, test = "F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: ncap
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 124 75.514
## est 4 19.9582 120 55.556 21.5543 3.775e-13 ***
## des 1 15.8643 119 39.692 68.5317 3.269e-13 ***
## I(des^2) 1 1.2926 118 38.399 5.5839 0.01988 *
## est:des 4 6.7085 114 31.691 7.2450 3.236e-05 ***
## est:I(des^2) 4 6.3592 110 25.331 6.8677 5.674e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1)
##
## Call:
## glm(formula = ncap ~ est * (des + I(des^2)), family = quasipoisson,
## data = capdesfo)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.07706 -0.30981 -0.02283 0.27044 1.16650
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.21424 0.06706 33.018 < 2e-16 ***
## estbotão floral -0.08003 0.09655 -0.829 0.40898
## estflorecimento -0.02723 0.09626 -0.283 0.77781
## estmaça -0.14861 0.09867 -1.506 0.13489
## estcapulho 0.11291 0.09247 1.221 0.22465
## des 0.34859 0.32741 1.065 0.28935
## I(des^2) -0.73843 0.32397 -2.279 0.02458 *
## estbotão floral:des 0.13638 0.46401 0.294 0.76937
## estflorecimento:des -1.58189 0.49140 -3.219 0.00169 **
## estmaça:des 0.47548 0.49046 0.969 0.33444
## estcapulho:des -0.82100 0.45204 -1.816 0.07206 .
## estbotão floral:I(des^2) 0.10435 0.45451 0.230 0.81884
## estflorecimento:I(des^2) 1.40435 0.49033 2.864 0.00501 **
## estmaça:I(des^2) -0.92938 0.49667 -1.871 0.06397 .
## estcapulho:I(des^2) 1.07565 0.44314 2.427 0.01683 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.2314882)
##
## Null deviance: 75.514 on 124 degrees of freedom
## Residual deviance: 25.331 on 110 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
#-----------------------------------------------------------------------
# Modelo Poisson Generalizada.
L <- with(capdesfo,
list(y = ncap, offset = 1, X = model.matrix(m0)))
start <- c(alpha = log(1), coef(m0))
parnames(llpgnz) <- names(start)
# Modelo Poisson também.
m2 <- mle2(llpgnz, start = start, data = L,
fixed = list(alpha = 0), vecpar = TRUE)
c(logLik(m2), logLik(m0))
## [1] -254.8414 -254.8414
# Modelo Poisson Generalizado.
m3 <- mle2(llpgnz, start = start, data = L, vecpar = TRUE)
logLik(m3)
## 'log Lik.' -210.6668 (df=16)
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+estbotão floral+
## estflorecimento+estmaça+estcapulho+des+I(des^2)+
## estbotão floral:des+estflorecimento:des+estmaça:des+
## estcapulho:des+estbotão floral:I(des^2)+
## estflorecimento:I(des^2)+estmaça:I(des^2)+
## estcapulho:I(des^2)
## Model 2: m2, [llpgnz]: (Intercept)+estbotão floral+
## estflorecimento+estmaça+estcapulho+des+I(des^2)+
## estbotão floral:des+estflorecimento:des+estmaça:des+
## estcapulho:des+estbotão floral:I(des^2)+
## estflorecimento:I(des^2)+estmaça:I(des^2)+
## estcapulho:I(des^2)
## Tot Df Deviance Chisq Df Pr(>Chisq)
## 1 16 421.33
## 2 15 509.68 88.349 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m3)
## Maximum likelihood estimation
##
## Call:
## mle2(minuslogl = llpgnz, start = start, data = L, vecpar = TRUE)
##
## Coefficients:
## Estimate Std. Error z value Pr(z)
## alpha -0.0611259 0.0031211 -19.5847 < 2.2e-16
## (Intercept) 2.2159873 0.0603520 36.7177 < 2.2e-16
## estbotão floral -0.0792916 0.0914575 -0.8670 0.3859545
## estflorecimento -0.0022499 0.0894092 -0.0252 0.9799245
## estmaça -0.1453929 0.1031466 -1.4096 0.1586649
## estcapulho 0.1054861 0.0781060 1.3506 0.1768394
## des 0.3533745 0.3101810 1.1393 0.2545978
## I(des^2) -0.7506691 0.3305526 -2.2710 0.0231499
## estbotão floral:des 0.1116847 0.4465137 0.2501 0.8024899
## estflorecimento:des -1.7775352 0.5328740 -3.3358 0.0008507
## estmaça:des 0.2618588 0.5439359 0.4814 0.6302217
## estcapulho:des -0.7890219 0.4081733 -1.9331 0.0532292
## estbotão floral:I(des^2) 0.1369131 0.4587833 0.2984 0.7653777
## estflorecimento:I(des^2) 1.5933721 0.5624116 2.8331 0.0046098
## estmaça:I(des^2) -0.6273100 0.5754683 -1.0901 0.2756752
## estcapulho:I(des^2) 1.0546017 0.4257783 2.4769 0.0132536
##
## alpha ***
## (Intercept) ***
## estbotão floral
## estflorecimento
## estmaça
## estcapulho
## des
## I(des^2) *
## estbotão floral:des
## estflorecimento:des ***
## estmaça:des
## estcapulho:des .
## estbotão floral:I(des^2)
## estflorecimento:I(des^2) **
## estmaça:I(des^2)
## estcapulho:I(des^2) *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## -2 log L: 421.3336
plot(profile(m3, which = "alpha"))
cbind("PoissonGLM" = c(NA, coef(m0)),
"PoissonML" = coef(m2),
"PGeneraliz" = coef(m3))
## PoissonGLM PoissonML PGeneraliz
## NA 0.00000000 -0.061125928
## (Intercept) 2.21424242 2.21424242 2.215987267
## estbotão floral -0.08002756 -0.08002756 -0.079291571
## estflorecimento -0.02722855 -0.02722855 -0.002249855
## estmaça -0.14861263 -0.14861263 -0.145392921
## estcapulho 0.11291268 0.11291268 0.105486116
## des 0.34858564 0.34858564 0.353374487
## I(des^2) -0.73843427 -0.73843427 -0.750669137
## estbotão floral:des 0.13638395 0.13638395 0.111684700
## estflorecimento:des -1.58188650 -1.58188650 -1.777535174
## estmaça:des 0.47548300 0.47548300 0.261858845
## estcapulho:des -0.82100307 -0.82100307 -0.789021942
## estbotão floral:I(des^2) 0.10435096 0.10435096 0.136913065
## estflorecimento:I(des^2) 1.40435178 1.40435178 1.593372095
## estmaça:I(des^2) -0.92937720 -0.92937720 -0.627310018
## estcapulho:I(des^2) 1.07565430 1.07565430 1.054601664
V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
diag = "l", tl.pos = "lt", tl.col = "black",
tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])
dev.off()
## null device
## 1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
## sum mean max
## 0.22237413 0.01482494 0.04892709
# Teste de Wald para a interação.
a <- c(0, attr(model.matrix(m0), "assign"))
ai <- a == max(a)
L <- t(replicate(sum(ai), rbind(coef(m3) * 0), simplify = "matrix"))
L[, ai] <- diag(sum(ai))
# Teste de Wald explicito.
crossprod(L %*% coef(m3),
solve(L %*% vcov(m3) %*% t(L),
L %*% coef(m3)))
## [,1]
## [1,] 19.38121
# Teste de Wald para interação (poderia ser LRT, claro).
# É necessário um objeto glm, mas necesse caso ele não usado para nada.
linearHypothesis(model = m0,
hypothesis.matrix = L,
vcov. = vcov(m3),
coef. = coef(m3))
## Linear hypothesis test
##
## Hypothesis:
## estbotão floral:I(des^2) = 0
## estflorecimento:I(des^2) = 0
## estmaça:I(des^2) = 0
## estcapulho:I(des^2) = 0
##
## Model 1: restricted model
## Model 2: ncap ~ est * (des + I(des^2))
##
## Note: Coefficient covariance matrix supplied.
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 114
## 2 110 4 19.381 0.0006613 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Predição com bandas de confiança.
pred <- with(capdesfo, expand.grid(est = levels(est),
des = seq(0, 1, by = 0.025)))
X <- model.matrix(formula(m0)[-2], data = pred)
pred <- list(pois = pred, quasi = pred, pgen = pred)
# Preditos pela Poisson.
aux <- confint(glht(m0, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))
# Preditos pela Quasi-Poisson.
aux <- confint(glht(m1, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))
# Preditos pela Poisson Generalizada.
aux <- predict(m3, newdata = X,
interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])
pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, est, des, modelo)
key <- list(lines = list(lty = 1),
text = list(c("Poisson", "Quasi-Poisson",
"Poisson Generelizada")))
key$lines$col <-
trellis.par.get("superpose.line")$col[1:nlevels(pred$modelo)]
p2 <- xyplot(fit ~ des | est, data = pred, groups = modelo,
layout = c(NA, 1), as.table = TRUE,
xlim = extendrange(range(pred$des), f = 0.1),
type = "l", key = key,
ly = pred$lwr, uy = pred$upr,
cty = "bands", alpha = 0.35,
prepanel = prepanel.cbH,
panel.groups = panel.cbH,
panel = panel.superpose)
# p2
update(p1, type = "p", layout = c(NA, 1),
key = key, spread = 0.07) +
as.layer(p2, under = TRUE)
#-----------------------------------------------------------------------
data(nematoide, package = "MRDCr")
str(nematoide)
## 'data.frame': 94 obs. of 5 variables:
## $ cult: Factor w/ 19 levels "A","B","C","D",..: 1 1 1 1 1 2 2 2 2 2 ...
## $ mfr : num 10.52 11.03 6.42 8.16 4.48 ...
## $ vol : int 40 40 40 40 40 40 40 40 40 40 ...
## $ nema: int 4 5 3 4 3 2 2 2 2 2 ...
## $ off : num 0.263 0.276 0.161 0.204 0.112 ...
# Número de nematóides por grama de raíz.
plot(nema ~ off, data = nematoide)
# Média do número de nematóides por grama de raíz.
mv <- aggregate(cbind(y = nema/off) ~ cult, data = nematoide,
FUN = function(x) c(m = mean(x), v = var(x)))
xyplot(y[, "v"] ~ y[, "m"], data = mv,
xlab = "Média amostral",
ylab = "Variância amostral") +
layer(panel.abline(a = 0, b = 1, lty = 2))
#-----------------------------------------------------------------------
# Ajuste do Poisson.
m0 <- glm(nema ~ offset(log(off)) + cult,
data = nematoide,
family = poisson)
m1 <- update(m0, family = quasipoisson)
# Diagnóstico.
par(mfrow = c(2, 2))
plot(m0); layout(1)
# Estimativas dos parâmetros.
summary(m1)
##
## Call:
## glm(formula = nema ~ offset(log(off)) + cult, family = quasipoisson,
## data = nematoide)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.9455 -0.6693 0.1100 1.1519 4.7603
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.9293 0.4496 6.515 7.42e-09 ***
## cultB 0.1662 0.7657 0.217 0.828703
## cultC 0.3417 0.6769 0.505 0.615154
## cultD 0.9128 0.6359 1.436 0.155278
## cultE 0.9692 0.6076 1.595 0.114879
## cultF 0.3973 0.6903 0.576 0.566621
## cultG -0.4507 0.7931 -0.568 0.571522
## cultH 0.3218 0.8665 0.371 0.711433
## cultI 0.8808 0.6018 1.464 0.147507
## cultJ 1.1982 0.6359 1.884 0.063390 .
## cultK 1.4511 0.5965 2.433 0.017369 *
## cultL 1.4299 0.5362 2.667 0.009381 **
## cultM 1.6138 0.5507 2.931 0.004482 **
## cultN 1.7743 0.4958 3.579 0.000610 ***
## cultO 1.5776 0.5268 2.995 0.003718 **
## cultP 1.6719 0.5103 3.277 0.001593 **
## cultQ 2.2105 0.4913 4.499 2.45e-05 ***
## cultR 2.2080 0.5103 4.327 4.60e-05 ***
## cultS 1.9155 0.4958 3.863 0.000236 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 3.841169)
##
## Null deviance: 556.73 on 93 degrees of freedom
## Residual deviance: 212.54 on 75 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 6
# Quadro de deviance.
# anova(m0, test = "Chisq")
anova(m1, test = "F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: nema
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 93 556.73
## cult 18 344.19 75 212.54 4.978 3.475e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Ajuste da Poisson Generalizada.
L <- with(nematoide,
list(y = nema, offset = off, X = model.matrix(m0)))
start <- c(alpha = log(1), coef(m0))
parnames(llpgnz) <- names(start)
# Modelo Poisson também.
m2 <- mle2(llpgnz, start = start, data = L,
fixed = list(alpha = 0), vecpar = TRUE)
c(logLik(m2), logLik(m0))
## [1] -274.5007 -274.5007
# Poisson Generalizada.
m3 <- pgnz(formula(m0), data = nematoide)
# Diferença de deviance.
# 2 * diff(c(logLik(m0), logLik(m3)))
anova(m3, m2)
## Likelihood Ratio Tests
## Model 1: m3, [llpgnz]: alpha+(Intercept)+cultB+cultC+cultD+
## cultE+cultF+cultG+cultH+cultI+cultJ+cultK+cultL+
## cultM+cultN+cultO+cultP+cultQ+cultR+cultS
## Model 2: m2, [llpgnz]: (Intercept)+cultB+cultC+cultD+cultE+
## cultF+cultG+cultH+cultI+cultJ+cultK+cultL+cultM+
## cultN+cultO+cultP+cultQ+cultR+cultS
## Tot Df Deviance Chisq Df Pr(>Chisq)
## 1 20 497.89
## 2 19 549.00 51.112 1 8.725e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Perfil de log-verossimilhança para o parâmetro de dispersão.
plot(profile(m3, which = "alpha"))
# Covariância.
V <- cov2cor(vcov(m3))
corrplot.mixed(V, lower = "number", upper = "ellipse",
diag = "l", tl.pos = "lt", tl.col = "black",
tl.cex = 0.8, col = brewer.pal(9, "Greys")[-(1:3)])
dev.off()
## null device
## 1
# Tamanho das covariâncias com \alpha.
each(sum, mean, max)(abs(V[1, -1]))
## sum mean max
## 1.06647709 0.05613037 0.16898969
# Gráfico das estimativas.
pars <- data.frame(Pois = c(0, coef(m0)), PGen = coef(m3))
xyplot(PGen ~ Pois, data = pars, aspect = "iso", grid = TRUE) +
layer(panel.abline(a = 0, b = 1, lty = 2))
#-----------------------------------------------------------------------
X <- model.matrix(m0)
# # Predito do número de nematóides observado (considera o offset).
# with(nematoide, {
# cbind(y = nema,
# Pois = nematoide$off * exp(X %*% coef(m0)),
# PGen = nematoide$off * exp(X %*% coef(m1)[-1]))
# })
# Predito do número de nematóides por grama de raíz.
pred <- with(nematoide, {
data.frame(y = nema/off,
Pois = c(exp(X %*% coef(m0))),
PGen = c(exp(X %*% coef(m3)[-1])))
})
str(pred)
## 'data.frame': 94 obs. of 3 variables:
## $ y : num 15.2 18.1 18.7 19.6 26.8 ...
## $ Pois: num 18.7 18.7 18.7 18.7 18.7 ...
## $ PGen: num 19 19 19 19 19 ...
splom(pred) + layer(panel.abline(a = 0, b = 1))
# Correlação predito x observado.
cor(pred)
## y Pois PGen
## y 1.0000000 0.6157455 0.6455661
## Pois 0.6157455 1.0000000 0.9497288
## PGen 0.6455661 0.9497288 1.0000000
# Média das observações de das estimativas por cultivar.
predm <- aggregate(as.matrix(pred) ~ cult, data = nematoide, FUN = mean)
cor(predm[, -1])
## y Pois PGen
## y 1.0000000 0.9374510 0.9824583
## Pois 0.9374510 1.0000000 0.9500745
## PGen 0.9824583 0.9500745 1.0000000
#-----------------------------------------------------------------------
# Predição com intervalos de confiança.
pred <- unique(subset(nematoide, select = cult))
X <- model.matrix(~cult, data = pred)
pred <- list(pois = pred, quasi = pred, pgen = pred)
# Preditos pela Poisson.
aux <- confint(glht(m0, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$pois <- cbind(pred$pois, exp(aux))
# Preditos pela Quasi-Poisson.
aux <- confint(glht(m1, linfct = X),
calpha = univariate_calpha())$confint
colnames(aux)[1] <- "fit"
pred$quasi <- cbind(pred$quasi, exp(aux))
# Preditos pela Poisson Generalizada.
aux <- predict(m3, newdata = X,
interval = "confidence", type = "response")
pred$pgen <- cbind(pred$pgen, aux[, c(2, 1, 3)])
pred <- ldply(pred, .id = "modelo")
pred <- arrange(pred, cult, modelo)
key <- list(type = "o", divide = 1,
lines = list(pch = 1:nlevels(pred$modelo),
lty = 1, col = 1),
text = list(c("Poisson", "Quasi-Poisson",
"Poisson Generelizada")))
xyplot(nema/off ~ cult, data = nematoide,
key = key,
xlab = "Cultivar de feijão",
ylab = "Número de nematóides por grama de raíz") +
as.layer(
xyplot(fit ~ cult, data = pred,
pch = pred$modelo,
ly = pred$lwr, uy = pred$upr,
cty = "bars", length = 0,
prepanel = prepanel.cbH,
desloc = 0.25 * scale(as.integer(pred$modelo),
scale = FALSE),
panel = panel.cbH))
#-----------------------------------------------------------------------
# Resíduos de Pearson.
X <- model.matrix(m0)
# # Resíduos de Pearson no Poisson.
# with(nematoide, {
# y <- nema
# # haty <- fitted(m0)
# haty <- nematoide$off * exp(X %*% coef(m0))
# sdy <- sqrt(haty)
# cbind((y - haty)/sdy,
# residuals(m0, type = "pearson"))
# })
# Resíduos de Pearson do Poisson Generalizado.
rp <- with(nematoide, {
y <- nema
alph <- coef(m3)["alpha"]
haty <- c(nematoide$off * exp(X %*% coef(m3)[-1]))
sdy <- sqrt(haty) * (1 + alph * haty)
(y - haty)/sdy
})
rp <- stack(data.frame(Pois = residuals(m0, type = "pearson"),
PGen = rp))
qqmath(~values | ind, data = rp,
xlab = "Quantis teóricos",
ylab = "Resíduos de Pearson",
panel = function(...) {
panel.qqmathline(...)
panel.qqmath(...)
})
## Atualizado em 21 de julho de 2016.
##
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04 LTS
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=pt_BR.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=pt_BR.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=pt_BR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets
## [7] methods base
##
## other attached packages:
## [1] plyr_1.8.4 corrplot_0.77 hnp_1.2
## [4] sandwich_2.3-4 car_2.1-2 boot_1.3-18
## [7] lmtest_0.9-34 zoo_1.7-13 multcomp_1.4-6
## [10] TH.data_1.0-7 MASS_7.3-45 survival_2.39-5
## [13] mvtnorm_1.0-5 doBy_4.5-15 rmarkdown_1.0
## [16] latticeExtra_0.6-28 RColorBrewer_1.1-2 lattice_0.20-33
## [19] knitr_1.13 MRDCr_0.0-2 gsubfn_0.6-6
## [22] proto_0.3-10 bbmle_1.0.18 devtools_1.12.0
##
## loaded via a namespace (and not attached):
## [1] splines_3.3.1 tcltk_3.3.1 colorspace_1.2-6
## [4] testthat_1.0.2 htmltools_0.3.5 mgcv_1.8-12
## [7] yaml_2.1.13 nloptr_1.0.4 withr_1.0.2
## [10] effects_3.1-1 stringr_1.0.0 MatrixModels_0.4-1
## [13] codetools_0.2-14 memoise_1.0.0 evaluate_0.9
## [16] SparseM_1.7 quantreg_5.26 pbkrtest_0.4-6
## [19] parallel_3.3.1 highr_0.6 Rcpp_0.12.5
## [22] formatR_1.4 lme4_1.1-12 digest_0.6.9
## [25] stringi_1.1.1 numDeriv_2014.2-1 grid_3.3.1
## [28] tools_3.3.1 magrittr_1.5 crayon_1.3.2
## [31] Matrix_1.2-6 minqa_1.2.4 roxygen2_5.0.1
## [34] R6_2.1.2 nnet_7.3-12 nlme_3.1-128
## [37] compiler_3.3.1
Modelos de Regressão para Dados de Contagem com o R |
|
61 RBRAS | Departamento de Estatística - UFPR |
23 a 25 de Maio de 2016 | LEG, PET Estatística |
Salvador - Bahia | github.com/leg-ufpr/MRDCr |