rm(list = objects())
# Pacotes.
library(gdata)
library(latticeExtra)
library(wzRfun)
library(nlme)
# Para ter nomes dos meses em inglês.
Sys.setlocale("LC_TIME", "en_US.UTF-8")
# Informações da sessão.
# sessionInfo()
devtools::session_info()
#-----------------------------------------------------------------------
# Lê e cura o arquivo de dados.
# f <- dir(pattern = "*.xlsx")[1]
f <- "Planilha total dados 2011 e 2013.xlsx"
da1 <- read.xls(f,
sheet = 1,
header = TRUE,
stringsAsFactors = FALSE)
# Mantém só colunas com informações úteis. Renomeia variáveis.
da1 <- da1[, 1:4]
names(da1) <- c("ts", "dia", "dis", "numesp")
# Tranforma para data.
da1$ts <- as.Date(da1$ts)
# Estrutura.
str(da1)
## 'data.frame': 40 obs. of 4 variables:
## $ ts : Date, format: "2011-12-12" "2011-12-12" ...
## $ dia : int 0 0 0 0 0 4 4 4 4 4 ...
## $ dis : int 0 92 104 118 130 0 92 104 118 130 ...
## $ numesp: int 113 31 25 27 18 107 24 11 10 4 ...
xyplot(numesp ~ ts | factor(dis),
data = da1,
type = "o",
xlab = "Date",
ylab = "Number of spores in the trap",
main = list("Season 2011/12", font = 1),
as.table = TRUE)
xyplot(numesp ~ dis | factor(ts),
data = da1,
type = "o",
xlab = "Trap distance from the orchard border (m)",
ylab = "Number of spores in the trap",
main = list("Season 2011/12", font = 1),
as.table = TRUE)
Devido a resposta ser uma variável de contagem, assumiu-se um modelo baseado em quasi-verossimilhança com distribuição de Poisson (quasi-Poisson). A função de ligação foi a logaritmica e a função de variância foi a identidade. O modelo para explicar a média do número de esporos considerou, inicialmente, os produtos de termos lineares e quadráticos das variáveis distância da borda do pomar (dis
) e perÃodo após a primeira coleta (dia
). Os termos não significativos, de acordo com a estatÃstica \(z\) do teste de Wald, foram abandonados para obtenção de um modelo mais parcimonioso.
# Declarando um modelo para a contagem usando quasi-Poisson.
g0 <- glm(numesp ~ poly(dia, 2) * poly(dis, 2),
data = da1,
family = quasipoisson)
# ResÃduos.
par(mfrow = c(2, 2))
plot(g0)
layout(1)
A análise dos resÃduos não indica fuga nos pressupostos do modelo, sendo portanto o modelo considerado adequadamente ajustado aos dados.
# Reajuste com polinômio cru (trata-se ainda do mesmo modelo).
g0 <- update(g0, ~(dia + I(dia^2)) * (dis + I(dis^2)))
# Quadro de teste para efeito dos termos do modelo.
anova(g0, test = "F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: numesp
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 39 1185.03
## dia 1 581.19 38 603.85 375.6570 < 2.2e-16 ***
## I(dia^2) 1 20.16 37 583.69 13.0280 0.001067 **
## dis 1 524.73 36 58.96 339.1645 < 2.2e-16 ***
## I(dis^2) 1 1.47 35 57.50 0.9481 0.337735
## dia:dis 1 6.77 34 50.73 4.3759 0.044734 *
## dia:I(dis^2) 1 0.36 33 50.36 0.2359 0.630612
## I(dia^2):dis 1 1.22 32 49.14 0.7882 0.381496
## I(dia^2):I(dis^2) 1 0.70 31 48.45 0.4499 0.507370
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
De acordo com o quadro de deviance acima, pela estatÃstica F, não se tem evidência contra a hipótese de não haver interação entre os fatores. O modelo reduzido, porém, apontou interação entre os termos lineares ao nÃvel de 5%.
# Resumo do ajuste do modelo maximal.
summary(g0)
##
## Call:
## glm(formula = numesp ~ dia + I(dia^2) + dis + I(dis^2) + dia:dis +
## dia:I(dis^2) + I(dia^2):dis + I(dia^2):I(dis^2), family = quasipoisson,
## data = da1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2373 -0.8710 -0.1429 0.2724 2.5063
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.731e+00 9.870e-02 47.935 < 2e-16 ***
## dia -1.147e-02 1.690e-02 -0.679 0.50214
## I(dia^2) -1.672e-03 5.157e-04 -3.241 0.00284 **
## dis -1.343e-02 8.663e-03 -1.550 0.13118
## I(dis^2) -1.572e-05 7.616e-05 -0.206 0.83787
## dia:dis 8.954e-04 1.704e-03 0.525 0.60299
## dia:I(dis^2) -1.200e-05 1.517e-05 -0.791 0.43485
## I(dia^2):dis -2.768e-05 5.315e-05 -0.521 0.60626
## I(dia^2):I(dis^2) 3.184e-07 4.698e-07 0.678 0.50294
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 1.547118)
##
## Null deviance: 1185.032 on 39 degrees of freedom
## Residual deviance: 48.446 on 31 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 6
# Modelo reduzido obtido com o abandono de termos irrelevantes.
g1 <- update(g0, ~dia + dis + I(dia^2))
# ResÃduos.
par(mfrow = c(2, 2))
plot(g1)
layout(1)
# Teste para os termos abandonados.
anova(g1, g0, test = "F")
## Analysis of Deviance Table
##
## Model 1: numesp ~ dia + dis + I(dia^2)
## Model 2: numesp ~ dia + I(dia^2) + dis + I(dis^2) + dia:dis + dia:I(dis^2) +
## I(dia^2):dis + I(dia^2):I(dis^2)
## Resid. Df Resid. Dev Df Deviance F Pr(>F)
## 1 36 58.963
## 2 31 48.446 5 10.517 1.3596 0.2663
# Resumo do ajuste.
summary(g1)
##
## Call:
## glm(formula = numesp ~ dia + dis + I(dia^2), family = quasipoisson,
## data = da1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5092 -1.0794 -0.4905 0.5374 3.1572
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.8285351 0.0841405 57.387 < 2e-16 ***
## dia -0.0272389 0.0134481 -2.025 0.05028 .
## dis -0.0174778 0.0009651 -18.110 < 2e-16 ***
## I(dia^2) -0.0013982 0.0004146 -3.372 0.00179 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 1.523216)
##
## Null deviance: 1185.032 on 39 degrees of freedom
## Residual deviance: 58.963 on 36 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
beta <- coef(g1)
eqn <- data.frame(sign = ifelse(beta >= 0, "+", "-"),
abs = abs(beta))
O modelo reduzido, considerando teste F para a diferença de deviance entre modelos encaixados, não difere em qualidade do modelo maximal. Dessa forma, esse modelo será considerado para a interpretação dos efeitos e previsão de quantidade de esporos.
Ess modelo final tem a seguinte equação ajustada para o número médio de esporos (\(\hat{y}\)) \[ \hat{y} = \exp\{ 4.8285351 2]` - 0.0272389 \times \texttt{dia} - 0.0013982 \times \texttt{dia}^2 - 0.0174778 \times \texttt{dis} \} \]
#-----------------------------------------------------------------------
# Predição em um grid para gerar tabela.
grid <- with(da1,
expand.grid(dis = c(0, 10, 25, 50, 100),
dia = seq(0, 60, by = 15)))
grid$z <- predict(g0, newdata = grid, type = "response")
tb <- reshape2::dcast(grid, formula = dis ~ dia, value.var = "z")
cap <-
"Número de esporos como função da distância (m) e do perÃodo após a primeira coleta (dias)."
knitr::kable(tb, caption = cap)
dis | 0 | 15 | 30 | 45 | 60 |
---|---|---|---|---|---|
0 | 113.45746 | 65.57520 | 17.862473 | 2.2931839 | 0.1387495 |
10 | 99.04208 | 60.85600 | 15.783938 | 1.7280473 | 0.0798591 |
25 | 80.30460 | 51.93232 | 12.679416 | 1.1687593 | 0.0406738 |
50 | 55.73353 | 35.21799 | 8.050711 | 0.6657721 | 0.0199177 |
100 | 25.30901 | 10.16961 | 2.322929 | 0.3016267 | 0.0222641 |
#-----------------------------------------------------------------------
# Predição para gerar a superficÃe de resposta.
grid <- with(da1,
expand.grid(dis = seq(0, 200, length.out = 151),
dia = seq(0, 50, length.out = 151)))
grid$z <- predict(g0, newdata = grid, type = "response")
at <- c(Inf, 90, 70, 50, 30, 10, 1, 0)
lv1 <- levelplot(z ~ dia + dis,
data = grid,
xlab = "Time after the first assessement (days)",
ylab = "Distance from orchard border (m)",
# col.regions = gray.colors,
col.regions = colorRampPalette(
colors = c("white", "gray60")), #(length(at) + 1),
at = at,
contour = FALSE) +
as.layer(
contourplot(z ~ dia + dis,
data = grid,
# label.style = "flat",
at = at))
lv1
# Visualizando o ajuste.
w1 <- wireframe(z ~ dia + dis,
data = grid,
drape = TRUE,
xlab = list("Time after the\nfirst assessement (days)",
rot = 32),
ylab = list("Distance from\norchard border (m)",
rot = -32),
zlab = list("Mean spores", rot = 90),
main = list("Season 2011/12", font = 1),
col.regions =
colorRampPalette(colors = c("white", "black"))(101),
screen = list(x = -110, z = -20, y = -140),
scpos = list(x = 9, y = 5, z = 2),
type = "on",
panel.3d.wireframe = panel.3d.contour,
scales = list(arrows = FALSE))
print(w1)
# rp.wire(w)
O modelo exponencial \[ y = a \exp\{-b x\}, \]
em que \(a\) e \(b\) são parâmetros e \(x\) é a variável tempo, é muito utilizado em problemas de modelagem em fitopatologia. Para fins de comparação, esse modelo será usado aqui.
NOTE: o modelo linear generalizado usado na sessão anterior é um modelo exponencial para a média. Veja que \[ y = \exp\{ \beta_0 + \beta_1 \times \texttt{dia} \beta_2 \times \texttt{dia}^2 \beta_3 \times \texttt{dis} \} \]
é um modelo exponencial. Considere apenas os dois primeiros termos, faça \(a = \exp\{\beta_0\}\) e faça \(\beta_1 = -b\), então tem-se
\[ y = \exp\{\beta_0 + \beta_1 \times \texttt{dia}\} = a \exp\{-b \times \texttt{dia}\} \]
A diferença, portanto, para o ajuste que faremos a seguir, é que será considerada distribuição normal para a variável resposta ao invés de uma especificação de momentos baseada na distribuição de Poisson.
n0 <- nls(numesp ~ exp(b0 + b1 * dia + b2 * dis + b3 * dia^2),
data = da1,
start = list(b0 = 4.8,
b1 = -0.027,
b2 = -0.0017,
b3 = -0.0013),
trace = FALSE)
# Estimativas. Compara entre modelos.
cbind(`m. não linear` = coef(n0),
`m. linear generalizado` = coef(g1))
## m. não linear m. linear generalizado
## b0 4.76110839 4.828535098
## b1 -0.02192437 -0.027238891
## b2 -0.01654884 -0.017477768
## b3 -0.00134138 -0.001398151
summary(n0)
##
## Formula: numesp ~ exp(b0 + b1 * dia + b2 * dis + b3 * dia^2)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## b0 4.7611084 0.0345637 137.749 < 2e-16 ***
## b1 -0.0219244 0.0092719 -2.365 0.02356 *
## b2 -0.0165488 0.0007682 -21.543 < 2e-16 ***
## b3 -0.0013414 0.0003896 -3.443 0.00148 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.572 on 36 degrees of freedom
##
## Number of iterations to convergence: 7
## Achieved convergence tolerance: 1.251e-06
grid$z_exp <- predict(n0, newdata = grid)
# Resultado do ajuste.
wireframe(z_exp ~ dia + dis,
data = grid,
drape = TRUE,
xlab = list("Time after the\nfirst assessement (days)",
rot = 32),
ylab = list("Distance from\norchard border (m)",
rot = -32),
zlab = list("Mean spores", rot = 90),
main = list("Season 2011/12", font = 1),
col.regions =
colorRampPalette(colors = c("white", "black"))(101),
screen = list(x = -110, z = -20, y = -140),
scpos = list(x = 9, y = 5, z = 2),
type = "on",
panel.3d.wireframe = panel.3d.contour,
scales = list(arrows = FALSE))
O modelo potência também é muito usado para modelar problemas de epidemiologia. A expressão do modelo é
\[ y = a x^b, \] em que \(a\) e \(b\) são parâmetros do modelo e \(x\) a variável independente.
No contexto dessa aplicação o modelo deve acomodar o efeito de perÃodo e distância. Dessa forma, o modelo considerado, para manter a lógica considerada nos anteriores, é definido por \[ y = \beta_0 \cdot \texttt{dia}^{\beta_1} \cdot \texttt{dis}^{\beta_2}. \]
n1 <- nls(numesp ~ d0 * (dia + 0.01)^d1 * (dis + 0.01)^d2,
data = da1,
start = list(d0 = 4.8,
d1 = 0.5,
d2 = 0.5),
trace = FALSE)
summary(n1)
##
## Formula: numesp ~ d0 * (dia + 0.01)^d1 * (dis + 0.01)^d2
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## d0 24.89605 3.67837 6.768 5.78e-08 ***
## d1 -0.16260 0.02469 -6.586 1.02e-07 ***
## d2 -0.18484 0.03049 -6.063 5.17e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.9 on 37 degrees of freedom
##
## Number of iterations to convergence: 9
## Achieved convergence tolerance: 2.678e-06
grid$z_pow <- predict(n1, newdata = grid)
# Resultado do ajuste.
wireframe(z_pow ~ dia + dis,
data = grid,
drape = TRUE,
xlab = list("Time after the\nfirst assessement (days)",
rot = 32),
ylab = list("Distance from\norchard border (m)",
rot = -32),
zlab = list("Mean spores", rot = 90),
main = list("Season 2011/12", font = 1),
col.regions =
colorRampPalette(colors = c("white", "black"))(101),
screen = list(x = -110, z = -20, y = -140),
scpos = list(x = 9, y = 5, z = 2),
type = "on",
panel.3d.wireframe = panel.3d.contour,
scales = list(arrows = FALSE))
fit_safra1 <- list(g0, g1, n0, n1)
#-----------------------------------------------------------------------
# Leitura dos dados.
# Linha de armadilhas I.
da2 <- read.xls(f, sheet = 2, header = TRUE, stringsAsFactors = FALSE)
da2 <- da2[, 1:4]
names(da2) <- c("ts", "dia", "dis", "numesp")
da2$line <- "I"
# Linha de armadilhas II.
da3 <- read.xls(f, sheet = 3, header = TRUE, stringsAsFactors = FALSE)
da3 <- da3[, 1:4]
names(da3) <- c("ts", "dia", "dis", "numesp")
da3$line <- "II"
# Justa as linhas.
da <- rbind(da2, da3)
da$line <- factor(da$line)
da$ts <- as.Date(da$ts, format = "%d/%m/%y")
str(da)
## 'data.frame': 480 obs. of 5 variables:
## $ ts : Date, format: "2012-12-18" "2012-12-18" ...
## $ dia : int 0 0 0 0 0 0 0 0 7 7 ...
## $ dis : int 0 4 8 16 32 64 128 276 0 4 ...
## $ numesp: int NA 3 5 11 2 3 2 1 10 9 ...
## $ line : Factor w/ 2 levels "I","II": 1 1 1 1 1 1 1 1 1 1 ...
# A origem é na borda do pomar, aos 16 metros.
da$dis <- da$dis - 16
da <- subset(da, dis >= 0)
#-----------------------------------------------------------------------
# Visualização.
# A partir de qual data os resgistros são todos nulos?
head(aggregate(cbind(y = numesp > 0) ~ dia, data = da, FUN = sum),
n = 13)
## dia y
## 1 0 8
## 2 7 8
## 3 14 6
## 4 21 6
## 5 28 6
## 6 35 3
## 7 42 0
## 8 49 1
## 9 56 3
## 10 63 0
## 11 70 0
## 12 77 0
## 13 84 0
# Descartado a porção nula trazida com o aumento da data.
# db <- subset(da, dia <= 80)
db <- subset(da, dia <= 50)
xyplot(numesp ~ ts | factor(dis),
groups = line,
data = db,
subset = is.finite(numesp),
type = "o",
xlab = "Date",
ylab = "Number of spores in the trap",
main = list("Season 2012/13", font = 1),
as.table = TRUE)
xyplot(numesp ~ dis | factor(ts),
groups = line,
data = db,
subset = is.finite(numesp),
type = "o",
xlab = "Trap distance from the orchard border (m)",
ylab = "Number of spores in the trap",
main = list("Season 2012/13", font = 1),
as.table = TRUE)
O modelo foi especificado de forma equivalente a safra anterior, porém houve a inclusão do efeito de linha de coleta de esporos. As linhas funcionam como blocos e são incluÃdas no modelo para acomodar variações locais que podem mudar a intensidade de esporos, como por exemplo, pela influência da direção do vento predominante.
#-----------------------------------------------------------------------
# Declarando um modelo para a contagem usando quasi-Poisson.
g0 <- glm(numesp ~ line * poly(dia, 2) * poly(dis, 2),
data = db,
family = quasipoisson)
# Reajuste com polinômio cru.
g0 <- update(g0, ~line * (dia + I(dia^2)) * (dis + I(dis^2)))
# ResÃduos.
par(mfrow = c(2, 2))
plot(g0)
layout(1)
# Resumo do ajuste.
anova(g0, test = "F")
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: numesp
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 66 302.95
## line 1 10.995 65 291.96 3.7793 0.0576441 .
## dia 1 86.279 64 205.68 29.6572 1.65e-06 ***
## I(dia^2) 1 19.000 63 186.68 6.5310 0.0137545 *
## dis 1 42.827 62 143.85 14.7211 0.0003571 ***
## I(dis^2) 1 9.448 61 134.40 3.2476 0.0776799 .
## line:dia 1 0.000 60 134.40 0.0000 0.9955563
## line:I(dia^2) 1 2.618 59 131.78 0.8998 0.3474857
## line:dis 1 0.342 58 131.44 0.1177 0.7330101
## line:I(dis^2) 1 0.054 57 131.39 0.0185 0.8924889
## dia:dis 1 2.983 56 128.41 1.0252 0.3162560
## dia:I(dis^2) 1 4.344 55 124.06 1.4932 0.2275667
## I(dia^2):dis 1 1.495 54 122.57 0.5139 0.4768473
## I(dia^2):I(dis^2) 1 0.437 53 122.13 0.1502 0.7000111
## line:dia:dis 1 3.216 52 118.91 1.1055 0.2982205
## line:dia:I(dis^2) 1 1.376 51 117.54 0.4731 0.4947919
## line:I(dia^2):dis 1 0.436 50 117.10 0.1500 0.7002036
## line:I(dia^2):I(dis^2) 1 0.000 49 117.10 0.0001 0.9918406
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Modelo reduzido.
g1 <- update(g0, ~line + dia + dis)
# Teste para os termos abandonados.
anova(g1, g0, test = "F")
## Analysis of Deviance Table
##
## Model 1: numesp ~ line + dia + dis
## Model 2: numesp ~ line + dia + I(dia^2) + dis + I(dis^2) + line:dia +
## line:I(dia^2) + line:dis + line:I(dis^2) + dia:dis + dia:I(dis^2) +
## I(dia^2):dis + I(dia^2):I(dis^2) + line:dia:dis + line:dia:I(dis^2) +
## line:I(dia^2):dis + line:I(dia^2):I(dis^2)
## Resid. Df Resid. Dev Df Deviance F Pr(>F)
## 1 63 163.12
## 2 49 117.10 14 46.024 1.13 0.3574
# ResÃduos.
par(mfrow = c(2, 2))
plot(g1)
layout(1)
# Resumo do ajuste.
summary(g1)
##
## Call:
## glm(formula = numesp ~ line + dia + dis, family = quasipoisson,
## data = db)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.1385 -1.2767 -0.6671 0.3074 5.4808
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.464412 0.251475 9.800 2.74e-14 ***
## lineII -0.860631 0.335914 -2.562 0.01281 *
## dia -0.054583 0.011676 -4.675 1.60e-05 ***
## dis -0.009001 0.003014 -2.987 0.00401 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 3.184985)
##
## Null deviance: 302.95 on 66 degrees of freedom
## Residual deviance: 163.13 on 63 degrees of freedom
## (5 observations deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 6
beta <- coef(g1)
eqn <- data.frame(sign = ifelse(beta >= 0, "+", "-"),
abs = abs(beta))
Para esta safra, não houve interação de distância com tempo. Portanto, o modelo final é um modelo aditivo nos termos linha, distância e tempo. Considerando o efeito médio de linha, a seguinte equação representa o número médio de esporos (\(\hat{y}\)) \[ \hat{y} = \exp\{ 2.0340962 - -0.8606315 \times \texttt{dia} - -0.0545834 \times \texttt{dis} \} \]
# Predição com efeito médio das linhas.
grid <- with(db,
expand.grid(line = factor(levels(line)[1],
levels = levels(line)),
dis = c(0, 10, 25, 50, 100, 300),
dia = seq(0, 60, by = 15),
KEEP.OUT.ATTRS = FALSE))
X <- model.matrix(formula(g1)[-2], data = grid)
colnames(X)
## [1] "(Intercept)" "lineII" "dia" "dis"
X[, "lineII"] <- 0.5
nu <- X %*% coef(g1)
grid$z <- g1$family$linkinv(nu)
grid$line <- NULL
tb <- reshape2::dcast(grid, formula = dis ~ dia, value.var = "z")
cap <-
"Número de esporos como função da distância (m) e do perÃodo após a primeira coleta (dias)."
knitr::kable(tb, caption = cap)
dis | 0 | 15 | 30 | 45 | 60 |
---|---|---|---|---|---|
0 | 7.6453391 | 3.3714572 | 1.4867521 | 0.6556310 | 0.2891215 |
10 | 6.9872162 | 3.0812369 | 1.3587701 | 0.5991932 | 0.2642335 |
25 | 6.1047141 | 2.6920693 | 1.1871542 | 0.5235137 | 0.2308601 |
50 | 4.8745430 | 2.1495859 | 0.9479288 | 0.4180196 | 0.1843391 |
100 | 3.1079288 | 1.3705408 | 0.6043839 | 0.2665224 | 0.1175316 |
300 | 0.5135937 | 0.2264856 | 0.0998761 | 0.0440436 | 0.0194224 |
# Predição com efeito médio das linhas.
grid <- with(db,
expand.grid(line = levels(line),
dis = seq(0, 200, length.out = 151),
dia = seq(0, 50, length.out = 151),
KEEP.OUT.ATTRS = FALSE))
# grid$z <- predict(g1, newdata = grid, type = "response")
X <- model.matrix(formula(g1)[-2], data = grid)
colnames(X)
## [1] "(Intercept)" "lineII" "dia" "dis"
X[, "lineII"] <- 0.5
nu <- X %*% coef(g1)
grid$z <- g1$family$linkinv(nu)
grid$line <- NULL
# range(grid$z)
# at <- seq(0, 10, by = 1)
at <- c(Inf, 7, 5, 2, 3, 1, 0)
lv2 <- levelplot(z ~ dia + dis,
data = grid,
xlab = "Time after the first assessement (days)",
ylab = "Distance from orchard border (m)",
col.regions = colorRampPalette(
colors = c("white", "gray60")), #(length(at) + 1),
at = at,
contour = FALSE) +
as.layer(
contourplot(z ~ dia + dis,
data = grid,
label.style = "flat",
at = at))
print(lv2)
# Visualizando o ajuste.
w2 <- wireframe(z ~ dia + dis,
data = grid,
drape = TRUE,
xlab = list("Time after the\nfirst assessement (days)",
rot = 32),
ylab = list("Distance from\norchard border (m)",
rot = -32),
zlab = list("Mean spores", rot = 90),
main = list("Season 2012/13", font = 1),
col.regions =
colorRampPalette(colors = c("white", "black"))(101),
screen = list(x = -110, z = -20, y = -140),
scpos = list(x = 9, y = 5, z = 2),
type = "on",
panel.3d.wireframe = panel.3d.contour,
scales = list(arrows = FALSE))
print(w2)
# rp.wire(w)
str(db)
## 'data.frame': 72 obs. of 5 variables:
## $ ts : Date, format: "2012-12-18" "2012-12-18" ...
## $ dia : int 0 0 0 0 0 7 7 7 7 7 ...
## $ dis : num 0 16 48 112 260 0 16 48 112 260 ...
## $ numesp: int 11 2 3 2 1 28 3 2 2 1 ...
## $ line : Factor w/ 2 levels "I","II": 1 1 1 1 1 1 1 1 1 1 ...
str(grid)
## 'data.frame': 45602 obs. of 3 variables:
## $ dis: num 0 0 1.33 1.33 2.67 ...
## $ dia: num 0 0 0 0 0 0 0 0 0 0 ...
## $ z : num [1:45602, 1] 7.65 7.65 7.55 7.55 7.46 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:45602] "1" "2" "3" "4" ...
## .. ..$ : NULL
db$lineII <- as.integer(db$line == "II")
n0 <- nls(numesp ~ exp(b0 + a * lineII + b1 * dia + b2 * dis),
data = db,
start = list(b0 = 4.5,
a = -0.86,
b1 = -0.05,
b2 = -0.009),
trace = FALSE)
# Estimativas. Compara entre modelos.
cbind(`m. não linear` = coef(n0),
`m. linear generalizado` = coef(g1))
## m. não linear m. linear generalizado
## b0 2.95449717 2.464411944
## a -1.12683843 -0.860631497
## b1 -0.04633810 -0.054583409
## b2 -0.09982847 -0.009001397
summary(n0)
##
## Formula: numesp ~ exp(b0 + a * lineII + b1 * dia + b2 * dis)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## b0 2.95450 0.12982 22.758 < 2e-16 ***
## a -1.12684 0.33877 -3.326 0.00147 **
## b1 -0.04634 0.01022 -4.534 2.65e-05 ***
## b2 -0.09983 0.03157 -3.162 0.00241 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.939 on 63 degrees of freedom
##
## Number of iterations to convergence: 16
## Achieved convergence tolerance: 5.328e-06
## (5 observations deleted due to missingness)
grid$lineII <- 0.5
grid$z_exp <- predict(n0, newdata = grid)
# Resultado do ajuste.
wireframe(z_exp ~ dia + dis,
data = grid,
drape = TRUE,
xlab = list("Time after the\nfirst assessement (days)",
rot = 32),
ylab = list("Distance from\norchard border (m)",
rot = -32),
zlab = list("Mean spores", rot = 90),
main = list("Season 2011/12", font = 1),
col.regions =
colorRampPalette(colors = c("white", "black"))(101),
screen = list(x = -110, z = -20, y = -140),
scpos = list(x = 9, y = 5, z = 2),
type = "on",
panel.3d.wireframe = panel.3d.contour,
scales = list(arrows = FALSE))
n1 <- nls(numesp ~ d0 * (dia + 0.01)^d1 * (dis + 0.01)^d2,
data = db,
start = list(d0 = 4.8,
d1 = 0.5,
d2 = 0.5),
trace = FALSE)
summary(n1)
##
## Formula: numesp ~ d0 * (dia + 0.01)^d1 * (dis + 0.01)^d2
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## d0 2.96624 0.63592 4.665 1.62e-05 ***
## d1 -0.10753 0.04063 -2.646 0.0102 *
## d2 -0.15874 0.04785 -3.317 0.0015 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.676 on 64 degrees of freedom
##
## Number of iterations to convergence: 11
## Achieved convergence tolerance: 2.166e-06
## (5 observations deleted due to missingness)
grid$z_pow <- predict(n1, newdata = grid)
# Resultado do ajuste.
wireframe(z_pow ~ dia + dis,
data = grid,
drape = TRUE,
xlab = list("Time after the\nfirst assessement (days)",
rot = 32),
ylab = list("Distance from\norchard border (m)",
rot = -32),
zlab = list("Mean spores", rot = 90),
main = list("Season 2011/12", font = 1),
col.regions =
colorRampPalette(colors = c("white", "black"))(101),
screen = list(x = -110, z = -20, y = -140),
scpos = list(x = 9, y = 5, z = 2),
type = "on",
panel.3d.wireframe = panel.3d.contour,
scales = list(arrows = FALSE))
fit_safra2 <- list(g0, g1, n0, n1)
# Diagramas de dispersão.
xy1 <- xyplot(numesp ~ dis | factor(ts),
data = da1,
subset = ts <= "2012-01-09" & is.finite(numesp),
lty = 1,
col = 1,
type = "o",
xlab = "Distance between trap and orchard border (m)",
ylab = "Number of spores in the trap",
main = list("Season 2011/12", font = 1),
layout = c(NA, 1),
as.table = TRUE)
xy2 <- xyplot(numesp ~ dis | factor(ts),
groups = line,
lty = c(1, 2),
col = 1,
data = db,
subset = ts <= "2013-01-15" & is.finite(numesp),
type = "o",
xlab = "Distance between trap and orchard border (m)",
ylab = "Number of spores in the trap",
main = list("Season 2012/13", font = 1),
layout = c(NA, 1),
as.table = TRUE)
cap <-
"Scatter plot of the number of spores per trap as a function of the distance between trap and orchard border for the first assessement dates separated by season."
gridExtra::grid.arrange(xy1, xy2, ncol = 1)
Scatter plot of the number of spores per trap as a function of the distance between trap and orchard border for the first assessement dates separated by season.
cap <-
"Coutour levels plot for the mean number of spores predicted as a function of the distance between trap and orchard border (m) and days after the first assessement for each season."
# Gráficos de nÃveis.
gridExtra::grid.arrange(lv1, lv2, nrow = 1)
Coutour levels plot for the mean number of spores predicted as a function of the distance between trap and orchard border (m) and days after the first assessement for each season.
cap <-
"Surface for the mean number of spores predicted as a function of the distance between trap and orchard border (m) and days after the first assessement for each season."
# SuperfÃcies.
# gridExtra::grid.arrange(w1, w2, nrow = 1)
print(w1,
position = c(x0 = 0, y0 = 0, x1 = 0.515, y1 = 1),
more = TRUE)
print(w2,
position = c(x0 = 0.5, y0 = 0, x1 = 1, y1 = 1),
more = FALSE)
Surface for the mean number of spores predicted as a function of the distance between trap and orchard border (m) and days after the first assessement for each season.
da1$ts_px <- as.POSIXlt(da1$ts)
db$ts_px <- as.POSIXlt(db$ts)
db$ts_px$year <- db$ts_px$year - 1
xlim <- extendrange(c(da1$ts_px, db$ts_px))
ylim <- extendrange(c(0, max(c(da1$numesp, db$numesp), na.rm = TRUE)))
plot_safra1 <-
xyplot(numesp ~ as.Date(ts_px) | factor(dis),
data = da1,
xlim = as.Date(xlim),
ylim = ylim,
layout = c(NA, 1),
type = "o",
xlab = "Date",
ylab = "Number of spores in the trap",
main = list("Season 2011/12", font = 1),
as.table = TRUE)
plot_safra2 <-
xyplot(numesp ~ as.Date(ts_px) | factor(dis),
groups = line,
auto.key = list(title = "Line",
cex.title = 1,
type = "o",
columns = 1,
corner = c(0.95, 0.80)),
data = db,
xlim = as.Date(xlim),
ylim = ylim,
layout = c(NA, 1),
subset = is.finite(numesp),
type = "o",
xlab = "Date",
ylab = "Number of spores in the trap",
main = list("Season 2012/13", font = 1),
as.table = TRUE)
gridExtra::grid.arrange(plot_safra1, plot_safra2, ncol = 1)
#
xlim <- extendrange(c(da1$dia, db$dia))
ylim <- extendrange(c(0, da1$numesp, db$numexp))
# Malha para predição em cada distância como função do tempo.
grid <- with(da1,
expand.grid(dis = sort(unique(dis)),
dia = seq(min(dia),
max(dia),
length.out = 30)))
grid$z <- predict(fit_safra1[[2]], newdata = grid, type = "response")
# grid$z_exp <- predict(fit_safra1[[3]], newdata = grid)
grid$z_pow <- predict(fit_safra1[[4]], newdata = grid)
plot_fit_safra1 <-
xyplot(numesp ~ dia | factor(dis),
data = da1,
layout = c(NA, 1),
col = 1,
xlim = xlim,
# ylim = ylim,
xlab = "Days",
ylab = "Number of spores in the trap",
main = list("Season 2011/12", font = 1),
as.table = TRUE,
key = list(columns = 2,
# title = "Models",
# corner = c(1, 0.6),
cex.title = 1,
lines = list(col = 1, lty = 1:2),
text = list(c("GLM Exponential", "Power")))) +
as.layer(
xyplot(z + z_pow ~ dia | factor(dis),
data = grid,
lty = 1:2,
col = 1,
type = "l"))
# Predição com efeito médio das linhas.
grid <- with(db,
expand.grid(line = factor(levels(line)[1],
levels = levels(line)),
dis = sort(unique(dis)),
dia = seq(0, max(dia), length.out = 51),
KEEP.OUT.ATTRS = FALSE))
X <- model.matrix(formula(fit_safra2[[2]])[-2], data = grid)
X[, "lineII"] <- 0.5
nu <- X %*% coef(fit_safra2[[2]])
grid$z <- fit_safra2[[2]]$family$linkinv(nu)
grid$line <- NULL
grid$lineII <- 0.5
# grid$z_exp <- predict(fit_safra2[[3]], newdata = grid)
grid$z_pow <- predict(fit_safra2[[4]], newdata = grid)
plot_fit_safra2 <-
xyplot(numesp ~ dia | factor(dis),
data = db,
layout = c(NA, 1),
col = 1,
xlim = xlim,
# ylim = ylim,
xlab = "Days",
ylab = "Number of spores in the trap",
main = list("Season 2012/13", font = 1),
as.table = TRUE,
key = list(columns = 2,
# title = "Models",
# corner = c(1, 0.6),
cex.title = 1,
lines = list(col = 1, pch = 1:2),
text = list(c("GLM Exponential", "Power")))) +
as.layer(
xyplot(z + z_pow ~ dia | factor(dis),
data = grid,
col = 1,
lty = 1:2,
type = "l"))
gridExtra::grid.arrange(plot_fit_safra1, plot_fit_safra2, ncol = 1)
#
xlim <- extendrange(c(da1$dis, db$dis))
ylim <- extendrange(c(0, da1$numesp, db$numexp))
# Malha para predição em cada distância como função do tempo.
grid <- with(da1,
expand.grid(dis = seq(0,
# max(xlim),
150,
length.out = 51),
dia = sort(unique(dia))[1:5]))
grid$z <- predict(fit_safra1[[2]], newdata = grid, type = "response")
# grid$z_exp <- predict(fit_safra1[[3]], newdata = grid)
grid$z_pow <- predict(fit_safra1[[4]], newdata = grid)
grid <- merge(grid, unique(da1[, c("dia", "ts")]), by = "dia")
grid$ts2 <- strftime(grid$ts, format = "%b %d, %Y")
da1$ts2 <- strftime(da1$ts, format = "%b %d, %Y")
plot_fit_safra1 <-
xyplot(numesp ~ dis | ts2,
data = subset(da1, ts %in% unique(grid$ts)),
layout = c(NA, 1),
col = 1,
# xlim = xlim,
xlim = c(xlim[1], 160),
# ylim = ylim,
# xlab = "Distance (m)",
xlab = NULL,
ylab = "Number of spores",
# main = list("Season 2011/12", font = 1),
main = list("A", font = 1),
par.settings = list(
strip.background = list(col = "lightgrey"),
par.main.text = list(font = 2,
just = "right",
x = grid::unit(0.95, "npc"))),
as.table = TRUE,
par.strip.text = list(cex = 0.8),
scales = list(alternating = 1, cex = 0.7, tck = c(1, 0)),
key = list(columns = 2,
# title = "Models",
# corner = c(1, 0.6),
cex.title = 1,
lines = list(col = 1, lty = 1:2),
text = list(c("GLM Exponential", "Power")))) +
as.layer(
xyplot(z + z_pow ~ dis | ts2,
data = grid,
col = 1,
lty = 1:2,
type = "l"))
plot_fit_safra1
# Predição com efeito médio das linhas.
grid <- with(db,
expand.grid(line = factor(levels(line)[1],
levels = levels(line)),
dis = seq(0, max(xlim), length.out = 51),
dia = sort(unique(dia))[1:5],
KEEP.OUT.ATTRS = FALSE))
X <- model.matrix(formula(fit_safra2[[2]])[-2], data = grid)
X[, "lineII"] <- 0.5
nu <- X %*% coef(fit_safra2[[2]])
grid$z <- fit_safra2[[2]]$family$linkinv(nu)
grid$line <- NULL
grid$lineII <- 0.5
# grid$z_exp <- predict(fit_safra2[[3]], newdata = grid)
grid$z_pow <- predict(fit_safra2[[4]], newdata = grid)
grid <- merge(grid, unique(db[, c("dia", "ts")]), by = "dia")
grid$ts2 <- strftime(grid$ts, format = "%b %d, %Y")
db$ts2 <- strftime(db$ts, format = "%b %d, %Y")
plot_fit_safra2 <-
xyplot(numesp ~ dis | ts2,
data = subset(db, ts %in% unique(grid$ts)),
layout = c(NA, 1),
col = 1,
xlim = xlim,
# ylim = ylim,
xlab = "Distance (m)",
ylab = "Number of spores",
# main = list("Season 2012/13", font = 1),
main = list("B", font = 1),
par.settings = list(
strip.background = list(col = "lightgrey"),
par.main.text = list(font = 2,
just = "right",
x = grid::unit(0.95, "npc"))),
as.table = TRUE,
par.strip.text = list(cex = 0.8),
# key = list(columns = 2,
# # title = "Models",
# # corner = c(1, 0.6),
# cex.title = 1,
# lines = list(col = 1, lty = 1:2),
# text = list(c("GLM Exponential", "Power"))),
scales = list(alternating = 1, cex = 0.7, tck = c(1, 0))) +
as.layer(
xyplot(z + z_pow ~ dis | ts2,
data = grid,
col = 1,
lty = 1:2,
type = "l"))
gridExtra::grid.arrange(plot_fit_safra1, plot_fit_safra2, ncol = 1)
#
The final gaussian exponential model is \[ \hat{y} = \exp\{\beta_0 + \beta_1 t + \beta_2 t^2 + \beta_3 x\} \] where
This model were fitted to the data using nls()
function in R language.
The final quasi-poisson exponential (log link function) model is \[ \hat{y} = \exp\{\beta_0 + \beta_1 t + \beta_2 t^2 + \beta_3 x\} \] where
The difference between the Gaussian and Quasi-Poisson is about the assumptions for the error distribution, since the mean equation has the same terms. The former assumes a Gaussian distribution with constant variance (no mean-variance relation). The latter assumes the two first moments of a Poisson distribution and includes an extra dispersion parameter to have a mean-variance relation described by \(\text{Var}(Y) = \phi\cdot\text{E}(Y)\), that is, the variance is proportional to the mean.
This model were fitted to the data using glm()
function in R language using quasipoisson
as family.
The final gaussian power model is \[ \hat{y} = \beta_0 \cdot t^{\beta_1} \cdot x^{\beta_2} \] where
This model were fitted to the data using nls()
function in R language.