Reproducible Data Analysis of Scientific Cooperations
|
Effect of fungicide sprays programs and pistachio hedging on fruit stain levels caused by Alternaria late blight in commercial pistachio orchard of Tulare County, California.
Paulo S. F. Lichtemberg Ryan D. Puckett Walmes M. Zeviani Connor G. Cunningham Themis J. Michailides
# https://github.com/walmes/wzRfun
# devtools::install_github("walmes/wzRfun")
library(lattice)
library(latticeExtra)
library(plyr)
library(reshape)
library(doBy)
library(multcomp)
library(lme4)
library(lmerTest)
library(wzRfun)
library(wzCoop)
# Data strucuture.
str(quality_ake_b)
## 'data.frame': 288 obs. of 12 variables:
## $ yr : int 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
## $ hed: Factor w/ 2 levels "heavy","normal": 1 1 1 1 1 1 1 1 1 1 ...
## $ tra: Factor w/ 4 levels "cnt","fon2","mer2",..: 4 4 4 4 4 4 4 4 4 1 ...
## $ plo: Factor w/ 12 levels "1A","1B","1C",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ tre: int 1 1 1 2 2 2 3 3 3 4 ...
## $ rep: int 1 2 3 1 2 3 1 2 3 1 ...
## $ c0 : int 2 4 7 4 0 0 0 0 2 NA ...
## $ c1 : int 27 55 53 31 44 38 22 21 25 NA ...
## $ c2 : int 18 12 12 26 40 37 25 32 36 NA ...
## $ c3 : int 16 16 14 33 8 18 19 12 15 NA ...
## $ c4 : int 37 13 14 6 8 7 34 35 22 NA ...
## $ tot: int 100 100 100 100 100 100 100 100 100 NA ...
# Short names are easier to use.
da <- quality_ake_b
da$yr <- factor(da$yr)
intlev <- c(c0 = "[0]",
c1 = "[1, 10]",
c2 = "[11, 35]",
c3 = "[36, 64]",
c4 = "[65, 100]")
# Creating the blocks.
da$block <- with(da, {
factor(ifelse(as.integer(gsub("\\D", "", plo)) > 2, "II", "I"))
})
# Creating unique levels for trees.
da$tre <- with(da,
interaction(hed, tra, block, tre, drop = TRUE))
# Stack data to plot variables.
db <- melt(da,
id.vars = 1:6,
measure.vars = grep("c\\d", names(quality_ake_b)),
na.rm = TRUE)
names(db)[ncol(db) - 1:0] <- c("categ", "freq")
useOuterStrips(
xyplot(freq ~ categ | hed + yr,
groups = tra,
data = db,
xlab = "Percentage of the shell surface discolored",
ylab = "Number of fruits in each class (n = 100)",
jitter.x = TRUE,
scales = list(x = list(labels = intlev,
alternating = 1)),
auto.key = list(columns = 2,
title = "Fungicide",
cex.title = 1.1),
type = c("p", "a")))
useOuterStrips(
xyplot(freq ~ categ | tra + yr,
groups = hed,
data = db,
xlab = "Percentage of the shell surface discolored",
ylab = "Number of fruits in each class (n = 100)",
jitter.x = TRUE,
scales = list(x = list(labels = intlev,
rot = 45,
alternating = 1)),
auto.key = list(columns = 2,
title = "Hed",
cex.title = 1.1),
type = c("p", "a")))
i <- grep("^c\\d$", x = names(da))
# useOuterStrips(
# splom(~da[, i] | hed + yr,
# groups = tra,
# type = c("p", "r"),
# data = da))
# Acumulated proportion.
cml <- t(apply(da[, i], MARGIN = 1, FUN = cumsum))
cml <- cml[, -length(i)]
colnames(cml) <- paste0("p", 1:ncol(cml))
# Add acummulated proportions.
da <- cbind(da, as.data.frame(cml))
db <- melt(da,
id.vars = 1:6,
measure.vars = colnames(cml),
na.rm = TRUE)
names(db)[ncol(db) - 1:0] <- c("categ", "prop")
str(db)
## 'data.frame': 1104 obs. of 8 variables:
## $ yr : Factor w/ 2 levels "2015","2016": 1 1 1 1 1 1 1 1 1 1 ...
## $ hed : Factor w/ 2 levels "heavy","normal": 1 1 1 1 1 1 1 1 1 2 ...
## $ tra : Factor w/ 4 levels "cnt","fon2","mer2",..: 4 4 4 4 4 4 4 4 4 2 ...
## $ plo : Factor w/ 12 levels "1A","1B","1C",..: 1 1 1 1 1 1 1 1 1 2 ...
## $ tre : Factor w/ 48 levels "heavy.mer3.I.1",..: 1 1 1 2 2 2 3 3 3 5 ...
## $ rep : int 1 2 3 1 2 3 1 2 3 1 ...
## $ categ: Factor w/ 4 levels "p1","p2","p3",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ prop : int 2 4 7 4 0 0 0 0 2 0 ...
accumlev <- c(p1 = "0",
p2 = "<=10",
p3 = "<=35",
p4 = "<=65")
useOuterStrips(
xyplot(prop ~ categ | hed + yr,
groups = tra,
data = db,
xlab = "Percentage of the shell surface discolored",
ylab = "Number of fruits in each class (n = 100)",
jitter.x = TRUE,
auto.key = list(columns = 2,
title = "Fungicide",
cex.title = 1.1),
scales = list(x = list(labels = accumlev,
alternating = 1)),
type = c("p", "a")))
useOuterStrips(
xyplot(prop ~ categ | tra + yr,
groups = hed,
data = db,
xlab = "Percentage of the shell surface discolored",
ylab = "Number of fruits in each category (n = 100)",
jitter.x = TRUE,
auto.key = list(columns = 2,
title = "Hed",
cex.title = 1.1),
scales = list(x = list(labels = accumlev,
alternating = 1)),
type = c("p", "a")))
The plots above showed that the greatest difference among levels of treaments, either hed or fungicides, occurs for the accumulated proportions at class \(\leq 35\) (p2
). Because of this, the analysis will be for this response variable.
The total number of independent observations results from the calculus \[ \begin{aligned} 288 \text{ observations} &= 2 \text{ years } \times 2 \text{ blocks } \times \\ &\quad\,\, 2 \text{ heds } \times 4 \text{ fungicides } \times \\ &\quad\,\, 3 \text{ trees } \times 3 \text{ bags }.\\ \end{aligned} \]
da$y <- da$p2
da <- da[!is.na(da$y), ]
xyplot(p2 ~ tra | yr,
groups = hed,
data = da,
type = c("p", "a"),
xlab = "Fungicides",
ylab = expression("Number of fruits with" <= "35% of the shell surface discolored"),
jitter.x = TRUE,
auto.key = list(columns = 2,
title = "Hed",
cex.title = 1.1))
# xyplot(p2 ~ yr | tre,
# data = da,
# xlab = "Years",
# ylab = "Number of fruits with 35% or less damage",
# as.table = TRUE,
# jitter.x = TRUE)
The analysis are being separated for each year. First, although the choosen response variable is limited or bounded (because it is a sample proportion), it doesn’t show problematic departures from a linear (gaussian) model. Linear models are more flexible for analysing clutered data as such.
da15 <- subset(da, yr == "2015")
da16 <- subset(da, yr == "2016")
# Quasi-binomial model.
# m0 <- glm(cbind(p2, 100 - p2) ~ block + yr * hed * tra,
# family = quasibinomial,
# data = da)
# Just to check if the model assumptions are meet.
m0 <- lm(p2 ~ block + yr * hed * tra,
data = da)
par(mfrow = c(2, 2))
plot(m0); layout(1)
# Normal approximation goes well.
A simple linear models is fitted only to check how the residuals behave. No evidence of departures from models assumptions were found.
# Year 2015.
m15 <- lmer(p2 ~ block + hed * tra + (1 | tre),
data = da15)
# r <- residuals(m15)
# f <- fitted(m15)
# plot(r ~ f)
# qqnorm(r)
anova(m15)
## Analysis of Variance Table of type III with Satterthwaite
## approximation for degrees of freedom
## Sum Sq Mean Sq NumDF DenDF F.value Pr(>F)
## block 32.467 32.467 1 35 1.05247 0.3120
## hed 18.445 18.445 1 35 0.59794 0.4446
## tra 87.850 29.283 3 35 0.94926 0.4274
## hed:tra 189.774 63.258 3 35 2.05060 0.1246
summary(m15)
## Linear mixed model fit by REML t-tests use Satterthwaite
## approximations to degrees of freedom [lmerMod]
## Formula: p2 ~ block + hed * tra + (1 | tre)
## Data: da15
##
## REML criterion at convergence: 896.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4619 -0.4981 -0.1348 0.5054 2.6844
##
## Random effects:
## Groups Name Variance Std.Dev.
## tre (Intercept) 166.72 12.912
## Residual 30.85 5.554
## Number of obs: 132, groups: tre, 44
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 20.7481 6.9546 35.0000 2.983 0.00517 **
## blockII -4.1628 4.0577 35.0000 -1.026 0.31198
## hednormal -2.6667 8.5877 35.0000 -0.311 0.75801
## trafon2 -0.2222 8.5877 35.0000 -0.026 0.97950
## tramer2 10.5000 8.5877 35.0000 1.223 0.22962
## tramer3 9.5556 8.5877 35.0000 1.113 0.27342
## hednormal:trafon2 13.5000 11.5217 35.0000 1.172 0.24923
## hednormal:tramer2 -2.1240 12.1872 35.0000 -0.174 0.86265
## hednormal:tramer3 -13.3333 11.5217 35.0000 -1.157 0.25501
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) blckII hdnrml trafn2 tramr2 tramr3 hdnrml:trf2
## blockII -0.292
## hednormal -0.741 0.000
## trafon2 -0.741 0.000 0.600
## tramer2 -0.741 0.000 0.600 0.600
## tramer3 -0.741 0.000 0.600 0.600 0.600
## hdnrml:trf2 0.552 0.000 -0.745 -0.745 -0.447 -0.447
## hdnrml:trm2 0.498 0.083 -0.705 -0.423 -0.705 -0.423 0.525
## hdnrml:trm3 0.552 0.000 -0.745 -0.447 -0.447 -0.745 0.556
## hdnrml:trm2
## blockII
## hednormal
## trafon2
## tramer2
## tramer3
## hdnrml:trf2
## hdnrml:trm2
## hdnrml:trm3 0.525
# Year 2016.
m16 <- lmer(p2 ~ block + hed * tra + (1 | tre),
data = da16)
# r <- residuals(m16)
# f <- fitted(m16)
# plot(r ~ f)
# qqnorm(r)
anova(m16)
## Analysis of Variance Table of type III with Satterthwaite
## approximation for degrees of freedom
## Sum Sq Mean Sq NumDF DenDF F.value Pr(>F)
## block 1.385 1.385 1 39 0.03743 0.8476
## hed 81.092 81.092 1 39 2.19127 0.1468
## tra 59.092 19.697 3 39 0.53226 0.6629
## hed:tra 55.633 18.544 3 39 0.50111 0.6837
summary(m16)
## Linear mixed model fit by REML t-tests use Satterthwaite
## approximations to degrees of freedom [lmerMod]
## Formula: p2 ~ block + hed * tra + (1 | tre)
## Data: da16
##
## REML criterion at convergence: 984.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.06240 -0.61593 -0.05643 0.60353 1.87718
##
## Random effects:
## Groups Name Variance Std.Dev.
## tre (Intercept) 102.01 10.100
## Residual 37.01 6.083
## Number of obs: 144, groups: tre, 48
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 44.8681 4.6303 39.0000 9.690 6.19e-12 ***
## blockII 0.5972 3.0869 39.0000 0.193 0.8476
## hednormal 11.1111 6.1737 39.0000 1.800 0.0796 .
## trafon2 9.6667 6.1737 39.0000 1.566 0.1255
## tramer2 6.1111 6.1737 39.0000 0.990 0.3283
## tramer3 6.5556 6.1737 39.0000 1.062 0.2948
## hednormal:trafon2 -8.6111 8.7309 39.0000 -0.986 0.3301
## hednormal:tramer2 -9.1111 8.7309 39.0000 -1.044 0.3031
## hednormal:tramer3 -8.4444 8.7309 39.0000 -0.967 0.3394
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) blckII hdnrml trafn2 tramr2 tramr3 hdnrml:trf2
## blockII -0.333
## hednormal -0.667 0.000
## trafon2 -0.667 0.000 0.500
## tramer2 -0.667 0.000 0.500 0.500
## tramer3 -0.667 0.000 0.500 0.500 0.500
## hdnrml:trf2 0.471 0.000 -0.707 -0.707 -0.354 -0.354
## hdnrml:trm2 0.471 0.000 -0.707 -0.354 -0.707 -0.354 0.500
## hdnrml:trm3 0.471 0.000 -0.707 -0.354 -0.354 -0.707 0.500
## hdnrml:trm2
## blockII
## hednormal
## trafon2
## tramer2
## tramer3
## hdnrml:trf2
## hdnrml:trm2
## hdnrml:trm3 0.500
For both years, no effect were found for the experimental factors.
# Represent the results with interval confidence on ls means.
cmp <- list()
lsm <- LSmeans(m15, effect = "hed")
grid <- equallevels(lsm$grid, da)
L <- lsm$K
rownames(L) <- grid[, 1]
cmp$hed <-
rbind(
cbind(yr = "2015", apmc(L, model = m15, focus = "hed")),
cbind(yr = "2016", apmc(L, model = m16, focus = "hed")))
lsm <- LSmeans(m15, effect = "tra")
grid <- equallevels(lsm$grid, da)
L <- lsm$K
rownames(L) <- grid[, 1]
cmp$tra <-
rbind(
cbind(yr = "2015", apmc(L, model = m15, focus = "tra")),
cbind(yr = "2016", apmc(L, model = m16, focus = "tra")))
cmp <- ldply(cmp,
.id = "effect",
.fun = function(x) {
names(x)[2] <- "level"
return(x)
})
cmp$level <- factor(cmp$level, levels = unique(cmp$level))
cmp
## effect yr level fit lwr upr cld
## 1 hed 2015 heavy 23.62500 17.979486 29.27051 a
## 2 hed 2015 normal 20.46899 14.801639 26.13635 a
## 3 hed 2016 heavy 50.75000 46.471923 55.02808 a
## 4 hed 2016 normal 55.31944 51.041367 59.59752 a
## 5 tra 2015 cnt 17.33333 8.917498 25.74917 a
## 6 tra 2015 fon2 23.86111 16.333759 31.38846 a
## 7 tra 2015 mer2 26.77132 18.296971 35.24566 a
## 8 tra 2015 mer3 20.22222 12.694870 27.74957 a
## 9 tra 2016 cnt 50.72222 44.672107 56.77234 a
## 10 tra 2016 fon2 56.08333 50.033218 62.13345 a
## 11 tra 2016 mer2 52.27778 46.227663 58.32789 a
## 12 tra 2016 mer3 53.05556 47.005441 59.10567 a
resizePanels(
useOuterStrips(
segplot(level ~ lwr + upr | yr + effect,
centers = fit,
data = cmp,
draw = FALSE,
xlab = "Proportion of fruits with 35% or less damage",
ylab = "Levels of the factors",
scales = list(y = list(relation = "free"),
alternating = 1),
ann = sprintf("%0.2f %s", cmp$fit, cmp$cld)
)
), h = c(2, 4)) +
layer(panel.text(x = centers[subscripts],
y = z[subscripts],
labels = ann[subscripts],
pos = 3))
## domingo, 07 de maio de 2017, 22:17
## ----------------------------------------
## R version 3.3.3 (2017-03-06)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.2 LTS
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=pt_BR.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=pt_BR.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=pt_BR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] methods stats graphics grDevices utils datasets
## [7] base
##
## other attached packages:
## [1] wzCoop_0.0-5 captioner_2.2.3 wzRfun_0.79
## [4] lmerTest_2.0-33 lme4_1.1-12 Matrix_1.2-8
## [7] multcomp_1.4-6 TH.data_1.0-8 MASS_7.3-45
## [10] survival_2.40-1 mvtnorm_1.0-5 doBy_4.5-15
## [13] reshape_0.8.6 plyr_1.8.4 latticeExtra_0.6-28
## [16] RColorBrewer_1.1-2 lattice_0.20-34 rmarkdown_1.3
## [19] knitr_1.15.1
##
## loaded via a namespace (and not attached):
## [1] zoo_1.7-14 splines_3.3.3 testthat_1.0.2
## [4] colorspace_1.3-2 htmltools_0.3.5 yaml_2.1.14
## [7] rpanel_1.1-3 base64enc_0.1-3 nloptr_1.0.4
## [10] withr_1.0.2 foreign_0.8-67 stringr_1.1.0
## [13] commonmark_1.1 munsell_0.4.3 gtable_0.2.0
## [16] htmlwidgets_0.8 devtools_1.12.0 memoise_1.0.0
## [19] codetools_0.2-15 evaluate_0.10 parallel_3.3.3
## [22] pbkrtest_0.4-6 highr_0.6 htmlTable_1.9
## [25] Rcpp_0.12.9 acepack_1.4.1 scales_0.4.1
## [28] backports_1.0.5 checkmate_1.8.2 Hmisc_4.0-2
## [31] gridExtra_2.2.1 ggplot2_2.2.1 digest_0.6.12
## [34] stringi_1.1.2 grid_3.3.3 rprojroot_1.2
## [37] tools_3.3.3 sandwich_2.3-4 magrittr_1.5
## [40] lazyeval_0.2.0 tibble_1.2 Formula_1.2-1
## [43] cluster_2.0.6 crayon_1.3.2 xml2_1.1.1
## [46] data.table_1.10.4 roxygen2_6.0.1 assertthat_0.1
## [49] minqa_1.2.4 R6_2.2.0 rpart_4.1-10
## [52] nnet_7.3-12 nlme_3.1-131
wzCoop - Reproducible Data Analysis of Scientific
Cooperations
|
Walmes Zeviani |