|
Reproducible Data Analysis of Scientific Cooperations
|
# Prepare table of data.
ogt <- as_tibble(RDASC::pistachio_anthracnose$ogrotem)
ogt$exp <- factor(ogt$exp)
attr(ogt, "spec") <- NULL
str(ogt)
## tibble [2,646 × 8] (S3: tbl_df/tbl/data.frame)
## $ exp: Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
## $ spp: chr [1:2646] "Cf" "Cf" "Cf" "Cf" ...
## $ iso: chr [1:2646] "11K11" "11K11" "11K11" "11K17" ...
## $ rep: int [1:2646] 1 2 3 1 2 3 1 2 3 1 ...
## $ tem: int [1:2646] 10 10 10 10 10 10 10 10 10 10 ...
## $ day: int [1:2646] 1 1 1 1 1 1 1 1 1 1 ...
## $ mm1: num [1:2646] 4.3 4.02 4.55 4.45 4 4 4.33 3.95 4.15 4.17 ...
## $ mm2: num [1:2646] 5.1 4.66 4.7 4.41 4 4 4.39 4.89 4.7 4.39 ...
# Get the mean diameter (4 mm is the plug size).
ogt$diameter <- (ogt$mm1 + ogt$mm2)/2 - 4
# with(ogt, c(min(mm1, na.rm = TRUE),
# min(mm2, na.rm = TRUE),
# min(diameter, na.rm = TRUE)))
# Curves for each isolate according to experimental conditions.
ggplot(data = ogt,
mapping = aes(x = day,
y = diameter,
color = iso)) +
facet_grid(facets = exp ~ tem) +
geom_point() +
stat_summary(geom = "line", fun.y = "mean") +
labs(x = "Days",
y = "Diameter (mm)",
color = "Isolate")
# Loads a function to calculate area under a curve by trapezoidal
# method.
# source("https://raw.githubusercontent.com/walmes/wzRfun/master/R/auc.R")
# args(auc)
# body(auc)
# Frequency tables.
xtabs(~iso + spp, data = ogt)
## spp
## iso Cf Ck
## 11J23 378 0
## 11K11 378 0
## 11K17 378 0
## 12D46 378 0
## 12J05 378 0
## 12J41 378 0
## 3G23 0 378
## iso 11J23 11K11 11K17 12D46 12J05 12J41 3G23
## exp tem
## 1 10 21 21 21 21 21 21 21
## 15 21 21 21 21 21 21 21
## 20 21 21 21 21 21 21 21
## 25 21 21 21 21 21 21 21
## 30 21 21 21 21 21 21 21
## 35 21 21 21 21 21 21 21
## 2 10 21 21 21 21 21 21 21
## 15 21 21 21 21 21 21 21
## 20 21 21 21 21 21 21 21
## 25 21 21 21 21 21 21 21
## 30 21 21 21 21 21 21 21
## 35 21 21 21 21 21 21 21
## 3 10 21 21 21 21 21 21 21
## 15 21 21 21 21 21 21 21
## 20 21 21 21 21 21 21 21
## 25 21 21 21 21 21 21 21
## 30 21 21 21 21 21 21 21
## 35 21 21 21 21 21 21 21
# AUC by each experimental unit = exp > spp > iso > tem > rep.
aumgc <- ogt %>%
group_by(exp, spp, iso, tem, rep) %>%
summarise(auc = auc(time = day, resp = diameter)) %>%
ungroup()
aumgc
## # A tibble: 378 x 6
## exp spp iso tem rep auc
## <fct> <chr> <chr> <int> <int> <dbl>
## 1 1 Cf 11J23 10 1 21.0
## 2 1 Cf 11J23 10 2 24.2
## 3 1 Cf 11J23 10 3 30.0
## 4 1 Cf 11J23 15 1 74.0
## 5 1 Cf 11J23 15 2 79.7
## 6 1 Cf 11J23 15 3 74.0
## 7 1 Cf 11J23 20 1 103.
## 8 1 Cf 11J23 20 2 101.
## 9 1 Cf 11J23 20 3 103.
## 10 1 Cf 11J23 25 1 149.
## # … with 368 more rows
# Convert come experimental controled variables to factor.
aumgc <- aumgc %>%
mutate_at(vars(exp, spp, iso, rep), factor)
str(aumgc)
## tibble [378 × 6] (S3: tbl_df/tbl/data.frame)
## $ exp: Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
## $ spp: Factor w/ 2 levels "Cf","Ck": 1 1 1 1 1 1 1 1 1 1 ...
## $ iso: Factor w/ 7 levels "11J23","11K11",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ tem: int [1:378] 10 10 10 15 15 15 20 20 20 25 ...
## $ rep: Factor w/ 3 levels "1","2","3": 1 2 3 1 2 3 1 2 3 1 ...
## $ auc: num [1:378] 21 24.2 30 74 79.7 ...
# Visualize.
ggplot(data = aumgc,
mapping = aes(x = tem, y = auc, color = exp)) +
facet_wrap(facets = ~iso) +
geom_point() +
stat_summary(geom = "line", fun.y = "mean") +
labs(x = "Temperature",
y = "AUC",
color = "Experiment")
exp
: represents the contour conditions of each experiment, so it represents the blocking factor.iso
: is the effect of isolate.rep
: is each independent replication of experiment \(\times\) isolate \(\times\) temperature. Response could be averaged across replications.# Averages the AUC.
aumgcm <- aumgc %>%
group_by(exp, iso, tem) %>%
summarise(aucm = mean(auc, na.rm = TRUE)) %>%
ungroup()
# Visualize.
ggplot(data = aumgcm,
mapping = aes(x = tem, y = aucm, color = exp)) +
facet_wrap(facets = ~iso) +
geom_point() +
geom_line() +
labs(x = "Temperature",
y = "AUC",
color = "Experiment")
# Uses a "naive" model (in terms of variance components) to check the
# model assumptions. All factores here are qualitative.
m0 <- lm(aucm ~ exp + iso * factor(tem),
data = aumgcm)
# Residual plots.
par(mfrow = c(2, 2))
plot(m0)
## Analysis of Variance Table
##
## Response: aucm
## Df Sum Sq Mean Sq F value Pr(>F)
## exp 2 5630 2815 28.8050 3.348e-10 ***
## iso 6 1209 202 2.0625 0.06655 .
## factor(tem) 5 281392 56278 575.9074 < 2.2e-16 ***
## iso:factor(tem) 30 3701 123 1.2626 0.20356
## Residuals 82 8013 98
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
By the above model, the growth pattern does rely on main effects. Interactions between isolate and temperature is not relevant at 5% significance level. Despite the above model can be useful, the greater interest is in comparing optimal growth temperatures. In the next section, the optimal temperature will be estimated for each experimental unit and then these data will be submited to the analysis of variance.
Estimation of optimal temperature is done for each experimental condition: experiment \(\times\) isolate. After estimation, a model for hyphotesis on equality of the optimal values will be employed.
# # Each experimental unit.
# gg <- ggplot(data = aumgc,
# mapping = aes(x = tem, y = auc, color = rep, group = rep)) +
# facet_grid(facets = exp ~ iso) +
# geom_point() +
# expand_limits(y = c(0, NA)) +
# geom_hline(yintercept = 25, linetype = 3) +
# labs(x = "Temperature",
# y = "AUC")
# Each experimental cell.
gg <- ggplot(data = aumgcm,
mapping = aes(x = tem, y = aucm)) +
facet_grid(facets = exp ~ iso) +
geom_point() +
expand_limits(y = c(0, NA)) +
geom_hline(yintercept = 25, linetype = 3) +
labs(x = "Temperature",
y = "AUC")
gg +
geom_smooth(method = "lm",
formula = y ~ poly(x, degree = 4),
se = FALSE)
# ATTENTION FIXME: the values of diameter must be subtracted from the
# plug initial diameter? Is 25 mm?
A 4 degree polynomial will be used to fit the AUC as a function of temperature. After fitting, the optimal temperature will be determined by a numerical optmization method.
#-----------------------------------------------------------------------
# Polynomial approach.
# Function that fits a model equation to data and determine the optimal
# temperature.
fit_poly <- function(data,
formula = auc ~ poly(tem, degree = 4),
interval = c(10, 35),
maximum = TRUE) {
m0 <- lm(formula = formula, data = data)
pred <- function(x) predict(m0, newdata = list(tem = x))
t_opt <- optimise(f = pred,
interval = interval,
maximum = maximum)
return(data.frame(t_opt = t_opt$maximum,
sqr = deviance(m0),
ll = logLik(m0)))
}
# # Testing.
# fit_poly(data = aumgcm[with(aumgcm, iso == "11J23" & exp == "1"),
# c("aucm", "tem" )])
#-----------------------------------------------------------------------
# Optimal temperature.
# Fits for each experimental cell.
temps <- aumgcm %>%
group_by(iso, exp) %>%
summarise(topt_p = list(
fit_poly(data = data.frame(aucm, tem),
formula = aucm ~ poly(tem, degree = 4)))) %>%
ungroup() %>%
unnest(topt_p)
# # Fits for each experimental unit.
# temps <- aumgc %>%
# group_by(iso, exp, rep) %>%
# summarise(topt_p = list(
# fit_poly(data = data.frame(auc, tem),
# formula = auc ~ poly(tem, degree = 4)))) %>%
# ungroup() %>%
# unnest()
# See the results for the polynomial approach.
gg +
geom_smooth(method = "lm",
formula = y ~ poly(x, degree = 4),
se = FALSE,
color = "purple",
size = 0.6) +
geom_vline(data = temps,
mapping = aes(xintercept = t_opt),
color = "purple",
linetype = 2)
# Filter by the chosen approach.
da <- temps %>%
select(iso, exp, t_opt)
# Fit the model according to the experiment layout.
m0 <- lm(t_opt ~ exp + iso, data = da)
# Residual plots.
par(mfrow = c(2, 2))
plot(m0)
## Analysis of Variance Table
##
## Response: t_opt
## Df Sum Sq Mean Sq F value Pr(>F)
## exp 2 2.1868 1.09340 2.4221 0.130737
## iso 6 14.2021 2.36702 5.2433 0.007244 **
## Residuals 12 5.4172 0.45143
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey HSD test on isolate means.
tk <- HSD.test(m0, trt = "iso", group = TRUE, console = FALSE)
tk <- tk$groups %>%
rownames_to_column() %>%
rename("iso" = "rowname")
tk
## iso t_opt groups
## 1 3G23 26.60877 a
## 2 12D46 24.48489 b
## 3 12J41 24.46055 b
## 4 12J05 24.44710 b
## 5 11J23 24.36128 b
## 6 11K11 24.29514 b
## 7 11K17 23.88601 b
# Least squares means.
lsm <- emmeans(m0, specs = "iso") %>%
as.data.frame() %>%
mutate(iso = as.character(iso))
# Merge these two tables.
tb_means <- full_join(x = lsm, y = tk, by = "iso")
tb_means
## iso emmean SE df lower.CL upper.CL t_opt groups
## 1 11J23 24.36128 0.3879154 12 23.51609 25.20648 24.36128 b
## 2 11K11 24.29514 0.3879154 12 23.44995 25.14034 24.29514 b
## 3 11K17 23.88601 0.3879154 12 23.04082 24.73121 23.88601 b
## 4 12D46 24.48489 0.3879154 12 23.63970 25.33009 24.48489 b
## 5 12J05 24.44710 0.3879154 12 23.60190 25.29229 24.44710 b
## 6 12J41 24.46055 0.3879154 12 23.61536 25.30575 24.46055 b
## 7 3G23 26.60877 0.3879154 12 25.76357 27.45396 26.60877 a
# Segment plot displaying means with confidence interval.
ggplot(data = tb_means,
mapping = aes(y = fct_reorder(iso, emmean), x = emmean)) +
geom_point() +
geom_errorbarh(mapping = aes(xmin = lower.CL, xmax = upper.CL),
height = 0.1) +
geom_text(mapping = aes(label = sprintf("%0.1f%s", emmean, groups)),
vjust = 0, nudge_y = 0.10) +
labs(y = "Isolate",
x = expression("Temperature" ~ (degree * C)))
Pairwise comparisons were made using Tukey HSD test at a 5% nominal significance level.
# Prepare table of data.
spo <- as_tibble(RDASC::pistachio_anthracnose$spo_vt)
attr(spo, "spec") <- NULL
# Convert variables to factor.
spo$exp <- factor(spo$exp)
spo$spp <- factor(spo$spp,
levels = c("Cf", "Ck"),
labels = c("C. fioriniae", "C. karstii"))
str(spo)
## tibble [108 × 10] (S3: tbl_df/tbl/data.frame)
## $ exp : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
## $ spp : Factor w/ 2 levels "C. fioriniae",..: 1 1 1 1 1 1 2 2 2 2 ...
## $ tem : int [1:108] 20 20 20 20 20 20 20 20 20 20 ...
## $ iso : chr [1:108] "11K11" "11K11" "11K11" "11K11" ...
## $ rep : int [1:108] 1 2 3 1 2 3 1 2 3 1 ...
## $ cv : chr [1:108] "Red Aleppo" "Red Aleppo" "Red Aleppo" "Kerman" ...
## $ spo : num [1:108] 53.8 67.5 77.2 50.3 103.9 ...
## $ index: int [1:108] 160000 160000 160000 160000 160000 160000 160000 10000 160000 160000 ...
## $ slice: num [1:108] 575 642 555 1225 1254 ...
## $ ml : int [1:108] 20 25 25 25 30 30 10 10 10 10 ...
## exp spp tem iso rep
## 1:36 C. fioriniae:54 Min. :20 Length:108 Min. :1
## 2:36 C. karstii :54 1st Qu.:20 Class :character 1st Qu.:1
## 3:36 Median :25 Mode :character Median :2
## Mean :25 Mean :2
## 3rd Qu.:30 3rd Qu.:3
## Max. :30 Max. :3
##
## cv spo index slice
## Length:108 Min. : 1.70 Min. : 10000 Min. : 169.3
## Class :character 1st Qu.: 9.15 1st Qu.:160000 1st Qu.: 527.3
## Mode :character Median : 27.35 Median :160000 Median :1001.5
## Mean : 37.93 Mean :155794 Mean :1378.2
## 3rd Qu.: 62.52 3rd Qu.:160000 3rd Qu.:1894.9
## Max. :122.40 Max. :160000 Max. :5708.5
## NA's :2 NA's :1 NA's :1
## ml
## Min. :10.00
## 1st Qu.:10.00
## Median :10.00
## Mean :16.68
## 3rd Qu.:22.50
## Max. :40.00
## NA's :1
## iso
## spp 11K11 3G23
## C. fioriniae 54 0
## C. karstii 0 54
## cv
## iso Kerman Red Aleppo
## 11K11 27 27
## 3G23 27 27
# Creating the context or offset variable for the count (spo).
spo$offset <- with(spo, slice *
c(16, 4)[match(index, c(160000, 10000))])
# Creates the variable concentration.
# Numbers correspond to 1000 in slide and 16 in cells.
spo$con <- with(spo, 1000 * 16 * spo/offset)
ggplot(data = spo,
mapping = aes(x = tem, y = con, color = iso)) +
facet_grid(facets = exp ~ cv) +
geom_jitter(width = 0.5) +
stat_summary(geom = "line", fun.y = "mean") +
scale_y_log10()
Data were transform to log scale to avoid violations in model assumptions. These data could be analysed assuming a probability distribution for count data. Here we decided to use a Gaussian model to the transformed response variable. This results should not differ from the count data model.
# Convert from numerical to factor.
spo$tem <- factor(spo$tem)
# Fit the model.
# m0 <- lm(con ~ exp + iso * cv * tem, data = spo)
# MASS::boxcox(m0)
m0 <- lm(log(con) ~ exp + iso * cv * tem, data = spo)
# Check for any violation.
par(mfrow = c(2, 2))
plot(m0)
## Analysis of Variance Table
##
## Response: log(con)
## Df Sum Sq Mean Sq F value Pr(>F)
## exp 2 3.457 1.728 1.6533 0.1970
## iso 1 58.809 58.809 56.2547 3.889e-11 ***
## cv 1 1.607 1.607 1.5369 0.2182
## tem 2 3.084 1.542 1.4751 0.2341
## iso:cv 1 0.077 0.077 0.0736 0.7867
## iso:tem 2 1.782 0.891 0.8522 0.4298
## cv:tem 2 2.853 1.427 1.3646 0.2606
## iso:cv:tem 2 2.050 1.025 0.9804 0.3790
## Residuals 92 96.178 1.045
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Update model dropping no relevant experimental terms.
m1 <- update(m0, . ~ exp + iso)
anova(m1, m0)
## Analysis of Variance Table
##
## Model 1: log(con) ~ exp + iso
## Model 2: log(con) ~ exp + iso * cv * tem
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 102 107.630
## 2 92 96.178 10 11.453 1.0955 0.374
# Gets the estimated marginal means for isolates.
emm <- emmeans(m1, specs = ~iso)
emm %>% as.data.frame()
## iso emmean SE df lower.CL upper.CL
## 1 11K11 3.905965 0.1411258 102 3.626043 4.185888
## 2 3G23 2.415861 0.1411258 102 2.135939 2.695784
# Values in the response scale.
tb_means <- multcomp::cld(emm, Letters = letters) %>%
as.data.frame() %>%
mutate_at(c("emmean", "lower.CL", "upper.CL"), exp) %>%
mutate(.group = str_trim(.group)) %>%
rename(cld = .group)
tb_means
## iso emmean SE df lower.CL upper.CL cld
## 1 3G23 11.19941 0.1411258 102 8.464991 14.81713 a
## 2 11K11 49.69804 0.1411258 102 37.563885 65.75185 b
# Results.
ggplot(data = tb_means,
mapping = aes(y = iso, x = emmean)) +
geom_point() +
geom_errorbarh(mapping = aes(xmin = lower.CL, xmax = upper.CL),
height = 0.1) +
geom_text(mapping = aes(label = sprintf("%0.2f%s", emmean, cld)),
vjust = 0,
nudge_y = 0.05,
show.legend = FALSE) +
labs(x = "Number of spores",
# x = expression(Log[10] ~ "of sporulation"),
y = "Isolates")
# Import the data.
cs <- as_tibble(RDASC::pistachio_anthracnose$cs)
cs$exp <- factor(cs$exp)
cs$spp <- factor(cs$spp,
levels = c("Cf", "Ck"),
labels = c("C. fioriniae", "C. karstii"))
attr(cs, "spec") <- NULL
str(cs)
## tibble [370 × 6] (S3: tbl_df/tbl/data.frame)
## $ exp : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
## $ spp : Factor w/ 2 levels "C. fioriniae",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ kare: chr [1:370] "11J41" "11J41" "11J41" "11J41" ...
## $ len : num [1:370] 13.9 12.4 12.4 12.3 12.3 ...
## $ wid : num [1:370] 5.12 5.51 5.24 5.41 5.26 4.64 5.17 4.05 5.35 6.98 ...
## $ vol : num [1:370] 286 296 268 284 268 ...
## exp spp kare len wid
## 1:188 C. fioriniae:306 Length:370 Min. : 6.77 Min. :3.350
## 2:182 C. karstii : 64 Class :character 1st Qu.:10.15 1st Qu.:4.780
## Mode :character Median :11.28 Median :5.250
## Mean :11.53 Mean :5.371
## 3rd Qu.:12.59 3rd Qu.:5.820
## Max. :17.79 Max. :7.930
## vol
## Min. : 95.99
## 1st Qu.:187.27
## Median :240.30
## Mean :274.22
## 3rd Qu.:325.70
## Max. :773.94
# Curves for each isolate according to experimental conditions.
ggplot(data = cs,
mapping = aes(x = len,
y = wid,
color = kare)) +
facet_wrap(facets = ~exp) +
geom_point()
# Find the convex hull of the points being plotted.
cs_hull <- cs %>%
group_by(exp, kare) %>%
slice(chull(len, wid))
# Show the convex hull for each isolate.
ggplot(data = cs,
mapping = aes(x = len,
y = wid,
color = kare)) +
facet_wrap(facets = ~exp) +
geom_polygon(data = cs_hull, alpha = 0.1) +
geom_point() +
labs(x = expression("Conidia length" ~ (mu * m)),
y = expression("Conidia width" ~ (mu * m)))
# ggplot(data = cs,
# mapping = aes(x = len,
# color = kare)) +
# facet_wrap(facets = ~exp) +
# geom_density()
#
# ggplot(data = cs,
# mapping = aes(x = wid,
# color = kare)) +
# facet_wrap(facets = ~exp) +
# geom_density()
#
# ggplot(data = cs,
# mapping = aes(x = vol,
# color = kare)) +
# facet_wrap(facets = ~exp) +
# geom_density()
cs %>%
gather(key = "variable", value = "value", len, wid, vol) %>%
group_by(variable) %>%
mutate(value = scale(value)) %>%
ggplot(mapping = aes(x = value,
color = kare)) +
facet_grid(facets = exp ~ variable, scales = "free") +
geom_density() +
geom_rug()
# Empirical cummulative density function.
cs %>%
gather(key = "variable", value = "value", len, wid, vol) %>%
group_by(variable) %>%
mutate(value = scale(value)) %>%
ggplot(mapping = aes(x = value,
color = kare)) +
facet_grid(facets = exp ~ variable, scales = "free") +
stat_ecdf() +
geom_rug()
# Analysis done at experimental unit since individual values are samples
# from the same experimental unit (replicates and not repetitions).
csm <- cs %>%
group_by(exp, kare) %>%
summarise(n = n(),
len = mean(len),
wid = mean(wid),
vol = mean(vol))
csm
## # A tibble: 14 x 6
## # Groups: exp [2]
## exp kare n len wid vol
## <fct> <chr> <int> <dbl> <dbl> <dbl>
## 1 1 11J41 26 11.2 5.50 266.
## 2 1 11K11 26 11.3 4.94 219.
## 3 1 11K17 26 10.8 5.02 220.
## 4 1 12D46 26 10.5 5.28 234.
## 5 1 12J05 26 11.2 5.43 265.
## 6 1 12J23 26 10.9 5.32 246.
## 7 1 3G23 32 14.7 6.73 527.
## 8 2 11J41 25 11.1 4.54 183.
## 9 2 11K11 25 11.6 5.29 258.
## 10 2 11K17 25 10.6 4.99 210.
## 11 2 12D46 25 10.8 5.15 228.
## 12 2 12J05 25 10.6 4.98 209.
## 13 2 12J23 25 10.4 5.06 213.
## 14 2 3G23 32 14.2 6.35 450.
# To keep the estimated means.
tb_results <- list()
# Models that uses the number of replicates as weighting variable.
m0 <- lm(len ~ exp + kare, data = csm, weight = n)
par(mfrow = c(2, 2))
plot(m0)
## Analysis of Variance Table
##
## Response: len
## Df Sum Sq Mean Sq F value Pr(>F)
## exp 1 3.39 3.391 1.6688 0.2439
## kare 6 687.23 114.538 56.3626 5.16e-05 ***
## Residuals 6 12.19 2.032
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# DANGER: Tukey test is based on geometric mean of the number of
# replicates. Since the number of replications are very enequal, this
# aproximation is not suitable.
# tt <- HSD.test(m0, "kare", console = FALSE)
# tt$groups
emm <- emmeans(m0, specs = ~kare)
tb_means <- multcomp::cld(emm, Letters = letters) %>%
as.data.frame() %>%
mutate(.group = str_trim(.group)) %>%
rename(cld = .group)
tb_means
## kare emmean SE df lower.CL upper.CL cld
## 1 12D46 10.62930 0.1996206 6 10.14085 11.11775 a
## 2 12J23 10.67989 0.1996206 6 10.19143 11.16834 a
## 3 11K17 10.69558 0.1996206 6 10.20712 11.18403 a
## 4 12J05 10.89479 0.1996206 6 10.40634 11.38324 a
## 5 11J41 11.14969 0.1996206 6 10.66124 11.63815 a
## 6 11K11 11.43401 0.1996206 6 10.94555 11.92246 a
## 7 3G23 14.44844 0.1781923 6 14.01242 14.88446 b
tb_results[["len"]] <- tb_means
# Segment plot displaying means with confidence interval.
gg_len <- ggplot(data = tb_means,
# mapping = aes(y = fct_reorder(kare, emmean), x = emmean)) +
mapping = aes(y = kare, x = emmean)) +
geom_point() +
geom_errorbarh(mapping = aes(xmin = lower.CL, xmax = upper.CL),
height = 0.1) +
geom_text(mapping = aes(label = sprintf("%0.2f%s", emmean, cld)),
vjust = 0, nudge_y = 0.10) +
labs(x = expression("Conidia length" ~ (mu * m)),
y = "Isolates", title = "(A)")
# Models that uses the number of replicates as weighting variable.
m0 <- lm(wid ~ exp + kare, data = csm, weight = n)
par(mfrow = c(2, 2))
plot(m0)
## Analysis of Variance Table
##
## Response: wid
## Df Sum Sq Mean Sq F value Pr(>F)
## exp 1 6.242 6.2421 2.9985 0.13406
## kare 6 108.222 18.0370 8.6644 0.00943 **
## Residuals 6 12.490 2.0817
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emm <- emmeans(m0, specs = ~kare)
tb_means <- multcomp::cld(emm, Letters = letters) %>%
as.data.frame() %>%
mutate(.group = str_trim(.group)) %>%
rename(cld = .group)
tb_means
## kare emmean SE df lower.CL upper.CL cld
## 1 11K17 5.002081 0.2020409 6 4.507705 5.496457 a
## 2 11J41 5.026002 0.2020409 6 4.531626 5.520379 a
## 3 11K11 5.109140 0.2020409 6 4.614763 5.603516 a
## 4 12J23 5.186983 0.2020409 6 4.692607 5.681359 a
## 5 12J05 5.206591 0.2020409 6 4.712214 5.700967 a
## 6 12D46 5.216002 0.2020409 6 4.721626 5.710379 a
## 7 3G23 6.539531 0.1803528 6 6.098224 6.980839 b
tb_results[["wid"]] <- tb_means
# Segment plot displaying means with confidence interval.
gg_wid <- ggplot(data = tb_means,
# mapping = aes(y = fct_reorder(kare, emmean), x = emmean)) +
mapping = aes(y = kare, x = emmean)) +
geom_point() +
geom_errorbarh(mapping = aes(xmin = lower.CL, xmax = upper.CL),
height = 0.1) +
geom_text(mapping = aes(label = sprintf("%0.2f%s", emmean, cld)),
vjust = 0, nudge_y = 0.10) +
labs(x = expression("Conidia width" ~ (mu * m)),
y = "Isolates", title = "(B)")
# Models that uses the number of replicates as weighting variable.
m0 <- lm(vol^(1/3) ~ exp + kare, data = csm, weight = n)
par(mfrow = c(2, 2))
plot(m0)
## Analysis of Variance Table
##
## Response: vol^(1/3)
## Df Sum Sq Mean Sq F value Pr(>F)
## exp 1 5.710 5.7099 3.543 0.108808
## kare 6 165.525 27.5875 17.118 0.001545 **
## Residuals 6 9.669 1.6116
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emm <- emmeans(m0, specs = ~kare)
tb_means <- multcomp::cld(emm, Letters = letters) %>%
as.data.frame() %>%
mutate(.group = str_trim(.group)) %>%
rename(cld = .group)
tb_means
## kare emmean SE df lower.CL upper.CL cld
## 1 11K17 5.986931 0.1777674 6 5.551950 6.421912 a
## 2 11J41 6.060593 0.1777674 6 5.625612 6.495574 a
## 3 12J23 6.117165 0.1777674 6 5.682184 6.552146 a
## 4 12D46 6.131897 0.1777674 6 5.696915 6.566878 a
## 5 12J05 6.178998 0.1777674 6 5.744017 6.613979 a
## 6 11K11 6.190052 0.1777674 6 5.755071 6.625034 a
## 7 3G23 7.871398 0.1586849 6 7.483110 8.259686 b
tb_results[["vol"]] <- tb_means
# Segment plot displaying means with confidence interval.
gg_vol <- ggplot(data = tb_means,
# mapping = aes(y = fct_reorder(kare, emmean), x = emmean)) +
mapping = aes(y = kare, x = emmean)) +
geom_point() +
geom_errorbarh(mapping = aes(xmin = lower.CL, xmax = upper.CL),
height = 0.1) +
geom_text(mapping = aes(label = sprintf("%0.2f%s", emmean, cld)),
vjust = 0, nudge_y = 0.10) +
labs(x = expression("Cubic root of conidia volume" ~ (mu * m^3)),
y = "Isolates", title = "(C)")
# All morphological characteristics on the same plot.
gridExtra::grid.arrange(gg_len, gg_wid, gg_vol, ncol = 1)
# Imports data.
germ <- as_tibble(RDASC::pistachio_anthracnose$ogertem)
germ$exp <- factor(germ$exp)
germ$tim <- factor(germ$tim)
germ$spp <- factor(germ$spp,
levels = c("Cf", "Ck"),
labels = c("C. fioriniae", "C. karstii"))
attr(germ, "spec") <- NULL
str(germ)
## tibble [756 × 8] (S3: tbl_df/tbl/data.frame)
## $ exp: Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
## $ spp: Factor w/ 2 levels "C. fioriniae",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ tim: Factor w/ 2 levels "6","12": 1 1 1 1 1 1 1 1 1 1 ...
## $ iso: chr [1:756] "11K11" "11K11" "11K11" "11K11" ...
## $ tem: int [1:756] 10 10 10 15 15 15 20 20 20 25 ...
## $ rep: int [1:756] 1 2 3 1 2 3 1 2 3 1 ...
## $ ger: int [1:756] 1 0 0 6 8 7 23 17 16 16 ...
## $ of : int [1:756] 50 50 50 50 50 50 50 50 50 50 ...
## exp spp tim iso tem
## 1:252 C. fioriniae:648 6 :378 Length:756 Min. :10.0
## 2:252 C. karstii :108 12:378 Class :character 1st Qu.:15.0
## 3:252 Mode :character Median :22.5
## Mean :22.5
## 3rd Qu.:30.0
## Max. :35.0
##
## rep ger of
## Min. :1 Min. : 0.00 Min. :50.00
## 1st Qu.:1 1st Qu.: 7.50 1st Qu.:50.00
## Median :2 Median :28.00 Median :50.00
## Mean :2 Mean :26.41 Mean :50.18
## 3rd Qu.:3 3rd Qu.:45.00 3rd Qu.:50.00
## Max. :3 Max. :58.00 Max. :70.00
## NA's :1
# # Curves for each isolate according to experimental conditions.
# ggplot(data = germ,
# mapping = aes(x = tem,
# y = ger/of,
# color = tim)) +
# facet_grid(facets = exp ~ iso) +
# geom_point() +
# stat_summary(geom = "line", fun.y = "mean")
# Curves for each isolate according to experimental conditions.
ggplot(data = filter(germ, tem > 10),
mapping = aes(x = tem,
y = ger/of,
color = tim)) +
facet_grid(facets = exp ~ iso) +
geom_jitter(width = 1, height = 0) +
# stat_summary(geom = "line", fun.y = "mean") +
geom_smooth(method = "glm",
formula = y ~ poly(x, degree = 3),
# formula = y ~ splines::bs(x, df = 2, degree = 3),
# formula = y ~ splines::ns(x, df = 3),
se = FALSE,
method.args = list(family = "quasibinomial"))
# Function that fits a model equation to data and determine the optimal
# temperature.
fit_poly <- function(data) {
m0 <- glm(formula = cbind(ger, of - ger) ~ poly(tem, degree = 3),
data = data,
family = quasibinomial)
pval <- tail(summary(m0)$coeff[, "Pr(>|t|)"], n = 1)
if (pval > 0.05) {
update(m0, . ~ poly(tem, degree = 2))
}
pred <- function(x) predict(m0,
newdata = list(tem = x),
type = "response")
t_opt <- optimise(f = pred,
interval = c(10, 35),
maximum = TRUE)
return(data.frame(temp_opt = t_opt$maximum,
germ_opt = t_opt$objective,
n_par = length(coef(m0)),
deviance = deviance(m0)))
}
# A experimental condition to test.
data <- germ[with(germ, iso == "11J23" &
exp == "1" &
tim == "6" &
tem > 10), ]
# Testing.
fit_poly(data = data)
## temp_opt germ_opt n_par deviance
## 1 26.50989 0.8676486 4 10.65015
# Apply to all experimental units except one.
temp_opt <- germ %>%
filter(tem > 10 & !(tim == "12" & exp == "2" & iso == "3G23")) %>%
group_by(exp, iso, tim) %>%
do({
fit_poly(.)
}) %>%
as.data.frame()
# ggplot(data = temp_opt,
# mapping = aes(x = temp_opt, y = germ_opt, color = tim)) +
# facet_wrap(facets = ~iso) +
# geom_point()
gridExtra::grid.arrange(
ggplot(data = temp_opt,
mapping = aes(x = iso, y = temp_opt,
color = tim, group = tim)) +
geom_point() +
stat_summary(geom = "line", fun.y = "mean") +
labs(y = "Optimal temperature for germination",
x = "Isolates"),
ggplot(data = temp_opt,
mapping = aes(x = iso, y = germ_opt,
color = tim, group = tim)) +
geom_point() +
stat_summary(geom = "line", fun = "mean") +
labs(y = "Maximum germination",
x = "Isolates")
)
## Analysis of Variance Table
##
## Response: temp_opt
## Df Sum Sq Mean Sq F value Pr(>F)
## exp 2 13.537 6.7684 6.0395 0.007247 **
## iso 6 110.610 18.4350 16.4498 1.351e-07 ***
## tim 1 0.008 0.0082 0.0073 0.932624
## iso:tim 6 4.554 0.7590 0.6773 0.669176
## Residuals 25 28.017 1.1207
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emm <- emmeans(m0, specs = ~iso)
tb_means <- multcomp::cld(emm, Letters = letters) %>%
as.data.frame() %>%
mutate(.group = str_trim(.group)) %>%
rename(cld = .group)
tb_means
## iso emmean SE df lower.CL upper.CL cld
## 1 3G23 22.59713 0.4868950 25 21.59435 23.59991 a
## 2 11J23 26.96305 0.4321804 25 26.07296 27.85315 b
## 3 11K11 27.15118 0.4321804 25 26.26109 28.04127 b
## 4 12J41 27.41832 0.4321804 25 26.52823 28.30841 b
## 5 12D46 27.60945 0.4321804 25 26.71936 28.49954 b
## 6 12J05 27.62944 0.4321804 25 26.73934 28.51953 b
## 7 11K17 27.80810 0.4321804 25 26.91801 28.69819 b
# Segment plot displaying means with confidence interval.
gg_temp <- ggplot(data = tb_means,
mapping = aes(y = fct_reorder(iso, emmean), x = emmean)) +
geom_point() +
geom_errorbarh(mapping = aes(xmin = lower.CL, xmax = upper.CL),
height = 0.1) +
geom_text(mapping = aes(label = sprintf("%0.2f%s", emmean, cld)),
vjust = 0,
nudge_y = 0.10,
show.legend = FALSE) +
labs(x = expression("Optimal temperature for germination" ~ (""^degree * C)),
y = "Isolates", title = "(A)")
# Analysis on the logit scale of the estimated germination.
temp_opt$logit_germ_opt <- binomial()$linkfun(temp_opt$germ_opt)
m0 <- lm(logit_germ_opt ~ exp + iso * tim, data = temp_opt[-8, ])
# m0 <- lm(germ_opt ~ exp + iso, data = temp_opt)
# MASS::boxcox(m0)
par(mfrow = c(2, 2))
plot(m0)
## Analysis of Variance Table
##
## Response: logit_germ_opt
## Df Sum Sq Mean Sq F value Pr(>F)
## exp 2 3.266 1.633 2.7922 0.08124 .
## iso 6 43.206 7.201 12.3111 2.578e-06 ***
## tim 1 67.571 67.571 115.5217 1.180e-10 ***
## iso:tim 6 2.700 0.450 0.7693 0.60147
## Residuals 24 14.038 0.585
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m0 <- lm(logit_germ_opt ~ exp + iso + tim, data = temp_opt[-8, ])
emm <- emmeans(m0, specs = ~iso)
tb_means <- multcomp::cld(emm, Letters = letters) %>%
as.data.frame() %>%
mutate(.group = str_trim(.group)) %>%
rename(cld = .group) %>%
mutate_at(c("emmean", "lower.CL", "upper.CL"), binomial()$linkinv)
tb_means
## iso emmean SE df lower.CL upper.CL cld
## 1 3G23 0.5831466 0.3366426 30 0.4129431 0.7355993 a
## 2 11K11 0.9248271 0.3049381 30 0.8684187 0.9582169 b
## 3 11K17 0.9415467 0.3049381 30 0.8962773 0.9677689 b
## 4 12D46 0.9420900 0.3366426 30 0.8910678 0.9700183 b
## 5 11J23 0.9656861 0.3049381 30 0.9378781 0.9812945 b
## 6 12J05 0.9670426 0.3049381 30 0.9402659 0.9820454 b
## 7 12J41 0.9692054 0.3049381 30 0.9440843 0.9832408 b
# ATTENTION: these values of germination is a mean of the predicted
# germination for the levels of tim. So, interpret them with caution. In
# fact, what is relevant is the contrast between isolates that represent
# the isolate effect.
# Segment plot displaying means with confidence interval.
gg_germ <- ggplot(data = tb_means,
mapping = aes(y = fct_reorder(iso, emmean), x = emmean)) +
geom_point() +
geom_errorbarh(mapping = aes(xmin = lower.CL, xmax = upper.CL),
height = 0.1) +
geom_text(mapping = aes(label = sprintf("%0.2f%s", emmean, cld)),
vjust = 0,
nudge_y = 0.10,
show.legend = FALSE) +
labs(x = expression("Proportion of maximum germination"),
y = "Isolates", title = "(B)")
gridExtra::grid.arrange(gg_temp, gg_germ, ncol = 1)
# Get the means for all combinations despite the effects are additive.
emm <- emmeans(m0, specs = ~iso + tim) %>%
# multcomp::cld(Letters = letters) %>%
as.data.frame() %>%
# mutate(.group = str_trim(.group)) %>%
# rename(cld = .group) %>%
mutate_at(c("emmean", "lower.CL", "upper.CL"), binomial()$linkinv)
pd <- position_dodge(0.1)
ggplot(data = emm,
mapping = aes(x = fct_reorder(iso, emmean),
y = emmean,
color = tim,
group = tim)) +
geom_point(position = pd) +
geom_errorbar(mapping = aes(ymin = lower.CL, ymax = upper.CL),
position = pd,
width = 0.1) +
geom_text(mapping = aes(label = sprintf("%0.2f", emmean)),
position = pd,
hjust = -0.2,
show.legend = FALSE) +
labs(y = expression("Maximum germination"),
x = "Isolates",
color = "Incubation\nperiod (h)")
# str(RDASC::pistachio_anthracnose$af)
af <- RDASC::pistachio_anthracnose$af %>%
as_tibble()
af$spp <- factor(af$spp,
levels = c("Cf", "Ck"),
labels = c("C. fioriniae", "C. karstii"))
af <- af %>%
mutate_at(c("exp", "spp", "iso"), as.factor)
af
## # A tibble: 28 x 6
## exp iso spp rep app tot
## <fct> <fct> <fct> <int> <int> <int>
## 1 1 11K11 C. fioriniae 1 68 100
## 2 1 11K11 C. fioriniae 2 62 100
## 3 1 11K17 C. fioriniae 1 65 100
## 4 1 11K17 C. fioriniae 2 64 100
## 5 1 12D46 C. fioriniae 1 52 100
## 6 1 12D46 C. fioriniae 2 48 100
## 7 1 12J05 C. fioriniae 1 66 100
## 8 1 12J05 C. fioriniae 2 61 100
## 9 1 12J23 C. fioriniae 1 57 100
## 10 1 12J23 C. fioriniae 2 64 100
## # … with 18 more rows
## iso 11K11 11K17 12D46 12J05 12J23 12J41 3G23
## exp spp
## 1 C. fioriniae 2 2 2 2 2 2 0
## C. karstii 0 0 0 0 0 0 2
## 2 C. fioriniae 2 2 2 2 2 2 0
## C. karstii 0 0 0 0 0 0 2
ggplot(data = af,
mapping = aes(x = iso,
y = app/tot,
color = spp,
shape = exp)) +
geom_jitter(width = 0.1)
m0 <- glm(cbind(app, tot - app) ~ exp + iso,
data = af,
family = quasibinomial)
par(mfrow = c(2, 2))
plot(m0)
## Analysis of Deviance Table
##
## Model: quasibinomial, link: logit
##
## Response: cbind(app, tot - app)
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 27 328.04
## exp 1 3.278 26 324.76 0.9802 0.334
## iso 6 256.073 20 68.69 12.7622 6.314e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emm <- emmeans(m0, specs = ~iso)
tb_means <- multcomp::cld(emm, Letters = letters, type = "response") %>%
as.data.frame() %>%
mutate(.group = str_trim(.group)) %>%
rename(cld = .group)
tb_means
## iso prob SE df asymp.LCL asymp.UCL cld
## 1 12J05 0.5250369 0.04569386 Inf 0.4356336 0.6128636 a
## 2 12J23 0.5650947 0.04536074 Inf 0.4750487 0.6510409 a
## 3 12J41 0.5826189 0.04512040 Inf 0.4924672 0.6675668 a
## 4 12D46 0.5951356 0.04491281 Inf 0.5049751 0.6793037 a
## 5 11K11 0.6026454 0.04477370 Inf 0.5125065 0.6863185 a
## 6 11K17 0.6602127 0.04333125 Inf 0.5709337 0.7393943 a
## 7 3G23 0.9451367 0.02080149 Inf 0.8869768 0.9742378 b
# Segment plot displaying means with confidence interval.
gg_af <- ggplot(data = tb_means,
mapping = aes(y = fct_reorder(iso, prob), x = prob)) +
geom_point() +
geom_errorbarh(mapping = aes(xmin = asymp.LCL, xmax = asymp.UCL),
height = 0.1) +
geom_text(mapping = aes(label = sprintf("%0.2f%s", prob, cld)),
vjust = 0,
nudge_y = 0.05,
show.legend = FALSE) +
labs(x = expression("Appressory formation"),
y = "Isolates")
gg_af
# ATTENTION: new data from 2020 are available.
vv <- RDASC::pistachio_anthracnose$pato_vv
vv$cv[vv$cv == "RA"] <- "Red Aleppo"
vv$cv <- droplevels(vv$cv)
vv$spp <- factor(vv$spp,
levels = c("Cf", "Ck"),
labels = c("C. fioriniae", "C. karstii"))
str(vv)
## 'data.frame': 720 obs. of 10 variables:
## $ yr : int 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
## $ mo : chr "June" "June" "June" "June" ...
## $ cv : Factor w/ 4 levels "Golden Hills",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ spp: Factor w/ 2 levels "C. fioriniae",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ fla: chr "B" "B" "B" "B" ...
## $ arb: int 1 1 1 1 1 1 1 1 1 1 ...
## $ clu: int 1 2 3 4 5 6 7 8 9 10 ...
## $ bli: int 13 6 19 3 4 11 5 10 1 7 ...
## $ hea: int 33 31 56 11 16 28 45 24 18 42 ...
## $ tot: int 46 37 75 14 20 39 50 34 19 49 ...
# Incomplete crossed factorial design. But it is a complete factorial
# design in each year.
ftable(xtabs(~cv + spp + yr, data = vv))
## yr 2017 2018 2019 2020
## cv spp
## Golden Hills C. fioriniae 29 30 30 30
## C. karstii 31 30 30 0
## Kerman C. fioriniae 30 30 30 30
## C. karstii 30 30 30 0
## Lost Hills C. fioriniae 0 0 30 30
## C. karstii 0 0 30 0
## Red Aleppo C. fioriniae 30 30 30 30
## C. karstii 30 30 30 0
# Each experimental codition has 3 trees in sequence and 10 clusters per
# tree.
ggplot(data = vv,
mapping = aes(x = arb, y = hea/tot, color = spp)) +
facet_grid(facets = yr ~ cv) +
geom_point() +
stat_summary(geom = "line", fun = "mean")
ggplot(data = vv,
mapping = aes(x = spp,
y = hea/tot,
color = factor(arb),
group = 1)) +
facet_grid(facets = yr ~ cv) +
geom_jitter(width = 0.1) +
stat_summary(geom = "line", fun = "mean")
#-----------------------------------------------------------------------
# Modelo de 3 componentes de variância.
# Variáveis que podem ser usadas na análise.
vv$prop <- vv$hea/vv$tot
# vv$asin <- asin(sqrt(vv$hea/vv$tot))
# vv$logit <- binomial()$linkfun(0.95 * (vv$hea/vv$tot - 0.5) + 0.5)
# Para guardar as médias.
tb_means <- list()
#--------------------------------------------
# 2017.
# Ajusta o modelo.
a <- 2017
mm0 <- lmer(prop ~ cv * spp + (1 | cv:arb) + (1 | cv:arb:spp),
data = filter(vv, yr == a),
weights = tot)
# # Diagnóstico.
# qqnorm(residuals(mm0))
# plot(residuals(mm0) ~ fitted(mm0))
# Estimativa dos componentes de variância.
VarCorr(mm0)
## Groups Name Std.Dev.
## cv:arb:spp (Intercept) 0.00004787
## cv:arb (Intercept) 0.03812432
## Residual 0.47879835
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## cv 1.75910 0.87955 2 5.855 3.8367 0.08614 .
## spp 0.05397 0.05397 1 145.823 0.2354 0.62825
## cv:spp 0.73004 0.36502 2 143.956 1.5922 0.20703
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Médias ajustadas.
tb_means[[as.character(a)]] <-
emmeans(mm0, specs = ~cv, data = filter(vv, yr == a)) %>%
multcomp::cld(Letters = letters) %>%
as.data.frame()
tb_means[[as.character(a)]]
## cv emmean SE df lower.CL upper.CL .group
## 3 Red Aleppo 0.7717835 0.02457391 435.5136 0.7234853 0.8200817 a
## 1 Golden Hills 0.8353772 0.03062148 371.8386 0.7751642 0.8955902 ab
## 2 Kerman 0.8648131 0.02526860 569.0338 0.8151820 0.9144442 b
#--------------------------------------------
# 2018.
# Ajusta o modelo.
a <- 2018
mm0 <- lmer(prop ~ cv * spp + (1 | cv:arb) + (1 | cv:arb:spp),
data = filter(vv, yr == a),
weights = tot)
# # Diagnóstico.
# qqnorm(residuals(mm0))
# plot(residuals(mm0) ~ fitted(mm0))
# Estimativa dos componentes de variância.
VarCorr(mm0)
## Groups Name Std.Dev.
## cv:arb:spp (Intercept) 0.00013175
## cv:arb (Intercept) 0.10033036
## Residual 1.07973068
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## cv 36.154 18.0772 2 5.981 15.5060 0.004299 **
## spp 0.413 0.4130 1 154.108 0.3543 0.552582
## cv:spp 1.026 0.5131 2 154.026 0.4401 0.644751
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Médias ajustadas.
tb_means[[as.character(a)]] <-
emmeans(mm0, specs = ~cv, data = filter(vv, yr == a)) %>%
multcomp::cld(Letters = letters) %>%
as.data.frame()
tb_means[[as.character(a)]]
## cv emmean SE df lower.CL upper.CL .group
## 3 Red Aleppo 0.3809485 0.06341187 435.4912 0.2563171 0.5055798 a
## 1 Golden Hills 0.4505638 0.06594052 329.7309 0.3208467 0.5802810 a
## 2 Kerman 0.8292197 0.06312194 264.1577 0.7049335 0.9535058 b
#--------------------------------------------
# 2019.
# Ajusta o modelo.
a <- 2019
mm0 <- lmer(prop ~ cv * spp + (1 | cv:arb) + (1 | cv:arb:spp),
data = filter(vv, yr == a),
weights = tot)
# # Diagnóstico.
# qqnorm(residuals(mm0))
# plot(residuals(mm0) ~ fitted(mm0))
# Estimativa dos componentes de variância.
VarCorr(mm0)
## Groups Name Std.Dev.
## cv:arb:spp (Intercept) 0.044952
## cv:arb (Intercept) 0.020933
## Residual 0.447859
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## cv 206.167 68.722 3 8.5686 342.6219 3.017e-09 ***
## spp 0.380 0.380 1 8.5571 1.8954 0.2035
## cv:spp 1.666 0.555 3 8.4961 2.7683 0.1069
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Médias ajustadas.
tb_means[[as.character(a)]] <-
emmeans(mm0, specs = ~cv, data = filter(vv, yr == a)) %>%
multcomp::cld(Letters = letters) %>%
as.data.frame()
tb_means[[as.character(a)]]
## cv emmean SE df lower.CL upper.CL .group
## 4 Red Aleppo 0.08637501 0.02381745 506.0399 0.03958175 0.1331683 a
## 2 Kerman 0.94270102 0.02431849 493.8088 0.89492054 0.9904815 b
## 1 Golden Hills 0.94732632 0.02413313 508.5405 0.89991342 0.9947392 b
## 3 Lost Hills 0.97688541 0.02551160 664.4131 0.92679235 1.0269785 b
#--------------------------------------------
# 2020.
# NOTE: This year only one spp were observed.
# Ajusta o modelo.
a <- 2020
mm0 <- lmer(prop ~ cv + (1 | cv:arb),
data = filter(vv, yr == a),
weights = tot)
# # Diagnóstico.
# qqnorm(residuals(mm0))
# plot(residuals(mm0) ~ fitted(mm0))
# Estimativa dos componentes de variância.
VarCorr(mm0)
## Groups Name Std.Dev.
## cv:arb (Intercept) 0.00000
## Residual 0.79399
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## cv 207.1 69.032 3 115 109.5 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Médias ajustadas.
tb_means[[as.character(a)]] <-
emmeans(mm0, specs = ~cv, data = filter(vv, yr == a)) %>%
multcomp::cld(Letters = letters) %>%
as.data.frame()
tb_means[[as.character(a)]]
## cv emmean SE df lower.CL upper.CL .group
## 4 Red Aleppo 0.3986989 0.02430888 16881.921 0.3510509 0.4463468 a
## 2 Kerman 0.9032258 0.03298946 4438.364 0.8385500 0.9679016 b
## 1 Golden Hills 0.9209770 0.03031108 6197.419 0.8615568 0.9803972 b
## 3 Lost Hills 0.9558824 0.02586952 11594.880 0.9051737 1.0065910 b
#-----------------------------------------------------------------------
# Gráficos.
tb_m <- tb_means %>%
bind_rows(.id = "yr") %>%
rename(cld = .group) %>%
mutate(cld = str_trim(cld))
# Segment plot displaying means with confidence interval.
gg_yr <- ggplot(data = tb_m,
mapping = aes(y = cv, x = emmean)) +
facet_wrap(facets = ~yr, ncol = 1, scale = "free_y") +
geom_point() +
geom_errorbarh(mapping = aes(xmin = lower.CL, xmax = upper.CL),
height = 0.1) +
geom_text(mapping = aes(label = sprintf("%0.2f%s", emmean, cld)),
vjust = 0,
nudge_y = 0.125,
show.legend = FALSE) +
labs(x = "Proportion of healthy nuts",
y = "Cultivars")
gg_yr
# ATTENTION: new data from 2020 are available.
tos <- RDASC::pistachio_anthracnose$tos
tos$mo <- factor(tos$mo, levels = unique(tos$mo)[c(4, 5, 1, 2, 3)])
tos$spp <- factor(tos$spp,
levels = c("Cf", "Ck"),
labels = c("C. fioriniae", "C. karstii"))
str(tos)
## 'data.frame': 960 obs. of 10 variables:
## $ yr : int 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
## $ mo : Factor w/ 5 levels "April","May",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ cv : Factor w/ 2 levels "Kerman","Red Aleppo": 2 2 2 2 2 2 2 2 2 2 ...
## $ spp: Factor w/ 2 levels "C. fioriniae",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ fla: chr "B" "B" "B" "B" ...
## $ arb: int 1 1 1 1 1 1 1 1 1 1 ...
## $ clu: int 1 2 3 4 5 6 7 8 9 10 ...
## $ bli: int 13 6 19 3 4 11 5 10 1 7 ...
## $ hea: int 33 31 56 11 16 28 45 24 18 42 ...
## $ tot: int 46 37 75 14 20 39 50 34 19 49 ...
# Incomplete crossed factorial design. But it is a complete factorial
# design in each year.
ftable(xtabs(~cv + spp + yr + mo, data = tos))
## mo April May June July August
## cv spp yr
## Kerman C. fioriniae 2017 0 0 0 0 0
## 2018 0 0 0 0 0
## 2019 0 0 0 0 0
## 2020 30 30 30 30 0
## C. karstii 2017 0 0 0 0 0
## 2018 0 0 0 0 0
## 2019 0 0 0 0 0
## 2020 0 0 0 0 0
## Red Aleppo C. fioriniae 2017 0 0 30 30 30
## 2018 30 30 30 30 30
## 2019 30 30 30 30 0
## 2020 30 30 30 30 0
## C. karstii 2017 0 0 30 30 30
## 2018 30 30 30 30 30
## 2019 30 30 30 30 0
## 2020 0 0 0 0 0
ggplot(data = tos,
mapping = aes(x = mo, y = hea/tot, color = spp, group = spp)) +
facet_wrap(facets = ~yr) +
geom_jitter(width = 0.1) +
stat_summary(geom = "line", fun = "mean")
# Criando a estrutura de unidades experimentais.
#
# UE: as árvores são os blocos.
# UE para mês de inoculação: 1/5 de cada árvore.
# UE para espécie: 1/2 de 1/5 de cada árvore.
# UA: 10 cachos por UE de espécie.
tos <- tos %>%
mutate(ue_bl = interaction(yr, arb, drop = FALSE),
ue_mo = interaction(yr, arb, mo, drop = FALSE),
ue_spp = interaction(yr, arb, mo, spp, drop = FALSE))
tos %>%
group_by(yr) %>%
summarise_at(vars(ue_bl, ue_mo, ue_spp, tot),
~ifelse(is.factor(.),
nlevels(droplevels(.)),
length(.)))
## # A tibble: 4 x 5
## yr ue_bl ue_mo ue_spp tot
## <int> <int> <int> <int> <int>
## 1 2017 3 9 18 180
## 2 2018 3 15 30 300
## 3 2019 3 12 24 240
## 4 2020 3 12 12 240
#-----------------------------------------------------------------------
# Variáveis.
# Variáveis que podem ser usadas na análise.
# tos$prop <- tos$hea/tos$tot
# tos$asin <- asin(sqrt(tos$hea/tos$tot))
tos$logit <- binomial()$linkfun(0.95 * (tos$hea/tos$tot - 0.5) + 0.5)
tos$y <- tos$logit
# Para guardar as médias.
tb_means <- list()
#--------------------------------------------
# 2017.
# Ajusta o modelo.
a <- 2017
tb <- droplevels(filter(tos, yr == a, mo %in% c("June", "July")))
mm0 <- lmer(y ~ spp * mo + (1 | ue_bl) + (1 | ue_mo) + (1 | ue_spp),
data = tb,
weights = tot)
# # Diagnóstico.
# qqnorm(residuals(mm0))
# plot(residuals(mm0) ~ fitted(mm0))
# Estimativa dos componentes de variância.
VarCorr(mm0)
## Groups Name Std.Dev.
## ue_spp (Intercept) 0.167341
## ue_mo (Intercept) 0.000000
## ue_bl (Intercept) 0.039869
## Residual 2.988858
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## spp 1.6761 1.6761 1 5.6079 0.1876 0.6811
## mo 30.9756 30.9756 1 5.6208 3.4674 0.1152
## spp:mo 3.3234 3.3234 1 5.6465 0.3720 0.5656
# Médias ajustadas.
tb_means[[as.character(a)]] <-
emmeans(mm0, specs = ~spp, data = tb) %>%
multcomp::cld(Letters = letters) %>%
as.data.frame()
tb_means[[as.character(a)]]
## spp emmean SE df lower.CL upper.CL .group
## 2 C. karstii 1.053158 0.09387868 2808.622 0.8690794 1.237236 a
## 1 C. fioriniae 1.107946 0.09562848 2589.505 0.9204298 1.295462 a
u <- emmeans(mm0, specs = ~1, data = tb) %>%
as.data.frame() %>%
rename("yr" = "1") %>%
mutate(yr = a)
tb_means[[as.character(a)]] <- bind_rows(tb_means[[as.character(a)]], u)
tb_means[[as.character(a)]]
## spp emmean SE df lower.CL upper.CL .group yr
## 1 C. karstii 1.053158 0.09387868 2808.622 0.8690794 1.237236 a NA
## 2 C. fioriniae 1.107946 0.09562848 2589.505 0.9204298 1.295462 a NA
## 3 <NA> 1.080552 0.06964751 1000.322 0.9438797 1.217224 <NA> 2017
#--------------------------------------------
# 2018.
# Ajusta o modelo.
a <- 2018
tb <- droplevels(filter(tos, yr == a))
mm0 <- lmer(y ~ spp * mo + (1 | ue_bl) + (1 | ue_mo) + (1 | ue_spp),
data = tb,
weights = tot)
# # Diagnóstico.
# qqnorm(residuals(mm0))
# plot(residuals(mm0) ~ fitted(mm0))
# Estimativa dos componentes de variância.
VarCorr(mm0)
## Groups Name Std.Dev.
## ue_spp (Intercept) 0.73566
## ue_mo (Intercept) 0.36997
## ue_bl (Intercept) 0.00000
## Residual 7.07666
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## spp 13.311 13.3111 1 8.0326 0.2658 0.6200
## mo 33.206 8.3015 4 8.8752 0.1658 0.9504
## spp:mo 69.316 17.3289 4 7.9547 0.3460 0.8398
# Médias ajustadas.
tb_means[[as.character(a)]] <-
emmeans(mm0, specs = ~spp, data = tb) %>%
multcomp::cld(Letters = letters) %>%
as.data.frame()
tb_means[[as.character(a)]]
## spp emmean SE df lower.CL upper.CL .group
## 1 C. fioriniae -0.5020973 0.2525457 307.6529 -0.9990327 -0.005162004 a
## 2 C. karstii -0.3374404 0.2686898 385.2459 -0.8657224 0.190841676 a
u <- emmeans(mm0, specs = ~1, data = tb) %>%
as.data.frame() %>%
rename("yr" = "1") %>%
mutate(yr = a)
tb_means[[as.character(a)]] <- bind_rows(tb_means[[as.character(a)]], u)
tb_means[[as.character(a)]]
## spp emmean SE df lower.CL upper.CL .group
## 1 C. fioriniae -0.5020973 0.2525457 307.6529 -0.9990327 -0.005162004 a
## 2 C. karstii -0.3374404 0.2686898 385.2459 -0.8657224 0.190841676 a
## 3 <NA> -0.4197689 0.2036588 125.7583 -0.8228111 -0.016726617 <NA>
## yr
## 1 NA
## 2 NA
## 3 2018
#--------------------------------------------
# 2019.
# Ajusta o modelo.
a <- 2019
tb <- droplevels(filter(tos, yr == a))
mm0 <- lmer(y ~ spp * mo + (1 | ue_bl) + (1 | ue_mo) + (1 | ue_spp),
data = tb,
weights = tot)
# # Diagnóstico.
# qqnorm(residuals(mm0))
# plot(residuals(mm0) ~ fitted(mm0))
# Estimativa dos componentes de variância.
VarCorr(mm0)
## Groups Name Std.Dev.
## ue_spp (Intercept) 0.55432383
## ue_mo (Intercept) 0.00000000
## ue_bl (Intercept) 0.00081637
## Residual 6.89144984
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## spp 2428.4 2428.40 1 15.634 51.133 2.631e-06 ***
## mo 1664.2 554.75 3 15.554 11.681 0.0002916 ***
## spp:mo 1969.0 656.33 3 15.554 13.820 0.0001176 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Médias ajustadas.
tb_means[[as.character(a)]] <-
emmeans(mm0, specs = ~mo | spp, data = tb) %>%
multcomp::cld(Letters = letters) %>%
as.data.frame()
tb_means[[as.character(a)]]
## mo spp emmean SE df lower.CL upper.CL .group
## 1 April C. fioriniae -1.5119982 0.3991278 3955.137 -2.2945138 -0.7294827 a
## 2 May C. fioriniae 0.2137881 0.3665753 3754.949 -0.5049180 0.9324941 b
## 3 June C. fioriniae 1.6136904 0.3661942 3769.627 0.8957325 2.3316483 c
## 4 July C. fioriniae 2.7623358 0.3667554 4221.724 2.0433022 3.4813694 c
## 8 July C. karstii 2.4194305 0.3569209 4160.232 1.7196749 3.1191861 a
## 5 April C. karstii 2.6026715 0.3654866 3790.458 1.8861020 3.3192410 a
## 7 June C. karstii 2.6418595 0.3695812 3638.109 1.9172526 3.3664664 a
## 6 May C. karstii 2.7339330 0.3562237 4194.134 2.0355459 3.4323201 a
#--------------------------------------------
# 2020.
# Ajusta o modelo.
a <- 2020
tb <- droplevels(filter(tos, yr == a))
mm0 <- lmer(y ~ mo + (1 | ue_bl) + (1 | ue_mo) + (1 | ue_spp),
data = tb,
weights = tot)
# # Diagnóstico.
# qqnorm(residuals(mm0))
# plot(residuals(mm0) ~ fitted(mm0))
# Estimativa dos componentes de variância.
VarCorr(mm0)
## Groups Name Std.Dev.
## ue_spp (Intercept) 0.76018
## ue_mo (Intercept) 0.14313
## ue_bl (Intercept) 0.11313
## Residual 7.64476
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## mo 31.88 10.627 3 5.2961 0.1818 0.9046
# Médias ajustadas.
tb_means[[as.character(a)]] <-
emmeans(mm0, specs = ~mo, data = tb) %>%
multcomp::cld(Letters = letters) %>%
as.data.frame()
tb_means[[as.character(a)]]
## mo emmean SE df lower.CL upper.CL .group
## 3 June 0.5665807 0.5227293 218.6877 -0.463651315 1.596813 a
## 4 July 0.6342678 0.5195992 223.4501 -0.389673801 1.658209 a
## 1 April 0.6876725 0.5501621 668.0303 -0.392582556 1.767928 a
## 2 May 1.0385837 0.5310249 281.8131 -0.006694935 2.083862 a
u <- emmeans(mm0, specs = ~1, data = tb) %>%
as.data.frame() %>%
rename("yr" = "1") %>%
mutate(yr = a)
tb_means[[as.character(a)]] <- bind_rows(tb_means[[as.character(a)]], u)
tb_means[[as.character(a)]]
## mo emmean SE df lower.CL upper.CL .group yr
## 1 June 0.5665807 0.5227293 218.68766 -0.463651315 1.596813 a NA
## 2 July 0.6342678 0.5195992 223.45006 -0.389673801 1.658209 a NA
## 3 April 0.6876725 0.5501621 668.03025 -0.392582556 1.767928 a NA
## 4 May 1.0385837 0.5310249 281.81310 -0.006694935 2.083862 a NA
## 5 <NA> 0.7317762 0.2861124 70.40019 0.161199971 1.302352 <NA> 2020
## $`2017`
## spp emmean SE df lower.CL upper.CL .group yr
## 1 C. karstii 1.053158 0.09387868 2808.622 0.8690794 1.237236 a NA
## 2 C. fioriniae 1.107946 0.09562848 2589.505 0.9204298 1.295462 a NA
## 3 <NA> 1.080552 0.06964751 1000.322 0.9438797 1.217224 <NA> 2017
##
## $`2018`
## spp emmean SE df lower.CL upper.CL .group
## 1 C. fioriniae -0.5020973 0.2525457 307.6529 -0.9990327 -0.005162004 a
## 2 C. karstii -0.3374404 0.2686898 385.2459 -0.8657224 0.190841676 a
## 3 <NA> -0.4197689 0.2036588 125.7583 -0.8228111 -0.016726617 <NA>
## yr
## 1 NA
## 2 NA
## 3 2018
##
## $`2019`
## mo spp emmean SE df lower.CL upper.CL .group
## 1 April C. fioriniae -1.5119982 0.3991278 3955.137 -2.2945138 -0.7294827 a
## 2 May C. fioriniae 0.2137881 0.3665753 3754.949 -0.5049180 0.9324941 b
## 3 June C. fioriniae 1.6136904 0.3661942 3769.627 0.8957325 2.3316483 c
## 4 July C. fioriniae 2.7623358 0.3667554 4221.724 2.0433022 3.4813694 c
## 8 July C. karstii 2.4194305 0.3569209 4160.232 1.7196749 3.1191861 a
## 5 April C. karstii 2.6026715 0.3654866 3790.458 1.8861020 3.3192410 a
## 7 June C. karstii 2.6418595 0.3695812 3638.109 1.9172526 3.3664664 a
## 6 May C. karstii 2.7339330 0.3562237 4194.134 2.0355459 3.4323201 a
##
## $`2020`
## mo emmean SE df lower.CL upper.CL .group yr
## 1 June 0.5665807 0.5227293 218.68766 -0.463651315 1.596813 a NA
## 2 July 0.6342678 0.5195992 223.45006 -0.389673801 1.658209 a NA
## 3 April 0.6876725 0.5501621 668.03025 -0.392582556 1.767928 a NA
## 4 May 1.0385837 0.5310249 281.81310 -0.006694935 2.083862 a NA
## 5 <NA> 0.7317762 0.2861124 70.40019 0.161199971 1.302352 <NA> 2020
tb_m <- tb_means %>%
bind_rows(.id = "yr") %>%
rename(cld = .group) %>%
mutate(cld = str_trim(cld))
# Transformação inversa da aplicada para a análise.
tb_m <- tb_m %>%
mutate_at(c("emmean", "lower.CL", "upper.CL"), binomial()$linkinv)
# Segment plot displaying means with confidence interval.
ggplot(data = filter(tb_m, yr %in% c("2017", "2018"), !is.na(spp)),
mapping = aes(y = spp, x = emmean)) +
facet_wrap(facets = ~yr, ncol = 1) +
geom_point() +
geom_errorbarh(mapping = aes(xmin = lower.CL, xmax = upper.CL),
height = 0.1) +
geom_text(mapping = aes(label = sprintf("%0.2f%s", emmean, cld)),
vjust = 0,
nudge_y = 0.05,
show.legend = FALSE) +
labs(x = "Proportion of healthy nuts",
y = "Species")
ggplot(data = filter(tb_m, yr %in% c("2019")),
mapping = aes(x = mo, y = emmean)) +
facet_wrap(facets = ~spp, nrow = 1) +
geom_point() +
geom_errorbar(mapping = aes(ymin = lower.CL, ymax = upper.CL),
width = 0.1) +
geom_text(mapping = aes(label = sprintf("%0.2f%s", emmean, cld)),
hjust = 0,
nudge_x = 0.05,
show.legend = FALSE) +
labs(y = "Proportion of healthy nuts",
x = "Months")
ggplot(data = filter(tb_m, yr %in% c("2020"), !is.na(cld)),
mapping = aes(x = mo, y = emmean)) +
geom_point() +
geom_errorbar(mapping = aes(ymin = lower.CL, ymax = upper.CL),
width = 0.1) +
geom_text(mapping = aes(label = sprintf("%0.2f%s", emmean, cld)),
hjust = 0,
nudge_x = 0.05,
show.legend = FALSE) +
labs(y = "Proportion of healthy nuts",
x = "Months")
## spp emmean SE df lower.CL upper.CL cld yr mo
## 1 <NA> 0.7465984 0.06964751 1000.32175 0.7198827 0.7715746 <NA> 2017 <NA>
## 2 <NA> 0.3965721 0.20365878 125.75828 0.3051673 0.4958184 <NA> 2018 <NA>
## 3 <NA> 0.6751949 0.28611242 70.40019 0.5402130 0.7862306 <NA> 2020 <NA>
ggplot(data = filter(tb_m, is.na(spp), is.na(mo)),
mapping = aes(x = yr, y = emmean)) +
geom_point() +
geom_errorbar(mapping = aes(ymin = lower.CL, ymax = upper.CL),
width = 0.1) +
geom_text(mapping = aes(label = sprintf("%0.2f", emmean)),
hjust = 0,
nudge_x = 0.05,
show.legend = FALSE) +
labs(y = "Proportion of healthy nuts",
x = "Seasons")
## 'data.frame': 2880 obs. of 5 variables:
## $ tem: num 67.8 72.8 76.1 79 81.8 83 85 85 84.5 83.4 ...
## $ lw : num 0 0 0 0 0 0 0 0 0 0 ...
## $ rh : num 55.6 44.3 41.8 38.7 33.6 30.7 30.2 31.1 29.1 30.1 ...
## $ dew: num 51.3 49.8 51.2 51.7 50.4 49 50.3 51.1 48.8 48.8 ...
## $ dt : POSIXct, format: "2019-04-23 09:00:00" "2019-04-23 10:00:00" ...
pos_l <- pos %>%
gather(key = "serie", value = "value", -dt)
ggplot(data = pos_l,
mapping = aes(x = dt, y = value)) +
facet_grid(facets = serie ~ ., scale = "free_y",
labeller = labeller(serie = c("dew" = "Dew point temp.",
"lw" = "Leaf wetness",
"rh" = "Rel. humidity",
"tem" = "Temperature"))) +
geom_line(color = "gray50") +
geom_smooth(method = "loess", se = FALSE, span = 0.05,
color = "black") +
labs(x = "Days", y = "Value")
RDASC - Reproducible Data Analysis of Scientific
Cooperations
|
Walmes Zeviani |