Importação e preparo dos dados

#-----------------------------------------------------------------------
# Pacotes.

rm(list = objects())

library(tidyverse)
library(survival)
library(emmeans)

# Será usada a `ordered_cld()`.
url <- "https://raw.githubusercontent.com/walmes/wzRfun/master/R/pairwise.R"
source(url)

#-----------------------------------------------------------------------
# Importação e preparação.

# Leitura.
da <- read_tsv("frutosmaduros.txt")
attr(da, "spec") <- NULL
str(da)

## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 3000 obs. of  5 variables:
##  $ spp  : chr  "Ch" "Ch" "Ch" "Ch" ...
##  $ day  : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ trat : chr  "test" "test" "test" "test" ...
##  $ rep  : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ incid: num  0 0 0 0 0 0 0 0 0 0 ...

# Tabela de frequência cruzada.
xtabs(~spp + trat, data = da)

##     trat
## spp  amistar top mythos nativo recop score test
##   Cf         250    250    250   250   250  250
##   Ch         250    250    250   250   250  250

# Existe NA em algum lugar?
u <- !complete.cases(da)
sum(u)

## [1] 1

da[u, ]

## # A tibble: 1 x 5
##   spp     day trat     rep incid
##   <chr> <dbl> <chr>  <dbl> <dbl>
## 1 Ch       10 mythos    21    NA

# Substitui o NA por censura a direita.
da <- da %>%
    replace_na(replace = list(incid = 0))

# Cria fatores.
da <- da %>%
    mutate(trat = factor(trat),
           spp = factor(spp),
           ue = interaction(trat, spp, rep))

# A tabela contém o registro dia após dia de cada unidade
# experimental. O evento ocorre quando o variável `incid` troca de valor
# 0 para 1.
da %>%
    filter(ue == levels(ue)[1])

## # A tibble: 10 x 6
##    spp     day trat          rep incid ue              
##    <fct> <dbl> <fct>       <dbl> <dbl> <fct>           
##  1 Cf        1 amistar top     1     0 amistar top.Cf.1
##  2 Cf        2 amistar top     1     0 amistar top.Cf.1
##  3 Cf        3 amistar top     1     0 amistar top.Cf.1
##  4 Cf        4 amistar top     1     1 amistar top.Cf.1
##  5 Cf        5 amistar top     1     1 amistar top.Cf.1
##  6 Cf        6 amistar top     1     1 amistar top.Cf.1
##  7 Cf        7 amistar top     1     1 amistar top.Cf.1
##  8 Cf        8 amistar top     1     1 amistar top.Cf.1
##  9 Cf        9 amistar top     1     1 amistar top.Cf.1
## 10 Cf       10 amistar top     1     1 amistar top.Cf.1

# Determina a primeira data de valor `incid == 1` para cada unidade
# experimental.
da_1 <- da %>%
  group_by(ue) %>%
  filter(incid == 1) %>%
  top_n(day, n = -1)
da_1

## # A tibble: 213 x 6
## # Groups:   ue [213]
##    spp     day trat    rep incid ue        
##    <fct> <dbl> <fct> <dbl> <dbl> <fct>     
##  1 Ch        2 test      1     1 test.Ch.1 
##  2 Ch        2 test      2     1 test.Ch.2 
##  3 Ch        2 test      3     1 test.Ch.3 
##  4 Ch        2 test      4     1 test.Ch.4 
##  5 Ch        2 test      5     1 test.Ch.5 
##  6 Ch        2 test      6     1 test.Ch.6 
##  7 Ch        2 test      7     1 test.Ch.7 
##  8 Ch        2 test      8     1 test.Ch.8 
##  9 Ch        2 test     10     1 test.Ch.10
## 10 Ch        2 test     11     1 test.Ch.11
## # … with 203 more rows

# Determina a última data de valor `incid == 0` para cada unidade
# experimental restante, ou seja, que não apresentou `incid == 1`.
da_0 <- anti_join(da, da_1, by = "ue") %>%
  group_by(ue) %>%
  top_n(day, n = 1)
da_0

## # A tibble: 87 x 6
## # Groups:   ue [87]
##    spp     day trat     rep incid ue          
##    <fct> <dbl> <fct>  <dbl> <dbl> <fct>       
##  1 Ch       10 test       9     0 test.Ch.9   
##  2 Ch       10 test      14     0 test.Ch.14  
##  3 Ch       10 test      21     0 test.Ch.21  
##  4 Ch       10 mythos    12     0 mythos.Ch.12
##  5 Ch       10 mythos    15     0 mythos.Ch.15
##  6 Ch       10 mythos    16     0 mythos.Ch.16
##  7 Ch       10 mythos    17     0 mythos.Ch.17
##  8 Ch       10 mythos    21     0 mythos.Ch.21
##  9 Cf       10 mythos    11     0 mythos.Cf.11
## 10 Ch       10 score      2     0 score.Ch.2  
## # … with 77 more rows

# Junta as duas porções.
da_final <- bind_rows(da_1, da_0) %>%
  ungroup()
da_final

## # A tibble: 300 x 6
##    spp     day trat    rep incid ue        
##    <fct> <dbl> <fct> <dbl> <dbl> <fct>     
##  1 Ch        2 test      1     1 test.Ch.1 
##  2 Ch        2 test      2     1 test.Ch.2 
##  3 Ch        2 test      3     1 test.Ch.3 
##  4 Ch        2 test      4     1 test.Ch.4 
##  5 Ch        2 test      5     1 test.Ch.5 
##  6 Ch        2 test      6     1 test.Ch.6 
##  7 Ch        2 test      7     1 test.Ch.7 
##  8 Ch        2 test      8     1 test.Ch.8 
##  9 Ch        2 test     10     1 test.Ch.10
## 10 Ch        2 test     11     1 test.Ch.11
## # … with 290 more rows

xtabs(~trat + spp, data = da_final)

##              spp
## trat          Cf Ch
##   amistar top 25 25
##   mythos      25 25
##   nativo      25 25
##   recop       25 25
##   score       25 25
##   test        25 25

xtabs(incid ~ trat + spp, data = da_final)

##              spp
## trat          Cf Ch
##   amistar top 16 14
##   mythos      24 20
##   nativo      18  9
##   recop       18 18
##   score       20  9
##   test        25 22

# Verifica.
da_final %>%
    filter(spp == "Ch", trat == "mythos") %>%
    print(n = Inf)

## # A tibble: 25 x 6
##    spp     day trat     rep incid ue          
##    <fct> <dbl> <fct>  <dbl> <dbl> <fct>       
##  1 Ch        2 mythos     1     1 mythos.Ch.1 
##  2 Ch        2 mythos     2     1 mythos.Ch.2 
##  3 Ch        2 mythos     3     1 mythos.Ch.3 
##  4 Ch        2 mythos     4     1 mythos.Ch.4 
##  5 Ch        2 mythos     7     1 mythos.Ch.7 
##  6 Ch        2 mythos     8     1 mythos.Ch.8 
##  7 Ch        2 mythos     9     1 mythos.Ch.9 
##  8 Ch        2 mythos    10     1 mythos.Ch.10
##  9 Ch        2 mythos    11     1 mythos.Ch.11
## 10 Ch        2 mythos    14     1 mythos.Ch.14
## 11 Ch        2 mythos    20     1 mythos.Ch.20
## 12 Ch        2 mythos    22     1 mythos.Ch.22
## 13 Ch        2 mythos    24     1 mythos.Ch.24
## 14 Ch        2 mythos    25     1 mythos.Ch.25
## 15 Ch        5 mythos     5     1 mythos.Ch.5 
## 16 Ch        5 mythos     6     1 mythos.Ch.6 
## 17 Ch        5 mythos    18     1 mythos.Ch.18
## 18 Ch        5 mythos    19     1 mythos.Ch.19
## 19 Ch       10 mythos    13     1 mythos.Ch.13
## 20 Ch       10 mythos    23     1 mythos.Ch.23
## 21 Ch       10 mythos    12     0 mythos.Ch.12
## 22 Ch       10 mythos    15     0 mythos.Ch.15
## 23 Ch       10 mythos    16     0 mythos.Ch.16
## 24 Ch       10 mythos    17     0 mythos.Ch.17
## 25 Ch       10 mythos    21     0 mythos.Ch.21

Análise exploratória

#-----------------------------------------------------------------------
# Análise exploratória.

str(da_final)

## Classes 'tbl_df', 'tbl' and 'data.frame':    300 obs. of  6 variables:
##  $ spp  : Factor w/ 2 levels "Cf","Ch": 2 2 2 2 2 2 2 2 2 2 ...
##  $ day  : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ trat : Factor w/ 6 levels "amistar top",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ rep  : num  1 2 3 4 5 6 7 8 10 11 ...
##  $ incid: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ ue   : Factor w/ 300 levels "amistar top.Cf.1",..: 12 24 36 48 60 72 84 96 120 132 ...

# Ordena pelo dia do evento dentro de cada unidade experimental.
da_final <- da_final %>%
    group_by(spp, trat) %>%
    arrange(day) %>%
    mutate(ord = seq_along(day)/n())

# Gráfico com a linha do tempo de cada fruto que mostra quando o evento
# aconteceu e que tipo de desfecho.

ggplot(data = da_final,
       mapping = aes(x = day, y = ord, shape = factor(incid))) +
    facet_grid(facets = spp ~ trat) +
    geom_point() +
    geom_segment(mapping = aes(x = 0, y = ord, xend = day, yend = ord)) +
    scale_shape_manual(breaks = c(0, 1),
                       values = c(1, 19),
                       labels = c("Censura", "Sintoma")) +
    labs(shape = "Desfecho",
         x = "Dias após inoculação",
         y = "Ordem do fruto") +
    scale_x_continuous(breaks = seq(0, max(da_final$day), by = 2))

# ATTENTION ------------------------------------------------------------
# Será feita análise de sobreviência com esses dados pois é motivada
# pelo tipo de resposta de natureza "tempo até desfecho". Para acomodar
# a estrutura de planejamento fatorial completo 2 x 6, será usada uma
# abordagem paramétrica. Por apresentar boa flexibilidade, será usada a
# distribuição Weibull para o tempo até o desfecho. Com isso consegue-se
# acomodar a censura e a estrutura experimental.

Breve reconhecimento da parametrização da Weibull

#-----------------------------------------------------------------------
# Antes de ir para o ajuste da Weibull, vamos reconhecer a
# parametrização.

# Gera números aleatórios de uma distribuição Weibull.
y <- rweibull(n = 1000, shape = 4, scale = 2)
plot(ecdf(y))

# Ajusta o modelo com a `survreg()`.
m <- survreg(formula = Surv(time = y) ~ 1, dist = "weibull")
summary(m)

## 
## Call:
## survreg(formula = Surv(time = y) ~ 1, dist = "weibull")
##                Value Std. Error     z      p
## (Intercept)  0.69441    0.00811  85.6 <2e-16
## Log(scale)  -1.41257    0.02457 -57.5 <2e-16
## 
## Scale= 0.244 
## 
## Weibull distribution
## Loglik(model)= -715.7   Loglik(intercept only)= -715.7
## Number of Newton-Raphson Iterations: 6 
## n= 1000

# Compreende os coeficientes estimados.
exp(coef(m)) # scale = exp(coef) -> 2.

## (Intercept) 
##    2.002531

1/m$scale    # shape = 1/m$scale -> 4.

## [1] 4.106508

# Verifica o ajuste com obsevado vs ajustado.
plot(ecdf(y))
curve(pweibull(x, shape = 1/m$scale, scale = exp(coef(m))),
      add = TRUE,
      col = 2)

# Obtenção do tempo médio.
a <- 1/m$scale
b <- unname(exp(coef(m))[1])
c("sample_mean" = mean(y),
  "estimated_mean" = b * gamma(1 + 1/a))

##    sample_mean estimated_mean 
##       1.817993       1.817824

Ajuste do modelo Weibull

#-----------------------------------------------------------------------
# Ajustar a Weibull acomodando a estrutura experimental.

# COMMENT: a análise de sobrevivência é a abrodagem mais adequada para
# análise destes dados, visto que de fato a variável resposta é o tempo
# para o desfecho e tem-se censura.

# A modelagem é para a variável aleatória `tempo para aparecimento de
# sintoma` (desfecho).

s <- with(da_final,
          Surv(time = day,
               event = incid))

# Modelo nulo que não tem efeito de nenhum fator.
m0 <- survreg(formula = s ~ 1,
              data = da_final,
              dist = "weibull")

# Inclui o efeito de espécie.
m1 <- survreg(formula = s ~ spp,
              data = da_final,
              dist = "weibull")

# Inclui o efeito de espécie e tratamentos (aditivo).
m2 <- survreg(formula = s ~ spp + trat,
              data = da_final,
              dist = "weibull")

# Inclui o efeito de espécie e tratamentos com interação.
m3 <- survreg(formula = s ~ spp * trat,
              data = da_final,
              dist = "weibull")

# Teste da razão de verossimilhanças entre modelos encaixados.
anova(m0, m1, m2, m3)

##        Terms Resid. Df    -2*LL Test Df Deviance     Pr(>Chi)
## 1          1       298 1266.985      NA       NA           NA
## 2        spp       297 1251.616    =  1 15.36874 8.843933e-05
## 3 spp + trat       292 1198.358    =  5 53.25808 2.976992e-10
## 4 spp * trat       287 1186.178    =  5 12.17968 3.240697e-02

# O modelo com interação é o mais apropriado para descrever o tempo para
# o aparecimento de sintomas. Portanto, existe interação entre espécies
# e produtos ao nível de 5% pelo teste da deviance entre modelos
# encaixados.

# Modelo final.
mx <- m3

Curvas de sobreviência

#-----------------------------------------------------------------------
# Extração dos tempos médios e confecção das curvas de sobrevivência.

# Médias amostrais (que ignoram estrutura e censura).
# da_final %>%
#     group_by(spp, trat) %>%
#     summarise(m_day = mean(day))

# Médias ajustadas (é na interpretação do parâmetro da Weibull).
emm <- emmeans(mx,
               specs = ~spp + trat)
emm

##  spp trat        emmean    SE  df lower.CL upper.CL
##  Cf  amistar top   2.19 0.210 287    1.777     2.60
##  Ch  amistar top   2.54 0.225 287    2.102     2.99
##  Cf  mythos        1.17 0.172 287    0.833     1.51
##  Ch  mythos        1.79 0.187 287    1.417     2.16
##  Cf  nativo        1.96 0.197 287    1.573     2.35
##  Ch  nativo        2.85 0.282 287    2.296     3.41
##  Cf  recop         2.02 0.197 287    1.627     2.40
##  Ch  recop         1.87 0.197 287    1.480     2.26
##  Cf  score         1.72 0.187 287    1.347     2.08
##  Ch  score         2.85 0.282 287    2.296     3.41
##  Cf  test          1.14 0.168 287    0.811     1.47
##  Ch  test          1.34 0.179 287    0.990     1.69
## 
## Results are given on the log (not the response) scale. 
## Confidence level used: 0.95

tb_emm <- as.data.frame(emm)

# Estimativas conforme parametrização da {p, d, q, r}weibull.
a <- 1/mx$scale          # shape.
b <- exp(tb_emm$emmean)  # scale.

# Obtenção do tempo médio (é uma função complexa dos dois parâmetros).
tb_emm$mean_day <- b * gamma(1 + 1/a)
tb_emm

##    spp        trat   emmean        SE  df  lower.CL upper.CL  mean_day
## 1   Cf amistar top 2.189537 0.2096084 287 1.7769724 2.602102  8.412596
## 2   Ch amistar top 2.544112 0.2246914 287 2.1018602 2.986364 11.992785
## 3   Cf      mythos 1.171612 0.1717900 287 0.8334837 1.509740  3.039839
## 4   Ch      mythos 1.786291 0.1874039 287 1.4174307 2.155151  5.620856
## 5   Cf      nativo 1.961678 0.1974581 287 1.5730282 2.350328  6.698415
## 6   Ch      nativo 2.850903 0.2816894 287 2.2964641 3.405342 16.298874
## 7   Cf       recop 2.015539 0.1974580 287 1.6268891 2.404188  7.069088
## 8   Ch       recop 1.868744 0.1974632 287 1.4800845 2.257404  6.103956
## 9   Cf       score 1.715966 0.1874072 287 1.3470996 2.084833  5.239150
## 10  Ch       score 2.850903 0.2816894 287 2.2964641 3.405342 16.298874
## 11  Cf        test 1.141479 0.1678588 287 0.8110887 1.471870  2.949608
## 12  Ch        test 1.342256 0.1789124 287 0.9901091 1.694403  3.605459

# Curvas de probabilidade de desfecho.
tb_curves <- tb_emm %>%
    group_by(spp, trat) %>%
    do({
        day <- seq(0, 20, length.out = 51)
        dens <- dweibull(day, shape = a, scale = exp(.$emmean))
        accu <- pweibull(day, shape = a, scale = exp(.$emmean))
        data.frame(day, dens, accu)
    }) %>%
    ungroup()

# As curvas de densidade ajustadas.
ggplot(data = tb_curves,
       mapping = aes(x = day, y = dens)) +
    facet_grid(facets = spp ~ trat) +
    geom_line()

# As curvas de "1 - sobreviência".
ggplot(data = tb_curves,
       mapping = aes(x = day, y = accu)) +
    facet_grid(facets = spp ~ trat) +
    geom_line()

# Valores observados com sobreposição das curvas ajustadas.
ggplot(data = da_final,
       mapping = aes(x = day, y = ord, shape = factor(incid))) +
    facet_grid(facets = spp ~ trat) +
    geom_point() +
    scale_shape_manual(breaks = c(0, 1),
                       values = c(1, 19),
                       labels = c("Censura", "Sintoma")) +
    labs(shape = "Desfecho",
         x = "Dias após inoculação",
         y = "Frequência acumulada") +
    geom_vline(data = tb_emm,
               mapping = aes(xintercept = mean_day),
               color = "orange") +
    geom_line(data = tb_curves,
              inherit.aes = FALSE,
              mapping = aes(x = day, y = accu))

Estudo da interação com comparações múltiplas.

#-----------------------------------------------------------------------
# Fazendo o estudo da interação com desdobramento com testes para o
# parâmetro de locação.

# Estudar produtos em cada espécie.
emm_trat_in <- emmeans(mx, specs = ~trat | spp)
emm_trat_in

## spp = Cf:
##  trat        emmean    SE  df lower.CL upper.CL
##  amistar top   2.19 0.210 287    1.777     2.60
##  mythos        1.17 0.172 287    0.833     1.51
##  nativo        1.96 0.197 287    1.573     2.35
##  recop         2.02 0.197 287    1.627     2.40
##  score         1.72 0.187 287    1.347     2.08
##  test          1.14 0.168 287    0.811     1.47
## 
## spp = Ch:
##  trat        emmean    SE  df lower.CL upper.CL
##  amistar top   2.54 0.225 287    2.102     2.99
##  mythos        1.79 0.187 287    1.417     2.16
##  nativo        2.85 0.282 287    2.296     3.41
##  recop         1.87 0.197 287    1.480     2.26
##  score         2.85 0.282 287    2.296     3.41
##  test          1.34 0.179 287    0.990     1.69
## 
## Results are given on the log (not the response) scale. 
## Confidence level used: 0.95

# contrast(emm_trat_in, method = "pairwise")
trat_in_comp <- multcomp::cld(emm_trat_in)
trat_in_comp

## spp = Cf:
##  trat        emmean    SE  df lower.CL upper.CL .group
##  test          1.14 0.168 287    0.811     1.47  1    
##  mythos        1.17 0.172 287    0.833     1.51  1    
##  score         1.72 0.187 287    1.347     2.08  12   
##  nativo        1.96 0.197 287    1.573     2.35   2   
##  recop         2.02 0.197 287    1.627     2.40   2   
##  amistar top   2.19 0.210 287    1.777     2.60   2   
## 
## spp = Ch:
##  trat        emmean    SE  df lower.CL upper.CL .group
##  test          1.34 0.179 287    0.990     1.69  1    
##  mythos        1.79 0.187 287    1.417     2.16  12   
##  recop         1.87 0.197 287    1.480     2.26  123  
##  amistar top   2.54 0.225 287    2.102     2.99   23  
##  nativo        2.85 0.282 287    2.296     3.41    3  
##  score         2.85 0.282 287    2.296     3.41    3  
## 
## Results are given on the log (not the response) scale. 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 6 estimates 
## significance level used: alpha = 0.05

# Tabela com o resultado das comparações múltiplas.
tb_contr <- as.data.frame(trat_in_comp)
tb_contr

##           trat spp   emmean        SE  df  lower.CL upper.CL .group
## 6         test  Cf 1.141479 0.1678588 287 0.8110887 1.471870     1 
## 2       mythos  Cf 1.171612 0.1717900 287 0.8334837 1.509740     1 
## 5        score  Cf 1.715966 0.1874072 287 1.3470996 2.084833     12
## 3       nativo  Cf 1.961678 0.1974581 287 1.5730282 2.350328      2
## 4        recop  Cf 2.015539 0.1974580 287 1.6268891 2.404188      2
## 1  amistar top  Cf 2.189537 0.2096084 287 1.7769724 2.602102      2
## 12        test  Ch 1.342256 0.1789124 287 0.9901091 1.694403    1  
## 8       mythos  Ch 1.786291 0.1874039 287 1.4174307 2.155151    12 
## 10       recop  Ch 1.868744 0.1974632 287 1.4800845 2.257404    123
## 7  amistar top  Ch 2.544112 0.2246914 287 2.1018602 2.986364     23
## 9       nativo  Ch 2.850903 0.2816894 287 2.2964641 3.405342      3
## 11       score  Ch 2.850903 0.2816894 287 2.2964641 3.405342      3

# Transformar os números para letras e ordenar as letras conforme as
# estimativas.
f <- function(x) {
    l <- x %>%
        str_trim() %>%
        str_split("") %>%
        flatten_chr() %>%
        as.integer()
    str_c(letters[l], collapse = "")
}
f(" 123 ")

## [1] "abc"

# Acerta a representação da significância dos contrastes com letras.
tb_contr <- tb_contr %>%
    mutate(let = map_chr(.group, f)) %>%
    group_by(spp) %>%
    mutate(let2 = ordered_cld(let = let, means = emmean))

ggplot(data = tb_contr,
       mapping = aes(x = trat, y = emmean)) +
    facet_wrap(facets = ~spp) +
    geom_point() +
    geom_errorbar(mapping = aes(ymin = lower.CL,
                                ymax = upper.CL),
                  width = 0.05) +
    geom_text(mapping = aes(label = sprintf("%0.2f %s",
                                            emmean,
                                            let2)),
              hjust = 0.5,
              vjust = -1,
              angle = 90,
              nudge_x = 0.075) +
    labs(x = "Tratamento",
         y = "Estimativa do parâmetro de locação")

Ajuste de curvas de sobreviência com o modelo Weibull para planejamentofatorial duplo

Camilla Castellar, Thiago Carraro, Louise Larissa May De Mio, Walmes Marques Zeviani

2019-11-01

Importação e preparo dos dados

Análise exploratória

Breve reconhecimento da parametrização da Weibull

Ajuste do modelo Weibull

Curvas de sobreviência

Estudo da interação com comparações múltiplas.