# 1. Definições da sessão ----

#-----------------------------------------------------------------------
#                                            Prof. Dr. Walmes M. Zeviani
#                                leg.ufpr.br/~walmes · github.com/walmes
#                                        walmes@ufpr.br · @walmeszeviani
#                      Laboratory of Statistics and Geoinformation (LEG)
#                Department of Statistics · Federal University of Paraná
#                                       2021-mar-04 · Curitiba/PR/Brazil
#-----------------------------------------------------------------------

# O caso em estudo é referente a descrição do perfil do tronco de
# árvores utilizando uma função de afilamento segmentada de Max &
# Burkhart amplamente aplicada em inventário florestal.

#-----------------------------------------------------------------------
# Packages.

# NOTE(review): clearing the workspace inside a script is discouraged
# (prefer starting a fresh R session); the call is kept to preserve the
# original session behavior.
rm(list = objects())

library(nlme)       # groupedData(), nlsList(), nlme().
library(tidyverse)  # dplyr/tidyr/ggplot2 verbs used throughout.

# 2. Importação e preparo ----

#-----------------------------------------------------------------------
# Read the raw data (tab-separated, one row per measurement point along
# the stem of each tree).

da <- read.table(file = "afilamento_RG.txt", header = TRUE, sep = "\t")
str(da)
## 'data.frame':    1296 obs. of  10 variables:
##  $ n   : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ arv : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ ht  : num  9.6 9.6 9.6 9.6 9.6 9.6 9.6 9.6 9.6 9.6 ...
##  $ hrel: int  0 1 2 3 4 5 15 25 35 45 ...
##  $ hi  : num  0 0.096 0.192 0.288 0.384 0.48 1.44 2.4 3.36 4.32 ...
##  $ dap : num  18.5 18.5 18.5 18.5 18.5 ...
##  $ di  : num  32.9 23.1 21.9 21.3 20.6 ...
##  $ x   : num  0 0.01 0.02 0.03 0.04 0.05 0.15 0.25 0.35 0.45 ...
##  $ y   : num  1.78 1.25 1.18 1.15 1.12 ...
##  $ z   : num  1.00e+23 1.00e+02 5.00e+01 3.33e+01 2.50e+01 ...
# Response variable: the squared relative diameter, (di/dap)^2.
da$y2 <- da$y^2

# Number of measurement points per tree.
head(count(da, arv))
##   arv  n
## 1   1 16
## 2   2 16
## 3   3 16
## 4   4 16
## 5   5 16
## 6   6 16
#

# 3. Análise exploratória ----

#-----------------------------------------------------------------------
# Graphical exploration.

# Raw scale: (di/dap)^2 against relative height hi/ht.
ggplot(da, aes(x, y2)) +
    geom_point() +
    geom_smooth(se = FALSE)

# Same data on a square-root x scale.
ggplot(da, aes(x, y2)) +
    geom_point() +
    scale_x_sqrt() +
    geom_smooth(se = FALSE)

# ATTENTION: a scale with a smaller leverage effect (cube root of x).
ggplot(da, aes(x^(1/3), y2)) +
    geom_point() +
    geom_smooth(se = FALSE)

# WARNING: what should be done with the values observed at x = 1?
# Arguably they should be removed since they are not random variables.

# Number of trees.
length(unique(da$arv))
## [1] 81
# Panels of 9 trees each, only for visualization.
ggplot(da, aes(x, y2, group = arv)) +
    facet_wrap(~ceiling(arv/9)) +
    geom_point() +
    geom_line()

# One panel per tree, square-root x scale.
ggplot(da, aes(x, y2, group = arv)) +
    facet_wrap(~arv) +
    scale_x_sqrt() +
    geom_point() +
    geom_line()

#

# 4. Ajuste visual para extração de valores iniciais ----

# Model formula: Max & Burkhart segmented taper model.  y2 = (di/dap)^2
# is modeled as a quadratic in relative height x = hi/ht plus two extra
# quadratic terms that switch on below the join points a1 and a2 (the
# logical comparisons (x <= a1) and (x <= a2) act as 0/1 indicators).
model <- y2 ~ ((b1 * (x - 1) +
                b2 * ((x^2) - 1) +
                (b3 * (a1 - x)^2) * (x <= a1) +
                (b4 * (a2 - x)^2) * (x <= a2)))
# Load the function for interactive (visual) fitting.
# NOTE(review): sourcing an unpinned URL fetches whatever is currently
# on the master branch -- consider pinning a commit for reproducibility.
library(rpanel)
source("https://raw.githubusercontent.com/walmes/wzRfun/master/R/rp.nls.R")

# Open the GUI.  Each parameter gets a slider defined by
# c(lower, initial, upper); drag until the curve tracks the points.
fit <- rp.nls(model = model,
              data = da,
              start = list(a1 = c(0, 0.21, 1),
                           a2 = c(0, 0.04, 1),
                           b1 = c(-2, -0.8, 2),
                           b2 = c(-2, -0.12, 2),
                           b3 = c(-10, 10, 20),
                           b4 = c(0, 175, 200)),
              # extra = function(a1, a2, b1, b2, b3, b4) {
              #     abline(h = c(b1, b2, b3, b4),
              #            v = c(a1, a2),
              #            col = "green")
              # },
              xlab = "hi/ht",
              ylab = "di/dap")

# Export the vector of visually tuned values.
dput(round(coef(fit), 3))
# Initial values -- presumably pasted from the dput() output of a
# previous GUI session so the script can run non-interactively from
# here on (TODO confirm).
start <- c(b1 = -0.801,
           b2 = -0.13,
           b3 = 10.081,
           a1 = 0.215,
           b4 = 175.66,
           a2 = 0.042)

# Pooled fit ignoring the tree-to-tree (nested) structure.  The rows
# with x = 1 are excluded from the fit.
n0 <- nls(model, data = filter(da, x < 1), start = start)

# Overlay the pooled fitted curve on all of the data.
plot(y2 ~ x, data = da)
curve(predict(n0, newdata = list(x = x)), add = TRUE, col = "red")

# Decompose the fitted model into its curve segments.
plot(y2 ~ x, data = da, col = "gray")
with(as.list(coef(n0)), {
    # Base quadratic term, active over the whole x range.
    curve(b1 * (x - 1) + b2 * ((x^2) - 1),
          col = "red",
          lwd = 2, add = TRUE)
    # First extra quadratic, active only below join point a1.
    curve((b3 * (a1 - x)^2) * (x <= a1),
          col = "cyan",
          to = a1,
          lwd = 2, add = TRUE)
    abline(v = a1, col = "cyan", lty = 2)
    # Second extra quadratic, active only below join point a2.
    curve((b4 * (a2 - x)^2) * (x <= a2),
          col = "green",
          to = a2,
          lwd = 2, add = TRUE)
    abline(v = a2, col = "green", lty = 2)
    # Full fitted curve (sum of the segments), drawn on top.
    curve(predict(n0, newdata = list(x = x)),
          add = TRUE,
          lwd = 4, lty = 3,
          col = "purple")
})

#

# 5. Ajuste do modelo por árvore ----

#-----------------------------------------------------------------------
# Prepare the data for fitting one curve per tree: tree id as a factor,
# only the modeling columns, wrapped in an nlme::groupedData object
# with grouping structure y2 ~ x | arv.

tbg <- groupedData(
    y2 ~ x | arv,
    data = da %>%
        # filter(x < 1) %>%  # optionally drop the deterministic tip.
        mutate(arv = factor(arv)) %>%
        select(y2, x, arv),
    order.groups = FALSE)
str(tbg)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame':   1296 obs. of  3 variables:
##  $ y2 : num  3.18 1.57 1.4 1.33 1.25 ...
##  $ x  : num  0 0.01 0.02 0.03 0.04 0.05 0.15 0.25 0.35 0.45 ...
##  $ arv: Factor w/ 81 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "formula")=Class 'formula'  language y2 ~ x | arv
##   .. ..- attr(*, ".Environment")=<environment: 0x559e7c121cb0> 
##  - attr(*, "FUN")=function (x)  
##  - attr(*, "order.groups")= logi FALSE
# Quick look at the prepared data.
ggplot(tbg, aes(x, y2)) +
    geom_point()

#-----------------------------------------------------------------------
# Fit for a single tree.

id <- "4"
# Inspect the profile of that tree alone.
ggplot(data = filter(tbg, arv == id),
       mapping = aes(x = x, y = y2)) +
    geom_point()

# trace = TRUE prints the SSE and parameter values at each iteration.
n0_one <- nls(model,
              data = filter(tbg, arv == id),
              start = start,
              trace = TRUE)
## 0.4090907 :   -0.801  -0.130  10.081   0.215 175.660   0.042
## 0.3218627 :   -0.9244006  -0.0319432   6.7975322   0.2475892 150.3748427   0.0491011
## 0.2341805 :   -1.10532302   0.11240537   3.39046892   0.30930507 132.73763834   0.05889509
## 0.1254563 :   -1.03801847   0.08339960   2.23359894   0.40123504 128.95943144   0.06443815
## 0.02789719 :   -1.06621636   0.11564570   3.74045032   0.36316769 146.93035721   0.05982359
## 0.02002699 :   -0.96812717   0.05387402   2.92929959   0.41647463 133.79072956   0.06496363
## 0.01904372 :   -1.0442810   0.1015613   3.3737135   0.3874111 141.2514292   0.0620629
## 0.0167731 :   -1.01084579   0.08117090   3.44974367   0.39477493 141.67259560   0.06229824
## 0.01677235 :   -1.03518731   0.09640567   3.55725192   0.38702402 143.23379722   0.06167071
## 0.01676845 :   -1.02191396   0.08810348   3.49207626   0.39122115 142.12385624   0.06208833
## 0.01676764 :   -1.02278059   0.08865410   3.50437299   0.39076647 142.36196684   0.06201041
## 0.01676763 :   -1.02193414   0.08812552   3.49958810   0.39105229 142.26613398   0.06204508
## 0.01676763 :   -1.02210668   0.08823382   3.50016087   0.39100581 142.26951655   0.06204317
## 0.01676763 :   -1.02202906   0.08818524   3.49981211   0.39102950 142.26425148   0.06204524
## 0.01676763 :   -1.02203319   0.08818776   3.49986908   0.39102714 142.26598960   0.06204467
coef(n0_one)
##           b1           b2           b3           a1           b4 
##  -1.02203319   0.08818776   3.49986908   0.39102714 142.26598960 
##           a2 
##   0.06204467
# Observed versus fitted for the selected tree.
plot(y2 ~ x, data = filter(tbg, arv == id))
curve(predict(n0_one, newdata = list(x = x)),
      add = TRUE,
      col = "red")

#-----------------------------------------------------------------------
# Fitting the model to every tree.

# Model formula with the grouping declared after `|`: nlsList() fits
# one independent nls() per level of `arv`.
model_by <- y2 ~ ((b1 * (x - 1) +
                   b2 * ((x^2) - 1) +
                   (b3 * (a1 - x)^2) * (x <= a1) +
                   (b4 * (a2 - x)^2) * (x <= a2))) | arv

# One fit per tree, all starting from the same `start` values.
n0_list <- nlsList(model = model_by,
                   data = tbg,
                   start = start)

# Per-tree estimates.  Rows of NA are trees for which the fit did not
# succeed from these start values -- a considerable portion of them.
coef(n0_list)
##              b1           b2         b3        a1        b4         a2
## 1            NA           NA         NA        NA        NA         NA
## 2   0.389229490 -0.952518295  3.7694636 0.4193937  33.81086 0.10014485
## 3   5.538220189 -4.067274715  5.4892186 0.6901064  86.66136 0.07876198
## 4  -1.022033187  0.088187757  3.4998691 0.3910271 142.26599 0.06204467
## 5            NA           NA         NA        NA        NA         NA
## 6            NA           NA         NA        NA        NA         NA
## 7  -0.662018564 -0.306323632  4.2487217 0.2965138 129.16408 0.04777590
## 8   0.634698476 -1.201436387  8.8895832 0.2811660  65.23515 0.03921653
## 9            NA           NA         NA        NA        NA         NA
## 10           NA           NA         NA        NA        NA         NA
## 11           NA           NA         NA        NA        NA         NA
## 12           NA           NA         NA        NA        NA         NA
## 13  4.550460977 -3.389923292  3.8302748 0.7648971 122.41249 0.09463093
## 14           NA           NA         NA        NA        NA         NA
## 15 -0.539929698 -0.208387362  2.6811364 0.3722732  65.01676 0.09372735
## 16 -1.101795623  0.083785817  1.8850782 0.3300295 172.60791 0.03856901
## 17 -0.425113223 -0.139486699  2.9346619 0.4131456 103.67228 0.06598959
## 18  0.219436465 -0.856918485  2.4440477 0.5233194  75.66341 0.06516753
## 19           NA           NA         NA        NA        NA         NA
## 20 -0.725164115 -0.109560582 14.3631825 0.2042453 134.77439 0.03773273
## 21           NA           NA         NA        NA        NA         NA
## 22           NA           NA         NA        NA        NA         NA
## 23           NA           NA         NA        NA        NA         NA
## 24           NA           NA         NA        NA        NA         NA
## 25 -0.951445854  0.041447477  6.0305135 0.2563024  85.36911 0.04727476
## 26           NA           NA         NA        NA        NA         NA
## 27 -0.775463097 -0.121916471  6.3626957 0.2819372  91.56456 0.04538306
## 28           NA           NA         NA        NA        NA         NA
## 29 -1.267573408  0.189456508  4.0805721 0.2622164  79.17079 0.07135673
## 30           NA           NA         NA        NA        NA         NA
## 31  0.041391674 -0.654000904  3.2623827 0.4021060 203.06464 0.04222426
## 32           NA           NA         NA        NA        NA         NA
## 33 -0.009903264 -0.707850842  7.0445057 0.3321336 342.45215 0.03357662
## 34           NA           NA         NA        NA        NA         NA
## 35 -0.890023163 -0.009239465 11.5860553 0.1915513  21.03712 0.06313322
## 36           NA           NA         NA        NA        NA         NA
## 37  0.217787110 -0.997496521 20.7208892 0.2400779 272.81603 0.03815858
## 38 -1.381139063  0.245391772 42.5870834 0.1375360 210.98223 0.03375289
## 39           NA           NA         NA        NA        NA         NA
## 40  1.554579677 -1.955312320  7.0346726 0.4111997  84.36701 0.07948043
## 41           NA           NA         NA        NA        NA         NA
## 42 -0.121466348 -0.530816870  1.0913207 0.4699618  77.34715 0.07132351
## 43           NA           NA         NA        NA        NA         NA
## 44 -0.606836268 -0.232368171  5.5232274 0.2490354 198.06147 0.03633456
## 45           NA           NA         NA        NA        NA         NA
## 46  0.993122561 -1.164209394  1.8989629 0.7197620  88.36344 0.06871813
## 47           NA           NA         NA        NA        NA         NA
## 48           NA           NA         NA        NA        NA         NA
## 49           NA           NA         NA        NA        NA         NA
## 50 -0.855214595 -0.097881322  4.9302055 0.2839657 223.93236 0.04900615
## 51           NA           NA         NA        NA        NA         NA
## 52           NA           NA         NA        NA        NA         NA
## 53           NA           NA         NA        NA        NA         NA
## 54           NA           NA         NA        NA        NA         NA
## 55 -0.806295457 -0.182337749  5.0378911 0.1848222 165.53122 0.05949623
## 56  1.345512731 -1.549716954  2.4281533 0.6115682 355.82236 0.03291526
## 57           NA           NA         NA        NA        NA         NA
## 58           NA           NA         NA        NA        NA         NA
## 59 -1.287540453  0.285887766  4.7061813 0.2421706 267.58165 0.02761882
## 60  0.357795359 -0.897707346  5.8063077 0.3425010 412.93398 0.03392121
## 61 -1.116939674  0.167689662 44.5329549 0.1252091 468.34051 0.02272143
## 62           NA           NA         NA        NA        NA         NA
## 63           NA           NA         NA        NA        NA         NA
## 64           NA           NA         NA        NA        NA         NA
## 65 -0.316540656 -0.560185285  7.7923205 0.1800678  77.45133 0.09920309
## 66  2.103178835 -2.153795108  3.6850673 0.5725864 121.95840 0.05137843
## 67  0.959356890 -1.405991263  6.4126523 0.3550241 206.11373 0.04346317
## 68 -0.863929345 -0.045224237  2.1167759 0.2504465 130.77392 0.04516646
## 69 -0.853729999 -0.216094266 13.2387432 0.1303754  96.16639 0.04001957
## 70 -0.611769263 -0.181051342  4.4875338 0.2933409  78.51482 0.05330476
## 71 -0.711175439 -0.262401601 44.5960982 0.1341209 442.10218 0.02041765
## 72 -0.714652029 -0.128020822  0.6114478 0.2826349  75.77366 0.08639433
## 73 -0.401819133 -0.362281929  1.1782393 0.5491936 152.14229 0.06569002
## 74           NA           NA         NA        NA        NA         NA
## 75 -0.557347474 -0.286917746 27.1582761 0.1293981 555.02862 0.02269467
## 76 -0.910738679  0.014418354 25.7205641 0.1833859 289.83380 0.03898005
## 77           NA           NA         NA        NA        NA         NA
## 78           NA           NA         NA        NA        NA         NA
## 79           NA           NA         NA        NA        NA         NA
## 80           NA           NA         NA        NA        NA         NA
## 81 -0.522369940 -0.089771118  7.8775040 0.1676985 246.95904 0.03341482
# Scatter/pairs plots of the per-tree estimates.
plot(coef(n0_list))

pairs(coef(n0_list))

# Principal components of the standardized per-tree estimates (rows
# with NA, i.e. trees without a successful fit, are dropped first).
# acp <- princomp(drop_na(coef(n0_list)))  # unscaled alternative.
acp <- coef(n0_list) %>%
    drop_na() %>%
    scale() %>%
    princomp()
screeplot(acp)

acp$loadings
## 
## Loadings:
##    Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## b1  0.445  0.433  0.119  0.216  0.172  0.724
## b2 -0.439 -0.438 -0.135 -0.269 -0.227  0.688
## b3 -0.354  0.397  0.713        -0.452       
## a1  0.475  0.157 -0.328 -0.405 -0.690       
## b4 -0.318  0.558 -0.278 -0.598  0.391       
## a2  0.396 -0.357  0.524 -0.596  0.293       
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.167  0.167  0.167  0.167  0.167  0.167
## Cumulative Var  0.167  0.333  0.500  0.667  0.833  1.000
biplot(acp)

# DANGER: the parameter estimates of the mean curve will not, in
# general, equal the mean of the per-tree estimates.  On top of that,
# a considerable portion of the trees had no successful fit (NA rows).

# Mean of the per-tree estimates side by side with the start values.
coef(n0_list) %>%
    as.data.frame() %>%
    colMeans(na.rm = TRUE) %>%
    as.data.frame() %>%
    add_column(start)
##               .   start
## b1  -0.05134626  -0.801
## b2  -0.60751594  -0.130
## b3   9.35548867  10.081
## a1   0.33313285   0.215
## b4 176.89855181 175.660
## a2   0.05317766   0.042
#

# 6. Ajuste do modelo de efeitos aleatórios ----

#-----------------------------------------------------------------------
# Fit the random-effects model to obtain the mean (population) profile.

# Same Max & Burkhart formula as before, redefined here for clarity.
model <- y2 ~ ((b1 * (x - 1) +
                b2 * ((x^2) - 1) +
                (b3 * (a1 - x)^2) * (x <= a1) +
                (b4 * (a2 - x)^2) * (x <= a2)))

# All six parameters get a fixed effect; the random-effects structure
# was chosen by trial and error -- the rejected alternatives are kept
# below, each annotated with the reason it failed.
n0_ran <- nlme(model = model,
               data = tbg,
               fixed = b1 + b2 + b3 + a1 + b4 + a2 ~ 1,
               random = b2 + a2 ~ 1 | arv, # OK. Best logLik.
               # random = b2 + b3 ~ 1 | arv, # Singularity.
               # random = b3 + b4 ~ 1 | arv, # Singularity.
               # random = b2 + b4 ~ 1 | arv, # Singularity.
               # random = b1 + b4 ~ 1 | arv, # Singularity.
               # random = b3 + a1 ~ 1 | arv, # Singularity.
               # random = b1 + a1 ~ 1 | arv, # OK.
               # random = b3 + a1 ~ 1 | arv, # Singularity.
               # random = b4 + a1 ~ 1 | arv, # Singularity.
               # random = b2 + a1 ~ 1 | arv, # minimum step.
               # random = b1 + a1 + a2 ~ 1 | arv, # Singularity.
               # random = b1 + a1 + b3 ~ 1 | arv, # Error.
               # random = b1 + a1 + b4 ~ 1 | arv, # Singularity.
               # random = b1 + b3 + a1 + b4 + a2 ~ 1 | arv,
               start = start)

# Summary of the fitted model.
summary(n0_ran)
## Nonlinear mixed-effects model fit by maximum likelihood
##   Model: model 
##   Data: tbg 
##         AIC       BIC   logLik
##   -2624.463 -2572.793 1322.232
## 
## Random effects:
##  Formula: list(b2 ~ 1, a2 ~ 1)
##  Level: arv
##  Structure: General positive-definite, Log-Cholesky parametrization
##          StdDev     Corr  
## b2       0.07800116 b2    
## a2       0.01641408 -0.202
## Residual 0.07394775       
## 
## Fixed effects:  b1 + b2 + b3 + a1 + b4 + a2 ~ 1 
##        Value Std.Error   DF   t-value p-value
## b1  -0.73187  0.082531 1210 -8.867777  0.0000
## b2  -0.17395  0.056604 1210 -3.073016  0.0022
## b3   2.87537  0.586732 1210  4.900651  0.0000
## a1   0.31375  0.033120 1210  9.473019  0.0000
## b4 109.60514  6.099304 1210 17.970104  0.0000
## a2   0.06660  0.003253 1210 20.475121  0.0000
##  Correlation: 
##    b1     b2     b3     a1     b4    
## b2 -0.984                            
## b3 -0.345  0.310                     
## a1  0.719 -0.676 -0.869              
## b4  0.036 -0.033  0.173 -0.002       
## a2  0.064 -0.076 -0.472  0.259 -0.695
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -5.4183997 -0.4622225  0.0000000  0.4901663 13.3298138 
## 
## Number of Observations: 1296
## Number of Groups: 81
# Log-likelihood.
logLik(n0_ran)
## 'log Lik.' 1322.232 (df=10)
# Residuals (this plot alone is not sufficient for diagnostics).
plot(n0_ran)

# Inspect the random effects.
plot(ranef(n0_ran))

pairs(ranef(n0_ran))

# Observed versus fitted, one panel per tree.
plot(augPred(n0_ran), as.table = TRUE)

# Estimates of the "mean curve" (fixed effects).
fixef(n0_ran)
##          b1          b2          b3          a1          b4          a2 
##  -0.7318660  -0.1739453   2.8753691   0.3137508 109.6051353   0.0666046
# Compare the two fits: pooled nls (cyan) versus nlme population
# curve (orange).
plot(y2 ~ x, data = tbg)
curve(predict(n0_ran, newdata = list(x = x), level = 0),
      add = TRUE,
      lty = 1, lwd = 2,
      col = "orange")
curve(predict(n0, newdata = list(x = x)),
      add = TRUE,
      lty = 1, lwd = 2,
      col = "cyan")

# NOTE: there is no visible difference in the position of the curves.

# Point estimates side by side.
cbind(coef(n0), fixef(n0_ran))
##            [,1]        [,2]
## b1  -0.80099838  -0.7318660
## b2  -0.12963395  -0.1739453
## b3  10.08219845   2.8753691
## a1   0.21536407   0.3137508
## b4 175.69040273 109.6051353
## a2   0.04206867   0.0666046
# Build one table with point estimates and confidence intervals for
# the parameters of both fits.
tb_coef <- bind_rows(
    # Pooled nls fit (Wald intervals).
    cbind(est = coef(n0), confint.default(n0)) %>%
        as.data.frame() %>%
        rename(lower = 2, upper = 3) %>%
        add_column(model = "nls") %>%
        rownames_to_column(var = "param"),
    # nlme fit (intervals on the fixed effects).
    intervals(n0_ran, which = "fixed")$fixed %>%
        as.data.frame() %>%
        relocate(est., lower, upper) %>%
        rename(est = est.) %>%
        add_column(model = "nlme") %>%
        rownames_to_column(var = "param"))

# Compare CI width and overlap between the two fits.
# FIX(review): facet_wrap()'s argument is `scales`; the original
# `scale = "free_y"` only worked through partial argument matching.
ggplot(data = tb_coef,
       mapping = aes(x = model, y = est, ymin = lower, ymax = upper)) +
    facet_wrap(facets = ~param, scales = "free_y", nrow = 1) +
    geom_point() +
    geom_errorbar(width = 0.1) +
    labs(x = "Modelo considerado",
         y = "Valor do parâmetro")

#