Estatística Aplicada à Ciência do Solo

github.com/walmes/EACS

Análise Exploratória

#-----------------------------------------------------------------------
# Carrega os pacotes necessários.

library(lattice)
library(latticeExtra)
library(plyr)
library(reshape2)
library(car)
library(nFactors)
library(rpart)
library(EACS)
#-----------------------------------------------------------------------
# Análise exploratório dos dados.

str(teca_qui)
## 'data.frame':    150 obs. of  15 variables:
##  $ loc: int  1 1 1 2 2 2 3 3 3 4 ...
##  $ cam: Factor w/ 3 levels "[0, 5)","[5, 40)",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ ph : num  6.8 6.7 6.7 4.7 4.7 4.9 7.6 6.8 6.9 6.6 ...
##  $ p  : num  22.51 0.83 0.01 3.89 0.69 ...
##  $ k  : num  72.24 13.42 7.23 48.13 12.34 ...
##  $ ca : num  8.27 2.91 2.33 0.97 0.76 0.21 7.63 3.22 2.22 5.54 ...
##  $ mg : num  1.7 1.77 0.51 0.16 0.14 0 1.53 1.31 1.09 1.77 ...
##  $ al : num  0 0 0 0.3 0.6 0.6 0 0 0 0 ...
##  $ ctc: num  12.47 6.57 4.52 5.3 4.17 ...
##  $ sat: num  81.4 71.7 63.2 23.7 22.4 ...
##  $ mo : num  72.2 25.6 9.7 34.4 8.7 9.7 50.4 15.6 9.5 50.2 ...
##  $ arg: num  184 215 286 232 213 ...
##  $ are: num  770 750 674 741 775 ...
##  $ cas: num  1.8 2.2 3.7 0.4 1.1 1.7 8.4 19 14.3 5.5 ...
##  $ acc: num  770 750 676 741 775 ...
str(teca_crapar)
## 'data.frame':    144 obs. of  10 variables:
##  $ loc: int  1 1 1 2 2 2 3 3 3 4 ...
##  $ cam: Factor w/ 3 levels "[0, 5)","[5, 40)",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ Ur : num  0.223 0.178 0.188 0.131 0.165 ...
##  $ Us : num  0.648 0.58 0.647 0.697 0.573 ...
##  $ alp: num  -0.724 -0.833 -0.722 -0.548 -0.547 ...
##  $ n  : num  3.59 3.11 3.25 4.01 2.92 ...
##  $ cad: num  0.227 0.217 0.247 0.3 0.222 ...
##  $ Ui : num  0.45 0.396 0.435 0.431 0.387 ...
##  $ I  : num  0.815 0.957 0.835 0.62 0.691 ...
##  $ S  : num  -0.341 -0.273 -0.329 -0.515 -0.257 ...
##  - attr(*, "na.action")=Class 'omit'  Named int 88
##   .. ..- attr(*, "names")= chr "40.[5, 40)"
str(teca_arv)
## 'data.frame':    50 obs. of  5 variables:
##  $ loc : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ alt : num  12.9 11.9 22.5 20.4 15.6 ...
##  $ dap : num  0.194 0.182 0.348 0.304 0.227 ...
##  $ vol : num  0.2707 0.0963 0.4974 0.4074 0.2165 ...
##  $ prod: num  68.2 24.3 125.3 102.7 54.6 ...
# Juntar as inforções químicas, físico-hídricas e de produção.
db <- merge(teca_qui, teca_crapar, all = TRUE)
str(db)
## 'data.frame':    150 obs. of  23 variables:
##  $ loc: int  1 1 1 2 2 2 3 3 3 4 ...
##  $ cam: Factor w/ 3 levels "[0, 5)","[5, 40)",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ ph : num  6.8 6.7 6.7 4.7 4.7 4.9 7.6 6.8 6.9 6.6 ...
##  $ p  : num  22.51 0.83 0.01 3.89 0.69 ...
##  $ k  : num  72.24 13.42 7.23 48.13 12.34 ...
##  $ ca : num  8.27 2.91 2.33 0.97 0.76 0.21 7.63 3.22 2.22 5.54 ...
##  $ mg : num  1.7 1.77 0.51 0.16 0.14 0 1.53 1.31 1.09 1.77 ...
##  $ al : num  0 0 0 0.3 0.6 0.6 0 0 0 0 ...
##  $ ctc: num  12.47 6.57 4.52 5.3 4.17 ...
##  $ sat: num  81.4 71.7 63.2 23.7 22.4 ...
##  $ mo : num  72.2 25.6 9.7 34.4 8.7 9.7 50.4 15.6 9.5 50.2 ...
##  $ arg: num  184 215 286 232 213 ...
##  $ are: num  770 750 674 741 775 ...
##  $ cas: num  1.8 2.2 3.7 0.4 1.1 1.7 8.4 19 14.3 5.5 ...
##  $ acc: num  770 750 676 741 775 ...
##  $ Ur : num  0.223 0.178 0.188 0.131 0.165 ...
##  $ Us : num  0.648 0.58 0.647 0.697 0.573 ...
##  $ alp: num  -0.724 -0.833 -0.722 -0.548 -0.547 ...
##  $ n  : num  3.59 3.11 3.25 4.01 2.92 ...
##  $ cad: num  0.227 0.217 0.247 0.3 0.222 ...
##  $ Ui : num  0.45 0.396 0.435 0.431 0.387 ...
##  $ I  : num  0.815 0.957 0.835 0.62 0.691 ...
##  $ S  : num  -0.341 -0.273 -0.329 -0.515 -0.257 ...
# Todos os locais tem pelo menos um registro. Usar o registro que
# estiver disponível na ordem de prioridade das camadas: II, III e I.
xtabs(~loc, data = na.omit(db))
## loc
##  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 
##  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  2  3  3 
## 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 
##  3  3  3  3  3  3  3  3  2  3  3  2  2  3  3  3  3  3  3  1  3  3 
## 45 46 47 48 49 50 
##  3  3  3  3  3  3
# Número de missings por variável separado por camada.
by(data = is.na(db[, -(1:2)]), INDICES = db$cam, FUN = colSums)
## INDICES: [0, 5)
##  ph   p   k  ca  mg  al ctc sat  mo arg are cas acc  Ur  Us alp   n 
##   0   0   0   0   0   0   0   0   0   0   0   0   0   1   1   1   1 
## cad  Ui   I   S 
##   1   1   1   1 
## --------------------------------------------------- 
## INDICES: [5, 40)
##  ph   p   k  ca  mg  al ctc sat  mo arg are cas acc  Ur  Us alp   n 
##   0   0   0   0   0   0   0   0   0   0   0   0   0   3   3   3   3 
## cad  Ui   I   S 
##   3   3   3   3 
## --------------------------------------------------- 
## INDICES: [40, 80)
##  ph   p   k  ca  mg  al ctc sat  mo arg are cas acc  Ur  Us alp   n 
##   0   0   0   0   0   0   0   0   0   0   0   0   0   2   2   2   2 
## cad  Ui   I   S 
##   2   2   2   2
#-----------------------------------------------------------------------
# Fazendo a seleção de um valor para cada local.

# Será mantido os valores na camada II de cada local. Caso a camada II
# esteja incompleta, será usada a camada III e por fim a camada I.

# Remove linhas com registros ausentes.
dc <- na.omit(db)
attr(dc, "na.action") <- NULL

# Reordena os níveis (para usar na seleção).
dc$cam <- factor(dc$cam, levels = levels(db$cam)[c(2, 3, 1)])

# Reordena.
dc <- arrange(dc, loc, cam)

# Pega o primeiro registro de cada local.
dd <- do.call(rbind,
              by(dc, INDICES = dc$loc,
                 FUN = function(x) {
                     x[1, ]}
                 ))
str(dd)
## 'data.frame':    50 obs. of  23 variables:
##  $ loc: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ cam: Factor w/ 3 levels "[5, 40)","[40, 80)",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ph : num  6.7 4.7 6.8 6.2 5.1 5.2 5.1 6.4 4.9 5 ...
##  $ p  : num  0.83 0.69 1.66 1.17 0.49 0.69 0.21 1.14 1.36 1.04 ...
##  $ k  : num  13.4 12.3 25.8 11.1 22.2 ...
##  $ ca : num  2.91 0.76 3.22 2.25 0.93 0.7 0.24 2.39 1.15 0.72 ...
##  $ mg : num  1.77 0.14 1.31 0.21 0.52 0.36 0.14 0.99 0.05 0.37 ...
##  $ al : num  0 0.6 0 0 0.2 0.5 0.6 0 0.3 0.6 ...
##  $ ctc: num  6.57 4.17 6.26 4.56 4.1 5.16 5.02 5.28 4.86 3.86 ...
##  $ sat: num  71.7 22.4 73.4 54.5 36.8 ...
##  $ mo : num  25.6 8.7 15.6 10 11.2 5.8 8.5 11.8 6.3 6.9 ...
##  $ arg: num  215 213 234 169 304 ...
##  $ are: num  750 775 698 788 665 ...
##  $ cas: num  2.2 1.1 19 3.1 7.2 23.4 2.9 2.1 2.5 1.6 ...
##  $ acc: num  750 775 704 788 667 ...
##  $ Ur : num  0.1785 0.1649 0.3699 0.0372 0.1902 ...
##  $ Us : num  0.58 0.573 0.659 0.521 0.608 ...
##  $ alp: num  -0.833 -0.547 -0.552 -0.478 -0.671 ...
##  $ n  : num  3.11 2.92 1.96 2.21 2.93 ...
##  $ cad: num  0.217 0.222 0.168 0.274 0.228 ...
##  $ Ui : num  0.396 0.387 0.538 0.311 0.418 ...
##  $ I  : num  0.957 0.691 0.918 0.751 0.813 ...
##  $ S  : num  -0.273 -0.257 -0.108 -0.214 -0.265 ...
# Deixar o S positivo para facilitar a interpretação.
dd$S <- -1 * dd$S

Análise Fatorial

Na análise fatorial, a variável acc (areia + cascalho + calhau) não foi usada por ter alta correlação, por construção, com areia e cascalho. A variável da curva de água do solo Ur foi deixada de fora para ser usado o cad = Ui - Ur.

#-----------------------------------------------------------------------

# Em cada linha o grupo de variáveis químicas, físicas e hídricas.
j <- c("ph", "p", "k", "ca", "mg", "al", "ctc", "sat", "mo",
       "arg", "are", "cas",
       "alp", "n", "I", "Us", "Ui", "S", "cad")
X <- dd[, j]
fit0 <- factanal(X,
                 factors = 4,
                 # covmat = cov2cor(var(X)),
                 # covmat = var(X),
                 rotation = "varimax")

print(fit0, digits = 2, cutoff = 0.5, sort = TRUE)
## 
## Call:
## factanal(x = X, factors = 4, rotation = "varimax")
## 
## Uniquenesses:
##   ph    p    k   ca   mg   al  ctc  sat   mo  arg  are  cas  alp 
## 0.12 0.72 0.72 0.19 0.37 0.36 0.20 0.02 0.55 0.08 0.04 0.89 0.00 
##    n    I   Us   Ui    S  cad 
## 0.11 0.08 0.41 0.54 0.00 0.19 
## 
## Loadings:
##     Factor1 Factor2 Factor3 Factor4
## ph   0.93                          
## k    0.52                          
## ca   0.79                          
## mg   0.66                          
## al  -0.80                          
## ctc  0.69            0.56          
## sat  0.98                          
## mo   0.63                          
## alp          0.95                  
## I           -0.96                  
## Us           0.74                  
## Ui           0.51                  
## cad          0.89                  
## arg                  0.90          
## are                 -0.92          
## n                            0.89  
## S                            0.99  
## p                                  
## cas                                
## 
##                Factor1 Factor2 Factor3 Factor4
## SS loadings       4.99    3.65    2.57    2.19
## Proportion Var    0.26    0.19    0.14    0.12
## Cumulative Var    0.26    0.45    0.59    0.71
## 
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 535.95 on 101 degrees of freedom.
## The p-value is 1.84e-60

Com 4 fatores chegou-se a uma explicação superior à 70% da variância total. Baseado nos carregamentos de valor superior absoluto maior que 0.5, foi possível interpretar cada um dos fatores em termos de índices:

  1. Índice de cátions do solo. Contraste entre cátions desejáveis ou variáveis que favorecem/resultam de cátions vs. o alumínio (al) do solo.
  2. Índice de disponibilidade de água. Contraste entre parâmetros que aumentam a disponibilidade de água (alp, Us, Ui, cad) vs. que diminuem (I).
  3. Índice de porosidade do solo. Contraste entre conteúdo de argila vs. areia. O aumento da argila aumenta a microporosidade que é responsável por maior armazenamento de água.
  4. Índice de inclinação da curva de retenção de água. Função linear do parâmetro S (slope) e n (parâmetro de forma) que são ambos relacionados à inclinação da curva de retenção e da CRA.

Algumas variáveis não apresentaram carregamento alto e por isso alta singularidade (uniquenesses), sendo por isso variáveis pouco explicadas pelos fatores. Essas variáveis serão removidas e o ajuste será refeito.

#-----------------------------------------------------------------------
# Removendo variáveis de pouca importância ou sem explicação (com
# alta singularidade/unicidade).

# str(fit0)

# Variáveis abandonadas.
j[fit0$uniquenesses > 0.7]
## [1] "p"   "k"   "cas"
# Reajuste.
X <- dd[, j[fit0$uniquenesses <= 0.7]]
fit1 <- factanal(X,
                 factors = 4,
                 rotation = "varimax",
                 scores = "regression")

colnames(fit1$loadings) <- c("cation", "agua", "poros", "inclin")
print(fit1, digits = 2, cutoff = 0.5, sort = TRUE)
## 
## Call:
## factanal(x = X, factors = 4, scores = "regression", rotation = "varimax")
## 
## Uniquenesses:
##   ph   ca   mg   al  ctc  sat   mo  arg  are  alp    n    I   Us 
## 0.13 0.20 0.38 0.35 0.21 0.01 0.56 0.08 0.04 0.00 0.11 0.08 0.41 
##   Ui    S  cad 
## 0.54 0.00 0.19 
## 
## Loadings:
##     cation agua  poros inclin
## ph   0.93                    
## ca   0.79                    
## mg   0.66                    
## al  -0.80                    
## ctc  0.68         0.57       
## sat  0.99                    
## mo   0.62                    
## alp         0.95             
## I          -0.96             
## Us          0.74             
## Ui          0.52             
## cad         0.89             
## arg               0.89       
## are              -0.92       
## n                       0.89 
## S                       0.99 
## 
##                cation agua poros inclin
## SS loadings      4.56 3.65  2.38   2.12
## Proportion Var   0.28 0.23  0.15   0.13
## Cumulative Var   0.28 0.51  0.66   0.79
## 
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 505.08 on 62 degrees of freedom.
## The p-value is 1.06e-70
# Singularidade das variáveis.
sort(fit1$uniquenesses)
##        alp          S        sat        are          I        arg 
## 0.00500000 0.00500000 0.01005537 0.04343385 0.07668692 0.08386209 
##          n         ph        cad         ca        ctc         al 
## 0.10625758 0.12559882 0.19047046 0.19832549 0.20827617 0.34872158 
##         mg         Us         Ui         mo 
## 0.37793727 0.40720590 0.53886744 0.56185743
# Carregamentos do fator 1 contra 2.
load <- fit1$loadings[, 1:2]
plot(load, type = "n",
     xlab = "Cátions", ylab = "Água")
abline(v = 0, h = 0, lty = 2)
text(load, labels = names(X), cex = 0.8)

# Pares de gráficos dos escores.
sc <- fit1$scores
pairs(sc)

Os resultados com o abandono das variáveis de alta singularidade não sofreu modificações substanciais. A interpretação dos fatores foi mantida. Com estes 4 fatores foi obtido uma explicação de 80% da variância total.

Apenas por precaução, foi confirmado por simulação que o número de fatores é de fato 4.

#-----------------------------------------------------------------------
# Determinação por simulação do número de fatores.

ev <- eigen(cor(X))
ap <- parallel(subject = nrow(X),
               var = ncol(X),
               rep = 100,
               cent = 0.05)

nS <- nScree(x = ev$values, aparallel = ap$eigen$qevpea)
plotnScree(nS)

Ajuste de Modelo de Regressão com os Escores

Os escores ou índices definidos pela análise fatorial serão usados como variáveis explicativas da produção de madeira.

#-----------------------------------------------------------------------
# Justar os escores com a variável de produção de madeira.

de <- merge(cbind(loc = dd[, "loc"], as.data.frame(sc)),
            teca_arv[, c("loc", "prod")])
names(de)[2:5] <- colnames(fit1$loadings)
str(de)
## 'data.frame':    50 obs. of  6 variables:
##  $ loc   : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ cation: num  1.031 -1.421 1.214 0.281 -0.813 ...
##  $ agua  : num  -0.6673 -0.0692 -0.7716 -0.125 -0.3489 ...
##  $ poros : num  -1.688 -1.17 -1.327 -2.005 -0.363 ...
##  $ inclin: num  0.865 0.935 -1.304 0.225 0.887 ...
##  $ prod  : num  68.2 24.3 125.3 102.7 54.6 ...
xyplot(sqrt(prod) ~ cation + agua + poros + inclin,
       data = de,
       outer = TRUE,
       type = c("p", "r"),
       as.table = TRUE,
       scales = list(x = list(relation = "free")),
       xlab = "Escores da análise fatorial",
       ylab = "Produção de madeira")

#-----------------------------------------------------------------------
# Ajuste do modelo de regressão.

# ATTENTION: Existe um maior atendimento dos pressupostos usando sqrt do
# que a variável original.

# m0 <- lm(prod ~ . - loc, data = de)
m0 <- lm(sqrt(prod) ~ . - loc, data = de)
summary(m0)
## 
## Call:
## lm(formula = sqrt(prod) ~ . - loc, data = de)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5113 -1.0517 -0.1062  1.1450  3.2344 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   8.7856     0.1913  45.931  < 2e-16 ***
## cation        1.7240     0.1942   8.878 1.89e-11 ***
## agua         -0.3954     0.1938  -2.040   0.0472 *  
## poros         0.4150     0.1967   2.111   0.0404 *  
## inclin        0.2030     0.1938   1.048   0.3004    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.353 on 45 degrees of freedom
## Multiple R-squared:  0.6636, Adjusted R-squared:  0.6337 
## F-statistic: 22.19 on 4 and 45 DF,  p-value: 3.598e-10
par(mfrow = c(2, 2))
plot(m0)

layout(1)

# MASS::boxcox(m0)
# abline(v = c(0.5, 1), col = 2)

residualPlots(m0)

##            Test stat Pr(>|t|)
## cation        -2.456    0.018
## agua          -1.025    0.311
## poros         -0.003    0.998
## inclin        -0.200    0.842
## Tukey test    -3.110    0.002
im <- influence.measures(m0)
summary(im)
## Potentially influential observations of
##   lm(formula = sqrt(prod) ~ . - loc, data = de) :
## 
##    dfb.1_ dfb.catn dfb.agua dfb.pors dfb.incl dffit   cov.r  
## 12 -0.05  -0.01    -0.03     0.02    -0.18    -0.19    1.59_*
## 36 -0.19   0.05    -0.94     0.28     0.10    -1.01_*  2.47_*
## 38  0.41  -0.44    -0.18     0.54    -0.47     0.95    0.58_*
## 43 -0.21  -0.30    -0.45    -0.41     0.38    -0.81    1.36_*
## 48 -0.13  -0.12    -0.01    -0.31    -0.25    -0.44    1.34_*
## 49  0.05   0.02     0.01     0.09     0.11     0.15    1.37_*
##    cook.d hat    
## 12  0.01   0.30_*
## 36  0.20   0.58_*
## 38  0.16   0.11  
## 43  0.13   0.30_*
## 48  0.04   0.22  
## 49  0.00   0.19
# Remover observação com maior alavancagem.
which(im$is.inf[, "hat"])
## 12 36 43 
## 12 36 43
r <- which.max(im$infmat[, "hat"])

m0 <- lm(prod ~ . - loc, data = de[-r, ])
# m0 <- lm(sqrt(prod) ~ . - loc, data = de[, 35])
summary(m0)
## 
## Call:
## lm(formula = prod ~ . - loc, data = de[-r, ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.427 -14.990  -4.932  18.135  55.991 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   82.492      3.381  24.399  < 2e-16 ***
## cation        28.434      3.359   8.465 8.76e-11 ***
## agua          -3.865      4.992  -0.774   0.4429    
## poros          6.216      3.575   1.739   0.0891 .  
## inclin         3.655      3.366   1.086   0.2835    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.35 on 44 degrees of freedom
## Multiple R-squared:  0.6312, Adjusted R-squared:  0.5976 
## F-statistic: 18.82 on 4 and 44 DF,  p-value: 4.396e-09
par(mfrow = c(2, 2))
plot(m0)

layout(1)

residualPlots(m0)

##            Test stat Pr(>|t|)
## cation        -1.416    0.164
## agua          -0.826    0.413
## poros          0.167    0.868
## inclin        -0.625    0.536
## Tukey test    -1.837    0.066
m1 <- update(m0, . ~ cation)
summary(m1)
## 
## Call:
## lm(formula = prod ~ cation, data = de[-r, ])
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -47.42 -16.23  -1.68  19.08  60.84 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   83.110      3.378   24.61  < 2e-16 ***
## cation        28.300      3.397    8.33 8.23e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.64 on 47 degrees of freedom
## Multiple R-squared:  0.5962, Adjusted R-squared:  0.5876 
## F-statistic: 69.39 on 1 and 47 DF,  p-value: 8.233e-11
anova(m0, m1)
## Analysis of Variance Table
## 
## Model 1: prod ~ (loc + cation + agua + poros + inclin) - loc
## Model 2: prod ~ cation
##   Res.Df   RSS Df Sum of Sq      F Pr(>F)
## 1     44 23998                           
## 2     47 26274 -3   -2276.5 1.3913  0.258
# Carregamento das variáveis no fator 1.
cbind(fit1$loadings[, "cation"])
##             [,1]
## ph   0.925604336
## ca   0.788356898
## mg   0.659070902
## al  -0.804148698
## ctc  0.678101756
## sat  0.986348560
## mo   0.619513624
## arg  0.255178443
## are -0.295877997
## alp  0.005452939
## n    0.086897134
## I   -0.007830131
## Us  -0.043650910
## Ui  -0.079906747
## S    0.116752464
## cad  0.038783719

Ajuste de Modelo de Regressão Múltipla com a Variáveis de Solo

#-----------------------------------------------------------------------

de <- merge(subset(dd, select = -cam),
            teca_arv[, c("loc", "prod")])
de <- subset(de, select = -loc)
str(de)
## 'data.frame':    50 obs. of  22 variables:
##  $ ph  : num  6.7 4.7 6.8 6.2 5.1 5.2 5.1 6.4 4.9 5 ...
##  $ p   : num  0.83 0.69 1.66 1.17 0.49 0.69 0.21 1.14 1.36 1.04 ...
##  $ k   : num  13.4 12.3 25.8 11.1 22.2 ...
##  $ ca  : num  2.91 0.76 3.22 2.25 0.93 0.7 0.24 2.39 1.15 0.72 ...
##  $ mg  : num  1.77 0.14 1.31 0.21 0.52 0.36 0.14 0.99 0.05 0.37 ...
##  $ al  : num  0 0.6 0 0 0.2 0.5 0.6 0 0.3 0.6 ...
##  $ ctc : num  6.57 4.17 6.26 4.56 4.1 5.16 5.02 5.28 4.86 3.86 ...
##  $ sat : num  71.7 22.4 73.4 54.5 36.8 ...
##  $ mo  : num  25.6 8.7 15.6 10 11.2 5.8 8.5 11.8 6.3 6.9 ...
##  $ arg : num  215 213 234 169 304 ...
##  $ are : num  750 775 698 788 665 ...
##  $ cas : num  2.2 1.1 19 3.1 7.2 23.4 2.9 2.1 2.5 1.6 ...
##  $ acc : num  750 775 704 788 667 ...
##  $ Ur  : num  0.1785 0.1649 0.3699 0.0372 0.1902 ...
##  $ Us  : num  0.58 0.573 0.659 0.521 0.608 ...
##  $ alp : num  -0.833 -0.547 -0.552 -0.478 -0.671 ...
##  $ n   : num  3.11 2.92 1.96 2.21 2.93 ...
##  $ cad : num  0.217 0.222 0.168 0.274 0.228 ...
##  $ Ui  : num  0.396 0.387 0.538 0.311 0.418 ...
##  $ I   : num  0.957 0.691 0.918 0.751 0.813 ...
##  $ S   : num  0.273 0.257 0.108 0.214 0.265 ...
##  $ prod: num  68.2 24.3 125.3 102.7 54.6 ...
# Número de observações.
nrow(de)
## [1] 50
# m0 <- lm(prod ~ ., data = de)
m0 <- lm(sqrt(prod) ~ ., data = de)
anova(m0)
## Analysis of Variance Table
## 
## Response: sqrt(prod)
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## ph         1 124.347 124.347 68.1921 4.191e-09 ***
## p          1   0.001   0.001  0.0007  0.979655    
## k          1   0.434   0.434  0.2382  0.629146    
## ca         1   1.488   1.488  0.8162  0.373731    
## mg         1   8.249   8.249  4.5236  0.042062 *  
## al         1  17.590  17.590  9.6461  0.004216 ** 
## ctc        1   0.082   0.082  0.0447  0.833953    
## sat        1   8.148   8.148  4.4686  0.043244 *  
## mo         1   0.258   0.258  0.1413  0.709736    
## arg        1   6.899   6.899  3.7835  0.061515 .  
## are        1   0.527   0.527  0.2891  0.594907    
## cas        1   5.846   5.846  3.2062  0.083807 .  
## acc        1   5.766   5.766  3.1623  0.085845 .  
## Ur         1   3.191   3.191  1.7501  0.196206    
## Us         1   7.614   7.614  4.1757  0.050186 .  
## alp        1   0.053   0.053  0.0293  0.865219    
## n          1   0.066   0.066  0.0365  0.849914    
## cad        1   0.307   0.307  0.1683  0.684638    
## I          1   0.356   0.356  0.1953  0.661836    
## S          1   0.616   0.616  0.3381  0.565448    
## Residuals 29  52.881   1.823                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# im <- influence.measures(m0)
# summary(im)

# residualPlots(m0)

# AIC.
m1 <- step(m0)
## Start:  AIC=44.8
## sqrt(prod) ~ ph + p + k + ca + mg + al + ctc + sat + mo + arg + 
##     are + cas + acc + Ur + Us + alp + n + cad + Ui + I + S
## 
## 
## Step:  AIC=44.8
## sqrt(prod) ~ ph + p + k + ca + mg + al + ctc + sat + mo + arg + 
##     are + cas + acc + Ur + Us + alp + n + cad + I + S
## 
##        Df Sum of Sq    RSS    AIC
## - p     1    0.0427 52.924 42.842
## - al    1    0.0872 52.968 42.884
## - ph    1    0.2033 53.085 42.993
## - are   1    0.2328 53.114 43.021
## - mo    1    0.2969 53.178 43.081
## - acc   1    0.3369 53.218 43.119
## - mg    1    0.5174 53.399 43.288
## - S     1    0.6164 53.498 43.381
## - cas   1    0.6218 53.503 43.386
## - n     1    0.6336 53.515 43.397
## - ctc   1    0.7480 53.629 43.504
## - k     1    0.7911 53.672 43.544
## - Us    1    0.8616 53.743 43.609
## - Ur    1    0.8758 53.757 43.623
## - I     1    0.9475 53.829 43.689
## - alp   1    1.0035 53.885 43.741
## - cad   1    1.0595 53.941 43.793
## - arg   1    1.1153 53.997 43.845
## <none>              52.881 44.801
## - ca    1    2.4488 55.330 45.065
## - sat   1    8.7794 61.661 50.481
## 
## Step:  AIC=42.84
## sqrt(prod) ~ ph + k + ca + mg + al + ctc + sat + mo + arg + are + 
##     cas + acc + Ur + Us + alp + n + cad + I + S
## 
##        Df Sum of Sq    RSS    AIC
## - al    1    0.0865 53.010 40.923
## - are   1    0.1914 53.115 41.022
## - ph    1    0.2304 53.154 41.059
## - acc   1    0.2944 53.218 41.119
## - mo    1    0.3215 53.245 41.144
## - mg    1    0.5317 53.456 41.341
## - cas   1    0.5892 53.513 41.395
## - S     1    0.6139 53.538 41.418
## - n     1    0.6379 53.562 41.441
## - ctc   1    0.7715 53.695 41.565
## - k     1    0.7922 53.716 41.584
## - Us    1    0.8742 53.798 41.661
## - Ur    1    0.8899 53.814 41.675
## - I     1    0.9768 53.901 41.756
## - alp   1    1.0298 53.954 41.805
## - cad   1    1.0789 54.003 41.851
## - arg   1    1.0977 54.022 41.868
## <none>              52.924 42.842
## - ca    1    2.4456 55.369 43.100
## - sat   1    8.8935 61.817 48.608
## 
## Step:  AIC=40.92
## sqrt(prod) ~ ph + k + ca + mg + ctc + sat + mo + arg + are + 
##     cas + acc + Ur + Us + alp + n + cad + I + S
## 
##        Df Sum of Sq    RSS    AIC
## - are   1    0.2616 53.272 39.169
## - ph    1    0.2923 53.303 39.198
## - acc   1    0.3164 53.327 39.221
## - mo    1    0.3199 53.330 39.224
## - cas   1    0.6116 53.622 39.497
## - k     1    0.9357 53.946 39.798
## - mg    1    0.9716 53.982 39.831
## - arg   1    1.0188 54.029 39.875
## - S     1    1.1091 54.119 39.959
## - n     1    1.1852 54.196 40.029
## - ctc   1    1.2844 54.295 40.120
## - Us    1    1.4264 54.437 40.251
## - Ur    1    1.4465 54.457 40.269
## - I     1    1.5461 54.557 40.361
## - alp   1    1.6041 54.614 40.414
## - cad   1    1.6910 54.701 40.493
## <none>              53.010 40.923
## - ca    1    4.1608 57.171 42.701
## - sat   1   26.5823 79.593 59.245
## 
## Step:  AIC=39.17
## sqrt(prod) ~ ph + k + ca + mg + ctc + sat + mo + arg + cas + 
##     acc + Ur + Us + alp + n + cad + I + S
## 
##        Df Sum of Sq    RSS    AIC
## - acc   1    0.0560 53.328 37.222
## - ph    1    0.2146 53.487 37.370
## - mo    1    0.2332 53.505 37.388
## - cas   1    0.4941 53.766 37.631
## - k     1    0.8939 54.166 38.001
## - S     1    0.9161 54.188 38.022
## - n     1    0.9801 54.252 38.081
## - mg    1    1.0533 54.325 38.148
## - Us    1    1.2436 54.516 38.323
## - Ur    1    1.2582 54.530 38.337
## - I     1    1.3342 54.606 38.406
## - ctc   1    1.3380 54.610 38.410
## - alp   1    1.3933 54.665 38.460
## - cad   1    1.5018 54.774 38.559
## <none>              53.272 39.169
## - arg   1    2.6118 55.884 39.563
## - ca    1    3.9088 57.181 40.710
## - sat   1   27.9644 81.236 58.267
## 
## Step:  AIC=37.22
## sqrt(prod) ~ ph + k + ca + mg + ctc + sat + mo + arg + cas + 
##     Ur + Us + alp + n + cad + I + S
## 
##        Df Sum of Sq    RSS    AIC
## - mo    1    0.1962 53.524 35.406
## - ph    1    0.2685 53.596 35.473
## - cas   1    0.9740 54.302 36.127
## - k     1    0.9870 54.315 36.139
## - mg    1    1.0026 54.331 36.153
## - S     1    1.2724 54.600 36.401
## - ctc   1    1.3131 54.641 36.438
## - n     1    1.3539 54.682 36.475
## - Us    1    1.5355 54.863 36.641
## - Ur    1    1.5611 54.889 36.665
## - I     1    1.5947 54.923 36.695
## - alp   1    1.6080 54.936 36.707
## - cad   1    1.7923 55.120 36.875
## <none>              53.328 37.222
## - ca    1    3.9231 57.251 38.771
## - arg   1    6.9778 60.306 41.370
## - sat   1   28.1281 81.456 56.402
## 
## Step:  AIC=35.41
## sqrt(prod) ~ ph + k + ca + mg + ctc + sat + arg + cas + Ur + 
##     Us + alp + n + cad + I + S
## 
##        Df Sum of Sq    RSS    AIC
## - ph    1    0.3272 53.851 33.710
## - k     1    0.7978 54.322 34.145
## - mg    1    0.9349 54.459 34.271
## - cas   1    1.1881 54.712 34.503
## - ctc   1    1.2617 54.786 34.570
## - S     1    1.3558 54.880 34.656
## - n     1    1.4404 54.965 34.733
## - Us    1    1.6370 55.161 34.912
## - Ur    1    1.6624 55.187 34.935
## - alp   1    1.6899 55.214 34.960
## - I     1    1.6986 55.223 34.968
## - cad   1    1.8906 55.415 35.141
## <none>              53.524 35.406
## - ca    1    3.7273 57.251 36.771
## - arg   1    6.7825 60.307 39.371
## - sat   1   28.0329 81.557 54.464
## 
## Step:  AIC=33.71
## sqrt(prod) ~ k + ca + mg + ctc + sat + arg + cas + Ur + Us + 
##     alp + n + cad + I + S
## 
##        Df Sum of Sq    RSS    AIC
## - k     1     1.025 54.876 32.653
## - mg    1     1.188 55.040 32.801
## - S     1     1.282 55.133 32.886
## - cas   1     1.372 55.223 32.968
## - n     1     1.393 55.244 32.987
## - ctc   1     1.599 55.450 33.173
## - Us    1     1.607 55.458 33.180
## - Ur    1     1.624 55.476 33.196
## - alp   1     1.654 55.505 33.223
## - I     1     1.660 55.511 33.228
## - cad   1     1.843 55.694 33.392
## <none>              53.851 33.710
## - ca    1     4.422 58.273 35.656
## - arg   1     6.899 60.750 37.737
## - sat   1    33.511 87.362 55.902
## 
## Step:  AIC=32.65
## sqrt(prod) ~ ca + mg + ctc + sat + arg + cas + Ur + Us + alp + 
##     n + cad + I + S
## 
##        Df Sum of Sq    RSS    AIC
## - S     1     1.010 55.886 31.565
## - n     1     1.189 56.066 31.725
## - Us    1     1.326 56.203 31.847
## - Ur    1     1.354 56.230 31.871
## - alp   1     1.466 56.342 31.971
## - I     1     1.477 56.353 31.981
## - mg    1     1.477 56.353 31.981
## - ctc   1     1.520 56.397 32.019
## - cad   1     1.558 56.434 32.053
## - cas   1     1.684 56.561 32.164
## <none>              54.876 32.653
## - ca    1     4.258 59.135 34.390
## - arg   1     7.764 62.641 37.270
## - sat   1    32.488 87.364 53.903
## 
## Step:  AIC=31.56
## sqrt(prod) ~ ca + mg + ctc + sat + arg + cas + Ur + Us + alp + 
##     n + cad + I
## 
##        Df Sum of Sq    RSS    AIC
## - Us    1    0.3267 56.213 29.856
## - Ur    1    0.3640 56.250 29.889
## - n     1    0.4247 56.311 29.943
## - alp   1    0.4576 56.344 29.972
## - I     1    0.4672 56.353 29.981
## - cad   1    0.5830 56.469 30.084
## - ctc   1    1.1039 56.990 30.543
## - mg    1    1.4222 57.308 30.821
## - cas   1    1.7952 57.681 31.146
## <none>              55.886 31.565
## - ca    1    3.5365 59.423 32.633
## - arg   1    9.6406 65.527 37.522
## - sat   1   31.5991 87.485 51.972
## 
## Step:  AIC=29.86
## sqrt(prod) ~ ca + mg + ctc + sat + arg + cas + Ur + alp + n + 
##     cad + I
## 
##        Df Sum of Sq    RSS    AIC
## - Ur    1     0.124 56.337 27.966
## - I     1     0.164 56.377 28.002
## - alp   1     0.167 56.380 28.004
## - n     1     0.328 56.540 28.147
## - ctc   1     1.154 57.367 28.872
## - mg    1     1.436 57.649 29.117
## - cas   1     1.740 57.953 29.381
## <none>              56.213 29.856
## - cad   1     3.033 59.246 30.484
## - ca    1     4.028 60.241 31.316
## - arg   1    14.130 70.343 39.068
## - sat   1    33.669 89.882 51.324
## 
## Step:  AIC=27.97
## sqrt(prod) ~ ca + mg + ctc + sat + arg + cas + alp + n + cad + 
##     I
## 
##        Df Sum of Sq    RSS    AIC
## - alp   1     0.074 56.410 26.031
## - I     1     0.091 56.427 26.047
## - n     1     0.267 56.603 26.202
## - ctc   1     1.088 57.424 26.922
## - mg    1     1.402 57.738 27.195
## - cas   1     1.961 58.298 27.677
## <none>              56.337 27.966
## - ca    1     3.908 60.245 29.320
## - cad   1     4.553 60.890 29.852
## - arg   1    14.013 70.350 37.073
## - sat   1    33.547 89.883 49.325
## 
## Step:  AIC=26.03
## sqrt(prod) ~ ca + mg + ctc + sat + arg + cas + n + cad + I
## 
##        Df Sum of Sq    RSS    AIC
## - I     1     0.017 56.427 24.047
## - n     1     0.245 56.655 24.248
## - ctc   1     1.048 57.458 24.952
## - mg    1     1.368 57.778 25.229
## - cas   1     2.128 58.538 25.883
## <none>              56.410 26.031
## - ca    1     3.854 60.264 27.335
## - cad   1     5.516 61.927 28.696
## - arg   1    14.105 70.515 35.190
## - sat   1    34.665 91.075 47.983
## 
## Step:  AIC=24.05
## sqrt(prod) ~ ca + mg + ctc + sat + arg + cas + n + cad
## 
##        Df Sum of Sq    RSS    AIC
## - n     1     0.257 56.684 22.273
## - ctc   1     1.108 57.536 23.019
## - mg    1     1.445 57.872 23.311
## - cas   1     2.111 58.538 23.883
## <none>              56.427 24.047
## - ca    1     3.838 60.266 25.337
## - cad   1    14.030 70.458 33.149
## - arg   1    14.619 71.046 33.565
## - sat   1    35.038 91.465 46.197
## 
## Step:  AIC=22.27
## sqrt(prod) ~ ca + mg + ctc + sat + arg + cas + cad
## 
##        Df Sum of Sq    RSS    AIC
## - ctc   1     0.979 57.662 21.129
## - mg    1     1.220 57.904 21.338
## - cas   1     2.079 58.763 22.075
## <none>              56.684 22.273
## - ca    1     3.629 60.313 23.376
## - arg   1    14.581 71.264 31.719
## - cad   1    18.580 75.263 34.449
## - sat   1    35.036 91.720 44.336
## 
## Step:  AIC=21.13
## sqrt(prod) ~ ca + mg + sat + arg + cas + cad
## 
##        Df Sum of Sq     RSS    AIC
## - mg    1     0.242  57.904 19.338
## - cas   1     1.633  59.295 20.525
## <none>               57.662 21.129
## - ca    1     8.811  66.473 26.239
## - arg   1    17.335  74.997 32.272
## - cad   1    22.010  79.672 35.295
## - sat   1    60.789 118.451 55.124
## 
## Step:  AIC=19.34
## sqrt(prod) ~ ca + sat + arg + cas + cad
## 
##        Df Sum of Sq     RSS    AIC
## - cas   1     1.766  59.670 18.840
## <none>               57.904 19.338
## - ca    1     8.606  66.510 24.266
## - arg   1    17.810  75.715 30.747
## - cad   1    21.768  79.672 33.295
## - sat   1    78.082 135.986 60.026
## 
## Step:  AIC=18.84
## sqrt(prod) ~ ca + sat + arg + cad
## 
##        Df Sum of Sq     RSS    AIC
## <none>               59.670 18.840
## - ca    1     8.021  67.691 23.146
## - arg   1    16.387  76.057 28.973
## - cad   1    26.462  86.131 35.193
## - sat   1    77.252 136.921 58.369
# Resumo do modelo selecionado.
summary(m1)
## 
## Call:
## lm(formula = sqrt(prod) ~ ca + sat + arg + cad, data = de)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6481 -0.7063 -0.1226  0.7843  2.7592 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.490565   1.002813   4.478 5.11e-05 ***
## ca           -0.339112   0.137882  -2.459  0.01782 *  
## sat           0.105371   0.013805   7.633 1.19e-09 ***
## arg           0.008467   0.002409   3.515  0.00101 ** 
## cad         -12.887466   2.884897  -4.467 5.29e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.152 on 45 degrees of freedom
## Multiple R-squared:  0.7562, Adjusted R-squared:  0.7345 
## F-statistic: 34.89 on 4 and 45 DF,  p-value: 2.917e-13
# residualPlots(m1)
anova(m1, m0)
## Analysis of Variance Table
## 
## Model 1: sqrt(prod) ~ ca + sat + arg + cad
## Model 2: sqrt(prod) ~ ph + p + k + ca + mg + al + ctc + sat + mo + arg + 
##     are + cas + acc + Ur + Us + alp + n + cad + Ui + I + S
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     45 59.670                           
## 2     29 52.881 16    6.7886 0.2327 0.9983
im <- influence.measures(m1)
summary(im)
## Potentially influential observations of
##   lm(formula = sqrt(prod) ~ ca + sat + arg + cad, data = de) :
## 
##    dfb.1_ dfb.ca dfb.sat dfb.arg dfb.cad dffit   cov.r   cook.d
## 4   0.35   0.23   0.01   -0.92    0.51    1.08_*  0.58_*  0.20 
## 19  0.41   0.83  -0.87   -0.50    0.32   -1.03_*  0.62_*  0.19 
## 21  0.08   0.13  -0.10    0.01   -0.17   -0.42    0.61_*  0.03 
## 31  0.06   0.05  -0.04   -0.06   -0.01   -0.07    1.39_*  0.00 
## 36  0.16   0.05  -0.05    0.11   -0.35   -0.37    2.24_*  0.03 
## 43 -0.10   0.25  -0.17   -0.01    0.21    0.41    2.11_*  0.03 
##    hat    
## 4   0.13  
## 19  0.13  
## 21  0.03  
## 31  0.20  
## 36  0.51_*
## 43  0.48_*
par(mfrow = c(2, 2))
plot(m1)

layout(1)

Árvore de Regressão

#-----------------------------------------------------------------------
# Árvore de regressão.

layout(1)

ar <- rpart(prod ~ ., data = de, method = "anova")

summary(ar)
## Call:
## rpart(formula = prod ~ ., data = de, method = "anova")
##   n= 50 
## 
##           CP nsplit rel error    xerror      xstd
## 1 0.57868463      0 1.0000000 1.0363413 0.1730044
## 2 0.10430085      1 0.4213154 0.6039645 0.1362822
## 3 0.07274426      2 0.3170145 0.5898582 0.1304633
## 4 0.01000000      3 0.2442703 0.4832313 0.1032574
## 
## Variable importance
## sat  ph  ca  al  mg ctc  mo  Ui 
##  20  17  17  16  13  13   3   1 
## 
## Node number 1: 50 observations,    complexity param=0.5786846
##   mean=82.08052, MSE=1370.545 
##   left son=2 (23 obs) right son=3 (27 obs)
##   Primary splits:
##       sat < 53.17     to the left,  improve=0.5786846, (0 missing)
##       ph  < 5.65      to the left,  improve=0.5243626, (0 missing)
##       mg  < 0.635     to the left,  improve=0.5209719, (0 missing)
##       al  < 0.05      to the right, improve=0.5078733, (0 missing)
##       ca  < 2.12      to the left,  improve=0.4645719, (0 missing)
##   Surrogate splits:
##       ph  < 5.65      to the left,  agree=0.96, adj=0.913, (0 split)
##       al  < 0.05      to the right, agree=0.94, adj=0.870, (0 split)
##       ca  < 2.245     to the left,  agree=0.92, adj=0.826, (0 split)
##       mg  < 0.635     to the left,  agree=0.90, adj=0.783, (0 split)
##       ctc < 5.61      to the left,  agree=0.86, adj=0.696, (0 split)
## 
## Node number 2: 23 observations,    complexity param=0.1043009
##   mean=51.56746, MSE=658.0742 
##   left son=4 (10 obs) right son=5 (13 obs)
##   Primary splits:
##       mo  < 9.15      to the left,  improve=0.4722245, (0 missing)
##       ca  < 0.84      to the left,  improve=0.3706763, (0 missing)
##       sat < 29.435    to the left,  improve=0.3378541, (0 missing)
##       acc < 641.93    to the right, improve=0.2859104, (0 missing)
##       cad < 0.2013866 to the right, improve=0.2581536, (0 missing)
##   Surrogate splits:
##       sat < 30.435    to the left,  agree=0.913, adj=0.8, (0 split)
##       ca  < 0.84      to the left,  agree=0.870, adj=0.7, (0 split)
##       al  < 0.25      to the right, agree=0.826, adj=0.6, (0 split)
##       ph  < 5.25      to the left,  agree=0.739, adj=0.4, (0 split)
##       ctc < 5.2       to the left,  agree=0.739, adj=0.4, (0 split)
## 
## Node number 3: 27 observations,    complexity param=0.07274426
##   mean=108.0731, MSE=508.7363 
##   left son=6 (17 obs) right son=7 (10 obs)
##   Primary splits:
##       sat < 72.085    to the left,  improve=0.3629156, (0 missing)
##       cas < 39.85     to the left,  improve=0.2433149, (0 missing)
##       ca  < 3.135     to the left,  improve=0.1588320, (0 missing)
##       mg  < 1.94      to the left,  improve=0.1355789, (0 missing)
##       k   < 33.61     to the left,  improve=0.1044933, (0 missing)
##   Surrogate splits:
##       ph  < 6.75      to the left,  agree=0.852, adj=0.6, (0 split)
##       ca  < 6.055     to the left,  agree=0.852, adj=0.6, (0 split)
##       ctc < 11.025    to the left,  agree=0.815, adj=0.5, (0 split)
##       mg  < 2.66      to the left,  agree=0.778, adj=0.4, (0 split)
##       Ui  < 0.5205158 to the left,  agree=0.741, adj=0.3, (0 split)
## 
## Node number 4: 10 observations
##   mean=31.46805, MSE=151.4165 
## 
## Node number 5: 13 observations
##   mean=67.02855, MSE=498.0069 
## 
## Node number 6: 17 observations
##   mean=97.65175, MSE=159.0397 
## 
## Node number 7: 10 observations
##   mean=125.7894, MSE=604.7241
rsq.rpart(ar)
## 
## Regression tree:
## rpart(formula = prod ~ ., data = de, method = "anova")
## 
## Variables actually used in tree construction:
## [1] mo  sat
## 
## Root node error: 68527/50 = 1370.5
## 
## n= 50 
## 
##         CP nsplit rel error  xerror    xstd
## 1 0.578685      0   1.00000 1.03634 0.17300
## 2 0.104301      1   0.42132 0.60396 0.13628
## 3 0.072744      2   0.31701 0.58986 0.13046
## 4 0.010000      3   0.24427 0.48323 0.10326

plot(ar, uniform = TRUE, main = "Árvore de regressão para produção")
text(ar, use.n = TRUE, all = TRUE, cex = 0.8)

pred <- factor(predict(ar))
plot(sat ~ mo,
     data = de,
     col = as.integer(pred),
     pch = NA)
with(de, text(y = sat,
              x = mo,
              labels = rownames(de),
              col = as.integer(pred)))

Informações da Sessão

## Atualizado em 23 de novembro de 2016.
## 
## R version 3.3.2 (2016-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.1 LTS
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=pt_BR.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods  
## [7] base     
## 
## other attached packages:
##  [1] EACS_0.0-2          rpart_4.1-10        nFactors_2.3.3     
##  [4] boot_1.3-18         psych_1.6.9         MASS_7.3-45        
##  [7] car_2.1-2           reshape2_1.4.1      captioner_2.2.3    
## [10] plyr_1.8.4          nlme_3.1-128        latticeExtra_0.6-28
## [13] RColorBrewer_1.1-2  lattice_0.20-34     knitr_1.13         
## [16] wzRfun_0.70         gsubfn_0.6-6        proto_0.3-10       
## [19] devtools_1.12.0    
## 
## loaded via a namespace (and not attached):
##  [1] zoo_1.7-13         splines_3.3.2      tcltk_3.3.2       
##  [4] doBy_4.5-15        htmltools_0.3.5    mgcv_1.8-16       
##  [7] rpanel_1.1-3       yaml_2.1.13        survival_2.39-5   
## [10] nloptr_1.0.4       foreign_0.8-67     withr_1.0.2       
## [13] multcomp_1.4-6     stringr_1.0.0      MatrixModels_0.4-1
## [16] mvtnorm_1.0-5      codetools_0.2-15   memoise_1.0.0     
## [19] evaluate_0.9       SparseM_1.7        quantreg_5.26     
## [22] parallel_3.3.2     pbkrtest_0.4-6     curl_0.9.7        
## [25] TH.data_1.0-7      Rcpp_0.12.7        formatR_1.4       
## [28] lme4_1.1-12        mnormt_1.5-5       digest_0.6.9      
## [31] stringi_1.1.1      grid_3.3.2         tools_3.3.2       
## [34] sandwich_2.3-4     magrittr_1.5       Matrix_1.2-7.1    
## [37] minqa_1.2.4        rmarkdown_1.0      roxygen2_5.0.1    
## [40] httr_1.2.1         R6_2.1.2           nnet_7.3-12       
## [43] git2r_0.15.0