Download dos dados: http://www.leg.ufpr.br/~walmes/data/teca.txt
Download do script: teca.R
##----------------------------------------------------------------------
## Lendo o arquivo.
da <- read.table(file="teca.txt", header=TRUE, sep="\t",
stringsAsFactors=FALSE)
da$local <- factor(da$local)
## Remove os espaços no texto das camadas: 0 - 5 -> 0-5.
da$camada <- gsub(pattern=" ", replacement="", x=da$camada)
str(da)
'data.frame': 150 obs. of 17 variables:
$ local : Factor w/ 50 levels "1","2","3","4",..: 1 1 1 2 2 2 3 3 3 4 ...
$ camada: chr "0-5" "5-40" "40-80" "0-5" ...
$ phh2o : num 6.8 6.7 6.7 4.7 4.7 4.9 7.6 6.8 6.9 6.6 ...
$ p : num 22.51 0.83 0.01 3.89 0.69 ...
$ k : num 72.24 13.42 7.23 48.13 12.34 ...
$ ca : num 8.27 2.91 2.33 0.97 0.76 0.21 7.63 3.22 2.22 5.54 ...
$ mg : num 1.7 1.77 0.51 0.16 0.14 0 1.53 1.31 1.09 1.77 ...
$ al : num 0 0 0 0.3 0.6 0.6 0 0 0 0 ...
$ ctctot: num 12.47 6.57 4.52 5.3 4.17 ...
$ sat : num 81.4 71.7 63.2 23.7 22.4 ...
$ mo : num 72.2 25.6 9.7 34.4 8.7 9.7 50.4 15.6 9.5 50.2 ...
$ arg : num 184 215 286 232 213 ...
$ are : num 770 750 674 741 775 ...
$ cas : num 1.8 2.2 3.7 0.4 1.1 1.7 8.4 19 14.3 5.5 ...
$ acc : num 770 750 676 741 775 ...
$ vol : num 0.271 0.271 0.271 0.096 0.096 0.096 0.497 0.497 0.497 0.407 ...
$ dap : num 30 30 30 18.2 18.2 ...
##----------------------------------------------------------------------
## Número de registros por camada.
f <- sort(xtabs(~camada, data=da), decreasing=TRUE)
f
camada
0-5 40-80 5-40
50 50 50
Para melhor compreender a relação entre essas variáveis.
Veja exemplo de análise de componentes principais:
##----------------------------------------------------------------------
## Interrelações entre as 3 primeiras camadas.
da$camada <- factor(da$camada, levels=c("0-5","5-40","40-80"))
## Ordenar os níveis de local pelo valor de volume.
da$local <- reorder(da$local, da$vol)
xtabs(~camada, data=da)
camada
0-5 5-40 40-80
50 50 50
##----------------------------------------------------------------------
## Variáveis resposta de solo.
v <- names(da)[3:15]
## Remover o alumínio por ter muitos zeros. Manter só argila e ACC.
v <- v[-c(6,11:12)]; v
[1] "phh2o" "p" "k" "ca" "mg" "ctctot" "sat"
[8] "mo" "arg" "acc"
##----------------------------------------------------------------------
## Desempilhar o registros das camadas.
## Empilhar primeiro nas variáveis.
dd <- melt(data=da, measure.vars=v,
id.vars=c("local", "camada", "vol", "dap"),
variable.name="var", value.name="valor")
str(dd)
'data.frame': 1500 obs. of 6 variables:
$ local : Factor w/ 50 levels "20","7","6","36",..: 18 18 18 5 5 5 44 44 44 35 ...
..- attr(*, "scores")= num [1:50(1d)] 0.271 0.096 0.497 0.407 0.217 0.087 0.083 0.406 0.143 0.176 ...
.. ..- attr(*, "dimnames")=List of 1
.. .. ..$ : chr "1" "2" "3" "4" ...
$ camada: Factor w/ 3 levels "0-5","5-40","40-80": 1 2 3 1 2 3 1 2 3 1 ...
$ vol : num 0.271 0.271 0.271 0.096 0.096 0.096 0.497 0.497 0.497 0.407 ...
$ dap : num 30 30 30 18.2 18.2 ...
$ var : Factor w/ 10 levels "phh2o","p","k",..: 1 1 1 1 1 1 1 1 1 1 ...
$ valor : num 6.8 6.7 6.7 4.7 4.7 4.9 7.6 6.8 6.9 6.6 ...
##----------------------------------------------------------------------
## Gráficos.
## Volume em função das variáveis de solo.
xyplot(vol~valor|var, groups=camada, data=dd,
scales=list(x="free"), type=c("p","r"),
auto.key=TRUE)
## DAP em função das variáveis de solo.
xyplot(dap~valor|var, groups=camada, data=dd,
scales=list(x="free"), type=c("p","r"),
auto.key=TRUE)
##----------------------------------------------------------------------
## Desempilhar.
dd$camvar <- with(dd, paste(camada, var, sep=":"))
de <- dcast(data=dd, formula=local~camvar, value.var="valor")
str(de)
'data.frame': 50 obs. of 31 variables:
$ local : Factor w/ 50 levels "20","7","6","36",..: 1 2 3 4 5 6 7 8 9 10 ...
$ 0-5:acc : num 715 611 746 740 741 ...
$ 0-5:arg : num 220 287 224 128 232 ...
$ 0-5:ca : num 0.71 1.6 1.88 3.12 0.97 2.9 2.91 2.26 1.91 3.86 ...
$ 0-5:ctctot : num 4.22 6.01 6.47 5.91 5.3 7.59 7.01 5.7 6.26 8.67 ...
$ 0-5:k : num 25.8 48.5 62.9 75 48.1 ...
$ 0-5:mg : num 0.21 1.42 1.53 0.93 0.16 1.66 1.29 0.68 0.21 1.82 ...
$ 0-5:mo : num 16.1 28.6 35 25.1 34.4 58.3 28.8 22.6 19.9 35.3 ...
$ 0-5:p : num 1.04 2.17 2.14 3.4 3.89 7.55 3.26 2.23 3.6 6.12 ...
$ 0-5:phh2o : num 5.2 5.9 5.8 6.2 4.7 5.8 6.1 5.8 5.5 6.2 ...
$ 0-5:sat : num 23.4 52.3 55.2 71.8 23.7 ...
$ 40-80:acc : num 620 663 533 756 752 ...
$ 40-80:arg : num 329 283 423 382 237 ...
$ 40-80:ca : num 0.3 0.6 0.3 1 0.21 0.6 0.94 0.79 0.47 1.29 ...
$ 40-80:ctctot: num 3.22 4.53 4.48 4.38 3.47 3.28 6.35 4.9 4.58 4.12 ...
$ 40-80:k : num 10.32 5.16 16.04 35.96 8.64 ...
$ 40-80:mg : num 0.09 0.18 0.1 0.96 0 0.24 0.3 0.38 0.04 0.37 ...
$ 40-80:mo : num 9 8.2 9.3 7.4 9.7 9.9 2.5 7.6 7.1 6.9 ...
$ 40-80:p : num 0.31 0.01 0.1 0.01 0.1 5.29 0.01 0.01 1.07 0.01 ...
$ 40-80:phh2o : num 5.4 5.6 5.5 5.3 4.9 5.4 5.4 5.2 5 6 ...
$ 40-80:sat : num 12.94 17.5 9.84 47.02 6.69 ...
$ 5-40:acc : num 694 592 666 731 775 ...
$ 5-40:arg : num 244 304 250 261 213 ...
$ 5-40:ca : num 0.44 0.24 0.7 1.05 0.76 2.3 0.69 0.68 1.15 1.49 ...
$ 5-40:ctctot : num 3.69 5.02 5.16 4.62 4.17 5.38 5.84 4.81 4.86 5.26 ...
$ 5-40:k : num 12.39 9.29 22.21 34.39 12.34 ...
$ 5-40:mg : num 0.12 0.14 0.36 1.16 0.14 0.41 0.53 0.79 0.05 0.62 ...
$ 5-40:mo : num 8 8.5 5.8 8.5 8.7 15 7.8 6.9 6.3 11.4 ...
$ 5-40:p : num 0.73 0.21 0.69 0.01 0.69 7.03 0.31 0.01 1.36 0.51 ...
$ 5-40:phh2o : num 5 5.1 5.2 5.2 4.7 5.6 5.3 4.9 4.9 5.5 ...
$ 5-40:sat : num 16.04 8.04 21.65 49.83 22.35 ...
##----------------------------------------------------------------------
## PCA: Análise de Componentes Principais.
## X <- de[, colwise(is.numeric)(de)]
X <- de[, sapply(de, is.numeric)]
pca <- princomp(x=X, cor=TRUE)
summary(pca)
Importance of components:
Comp.1 Comp.2 Comp.3 Comp.4
Standard deviation 3.835619 1.9451791 1.42497451 1.27232479
Proportion of Variance 0.490399 0.1261241 0.06768508 0.05396035
Cumulative Proportion 0.490399 0.6165230 0.68420812 0.73816847
Comp.5 Comp.6 Comp.7 Comp.8
Standard deviation 1.25280105 1.05167692 0.93740170 0.84783288
Proportion of Variance 0.05231702 0.03686748 0.02929073 0.02396069
Cumulative Proportion 0.79048548 0.82735296 0.85664369 0.88060438
Comp.9 Comp.10 Comp.11 Comp.12
Standard deviation 0.76435311 0.70349461 0.65415157 0.60413383
Proportion of Variance 0.01947452 0.01649682 0.01426381 0.01216592
Cumulative Proportion 0.90007890 0.91657572 0.93083953 0.94300545
Comp.13 Comp.14 Comp.15 Comp.16
Standard deviation 0.56392221 0.485541702 0.451896781 0.426189604
Proportion of Variance 0.01060028 0.007858358 0.006807023 0.006054586
Cumulative Proportion 0.95360573 0.961464088 0.968271111 0.974325697
Comp.17 Comp.18 Comp.19 Comp.20
Standard deviation 0.41270305 0.370984883 0.329499559 0.30315192
Proportion of Variance 0.00567746 0.004587659 0.003618999 0.00306337
Cumulative Proportion 0.98000316 0.984590817 0.988209815 0.99127319
Comp.21 Comp.22 Comp.23
Standard deviation 0.242703867 0.232160369 0.211622779
Proportion of Variance 0.001963506 0.001796615 0.001492807
Cumulative Proportion 0.993236691 0.995033305 0.996526112
Comp.24 Comp.25 Comp.26
Standard deviation 0.188251866 0.174326559 0.1259196976
Proportion of Variance 0.001181292 0.001012992 0.0005285257
Cumulative Proportion 0.997707404 0.998720396 0.9992489213
Comp.27 Comp.28 Comp.29
Standard deviation 0.1052954295 0.0914319312 0.0483650808
Proportion of Variance 0.0003695709 0.0002786599 0.0000779727
Cumulative Proportion 0.9996184922 0.9998971522 0.9999751249
Comp.30
Standard deviation 2.731765e-02
Proportion of Variance 2.487514e-05
Cumulative Proportion 1.000000e+00
##----------------------------------------------------------------------
## Variâncias.
## Variância de cada componente.
screeplot(pca, type="lines", main=NULL)
## Proporção de variância acumulada.
plot(cumsum(pca$sdev^2)/sum(pca$sdev^2), type="o",
xlab="Componente", ylab="Proporção de variância acumulada")
abline(h=0.75, lty=2)
##----------------------------------------------------------------------
## Gráficos biplot.
biplot(pca, choices=c(1,2))
biplot(pca, choices=c(1,3))
biplot(pca, choices=c(2,3))
##----------------------------------------------------------------------
## Agrupamento hierárquico.
di <- dist(pca$scores[,1:3], method="euclidean")
tree <- hclust(di, method="ward.D")
plot(tree, xlab="")
k <- 3
rect.hclust(tree, k=k, border="red")
## Pontos que pertencem a cada grupo.
g <- cutree(tree, k=k)
split(as.character(de$local), f=g)
$`1`
[1] "20" "7" "6" "36" "2" "21" "40" "35" "9" "25" "16" "10" "37"
[14] "5" "39" "15" "14" "32" "18" "46" "45" "13" "23" "8" "4" "30"
[27] "38"
$`2`
[1] "1" "19" "44" "12" "41" "26" "22" "33" "31" "11" "27" "3" "24"
[14] "29" "17" "47" "28"
$`3`
[1] "43" "42" "50" "48" "49" "34"
##----------------------------------------------------------------------
## Carregamentos.
pca$loadings[,1:3]
Comp.1 Comp.2 Comp.3
0-5:acc 0.18311637 -0.253878526 0.106003404
0-5:arg -0.15508592 0.270970073 -0.135701721
0-5:ca -0.23895243 -0.042777858 0.164229911
0-5:ctctot -0.23947163 0.004517663 0.124092785
0-5:k -0.18169653 -0.147630418 0.083905286
0-5:mg -0.15349720 0.043157461 -0.078323864
0-5:mo -0.15480616 -0.001907438 0.171219753
0-5:p -0.06404461 -0.206021719 0.229027827
0-5:phh2o -0.19789857 -0.190504783 0.178364639
0-5:sat -0.21476821 -0.097341498 0.114656950
40-80:acc 0.14425170 -0.061353969 0.379247615
40-80:arg -0.16664400 0.048205195 -0.380603495
40-80:ca -0.21957457 0.079740058 0.120989016
40-80:ctctot -0.19606231 0.089319577 0.039795215
40-80:k -0.08850550 -0.363638771 -0.086036106
40-80:mg -0.20464540 -0.062177083 -0.111952625
40-80:mo -0.12325432 -0.044768536 -0.227109355
40-80:p -0.04555785 -0.339183521 -0.399653018
40-80:phh2o -0.21072564 -0.104441766 0.042574460
40-80:sat -0.23594947 -0.074414418 0.014847038
5-40:acc 0.16622503 -0.258992462 0.163585589
5-40:arg -0.17736450 0.279430085 -0.101570753
5-40:ca -0.22409743 0.123598056 0.099112819
5-40:ctctot -0.22796980 0.170147189 -0.003066074
5-40:k -0.12372044 -0.321517395 -0.080885744
5-40:mg -0.20623931 0.024568406 0.003859249
5-40:mo -0.18839688 0.088284562 -0.091687650
5-40:p -0.05815599 -0.356009711 -0.331855614
5-40:phh2o -0.20913015 -0.136402606 0.231305023
5-40:sat -0.23242211 -0.108966466 0.156171747
## A fração dos carregamentos mais importantes.
imp <- function(x, f=0.25){
a <- abs(x)
k <- ceiling(f*length(x))
i <- sort(a, decreasing=TRUE)[k]
x[a<=i] <- NA
return(x)
}
apply(pca$loadings[,1:3], MARGIN=2, FUN=imp, f=0.25)
Comp.1 Comp.2 Comp.3
0-5:acc NA NA NA
0-5:arg NA 0.2709701 NA
0-5:ca -0.2389524 NA NA
0-5:ctctot -0.2394716 NA NA
0-5:k NA NA NA
0-5:mg NA NA NA
0-5:mo NA NA NA
0-5:p NA NA 0.2290278
0-5:phh2o NA NA NA
0-5:sat NA NA NA
40-80:acc NA NA 0.3792476
40-80:arg NA NA -0.3806035
40-80:ca -0.2195746 NA NA
40-80:ctctot NA NA NA
40-80:k NA -0.3636388 NA
40-80:mg NA NA NA
40-80:mo NA NA -0.2271094
40-80:p NA -0.3391835 -0.3996530
40-80:phh2o NA NA NA
40-80:sat -0.2359495 NA NA
5-40:acc NA -0.2589925 NA
5-40:arg NA 0.2794301 NA
5-40:ca -0.2240974 NA NA
5-40:ctctot -0.2279698 NA NA
5-40:k NA -0.3215174 NA
5-40:mg NA NA NA
5-40:mo NA NA NA
5-40:p NA -0.3560097 -0.3318556
5-40:phh2o NA NA 0.2313050
5-40:sat -0.2324221 NA NA
## 1º componente: Ca, CTC e Saturação.
## 2º componente: AreiaCascalhoCalhau, pH, K, P, Argila.
## 3º componente: MO, ACC, Arg, K, P.
##----------------------------------------------------------------------
## PCA: Análise de Componentes Principais.
## X <- de[, colwise(is.numeric)(de)]
X <- de[, grep(x=names(de), pattern="0-5:")]
pca <- princomp(x=X, cor=TRUE)
summary(pca)
Importance of components:
Comp.1 Comp.2 Comp.3 Comp.4
Standard deviation 2.3914793 1.2718925 0.91795233 0.74270375
Proportion of Variance 0.5719173 0.1617710 0.08426365 0.05516089
Cumulative Proportion 0.5719173 0.7336884 0.81795202 0.87311290
Comp.5 Comp.6 Comp.7 Comp.8
Standard deviation 0.72159599 0.6108150 0.43054429 0.33730446
Proportion of Variance 0.05207008 0.0373095 0.01853684 0.01137743
Cumulative Proportion 0.92518298 0.9624925 0.98102932 0.99240675
Comp.9 Comp.10
Standard deviation 0.267333818 0.0668218688
Proportion of Variance 0.007146737 0.0004465162
Cumulative Proportion 0.999553484 1.0000000000
##----------------------------------------------------------------------
## Variâncias.
## Variância de cada componente.
screeplot(pca, type="lines", main=NULL)
## Proporção de variância acumulada.
## str(pca)
plot(cumsum(pca$sdev^2)/sum(pca$sdev^2), type="o",
xlab="Componente", ylab="Proporção de variância acumulada")
abline(h=0.75, lty=2)
##----------------------------------------------------------------------
## Gráficos biplot.
biplot(pca, choices=c(1,2))
biplot(pca, choices=c(1,3))
biplot(pca, choices=c(2,3))
##----------------------------------------------------------------------
## Agrupamento hierárquico.
di <- dist(pca$scores[,1:3], method="euclidean")
tree <- hclust(di, method="ward.D")
plot(tree, xlab="")
k <- 3
rect.hclust(tree, k=k, border="red")
## Pontos que pertencem a cada grupo.
g <- cutree(tree, k=k)
split(as.character(de$local), f=g)
$`1`
[1] "20" "7" "6" "36" "2" "21" "40" "35" "9" "25" "16" "10" "37"
[14] "5" "39" "14" "46" "45" "23" "38"
$`2`
[1] "15" "1" "32" "18" "19" "44" "12" "42" "26" "13" "22" "33" "8"
[14] "4" "31" "11" "27" "30" "3" "24" "29" "17" "47" "28"
$`3`
[1] "43" "41" "50" "48" "49" "34"
##----------------------------------------------------------------------
## Carregamentos.
pca$loadings[,1:3]
Comp.1 Comp.2 Comp.3
0-5:acc 0.2844891 -0.44005683 -0.393803394
0-5:arg -0.2536422 0.52733608 0.316575254
0-5:ca -0.3936613 -0.04840003 0.157913933
0-5:ctctot -0.3985748 0.04401529 -0.017491028
0-5:k -0.3021483 -0.25118328 -0.228470513
0-5:mg -0.2839132 0.12025369 -0.544920719
0-5:mo -0.3097933 0.07594248 -0.261172323
0-5:p -0.1098093 -0.55740728 0.549597523
0-5:phh2o -0.3498059 -0.30599093 -0.006700861
0-5:sat -0.3722984 -0.19060996 -0.002358581
apply(pca$loadings[,1:3], MARGIN=2, FUN=imp, f=0.25)
Comp.1 Comp.2 Comp.3
0-5:acc NA NA NA
0-5:arg NA 0.5273361 NA
0-5:ca -0.3936613 NA NA
0-5:ctctot -0.3985748 NA NA
0-5:k NA NA NA
0-5:mg NA NA -0.5449207
0-5:mo NA NA NA
0-5:p NA -0.5574073 0.5495975
0-5:phh2o NA NA NA
0-5:sat NA NA NA
##----------------------------------------------------------------------
## PCA: Análise de Componentes Principais.
## X <- de[, colwise(is.numeric)(de)]
X <- de[, grep(x=names(de), pattern="5-40:")]
pca <- princomp(x=X, cor=TRUE)
summary(pca)
Importance of components:
Comp.1 Comp.2 Comp.3 Comp.4
Standard deviation 2.3553777 1.3498894 0.8639878 0.80205996
Proportion of Variance 0.5547804 0.1822201 0.0746475 0.06433002
Cumulative Proportion 0.5547804 0.7370006 0.8116481 0.87597808
Comp.5 Comp.6 Comp.7 Comp.8
Standard deviation 0.68256372 0.53250884 0.51294287 0.37433588
Proportion of Variance 0.04658932 0.02835657 0.02631104 0.01401273
Cumulative Proportion 0.92256740 0.95092396 0.97723500 0.99124774
Comp.9 Comp.10
Standard deviation 0.27856095 0.0996314495
Proportion of Variance 0.00775962 0.0009926426
Cumulative Proportion 0.99900736 1.0000000000
##----------------------------------------------------------------------
## Variâncias.
## Variância de cada componente.
screeplot(pca, type="lines", main=NULL)
## Proporção de variância acumulada.
## str(pca)
plot(cumsum(pca$sdev^2)/sum(pca$sdev^2), type="o",
xlab="Componente", ylab="Proporção de variância acumulada")
abline(h=0.75, lty=2)
##----------------------------------------------------------------------
## Gráficos biplot.
biplot(pca, choices=c(1,2))
biplot(pca, choices=c(1,3))
biplot(pca, choices=c(2,3))
##----------------------------------------------------------------------
## Agrupamento hierárquico.
di <- dist(pca$scores[,1:3], method="euclidean")
tree <- hclust(di, method="ward.D")
plot(tree, xlab="")
k <- 3
rect.hclust(tree, k=k, border="red")
## Pontos que pertencem a cada grupo.
g <- cutree(tree, k=k)
split(as.character(de$local), f=g)
$`1`
[1] "20" "7" "6" "36" "2" "40" "35" "9" "25" "16" "10" "5" "39"
[14] "15" "14" "32" "18" "46" "45" "30" "38"
$`2`
[1] "21" "37" "1" "19" "12" "41" "13" "22" "23" "8" "4" "11" "27"
[14] "3" "24" "29" "28"
$`3`
[1] "44" "43" "42" "26" "33" "31" "50" "48" "49" "17" "47" "34"
##----------------------------------------------------------------------
## Carregamentos.
pca$loadings[,1:3]
Comp.1 Comp.2 Comp.3
5-40:acc 0.29776624 0.33606119 0.17901163
5-40:arg -0.31696590 -0.35789735 -0.28434089
5-40:ca -0.39339162 -0.07061065 0.23389708
5-40:ctctot -0.39508633 -0.13662716 -0.05437855
5-40:k -0.15142410 0.50680514 -0.62374686
5-40:mg -0.33370060 0.04283592 -0.35869216
5-40:mo -0.33764983 -0.05989423 0.46192355
5-40:p -0.05529923 0.56539073 0.19486354
5-40:phh2o -0.33354496 0.30179445 0.19602143
5-40:sat -0.37069490 0.24959823 0.14815683
apply(pca$loadings[,1:3], MARGIN=2, FUN=imp, f=0.25)
Comp.1 Comp.2 Comp.3
5-40:acc NA NA NA
5-40:arg NA NA NA
5-40:ca -0.3933916 NA NA
5-40:ctctot -0.3950863 NA NA
5-40:k NA 0.5068051 -0.6237469
5-40:mg NA NA NA
5-40:mo NA NA 0.4619236
5-40:p NA 0.5653907 NA
5-40:phh2o NA NA NA
5-40:sat NA NA NA
##----------------------------------------------------------------------
## PCA: Análise de Componentes Principais.
## X <- de[, colwise(is.numeric)(de)]
X <- de[, grep(x=names(de), pattern="40-80:")]
pca <- princomp(x=X, cor=TRUE)
summary(pca)
Importance of components:
Comp.1 Comp.2 Comp.3 Comp.4
Standard deviation 2.2711731 1.2073338 1.0909202 0.89999412
Proportion of Variance 0.5158227 0.1457655 0.1190107 0.08099894
Cumulative Proportion 0.5158227 0.6615882 0.7805989 0.86159783
Comp.5 Comp.6 Comp.7 Comp.8
Standard deviation 0.73074501 0.60758813 0.48046502 0.38431448
Proportion of Variance 0.05339883 0.03691633 0.02308466 0.01476976
Cumulative Proportion 0.91499666 0.95191299 0.97499766 0.98976742
Comp.9 Comp.10
Standard deviation 0.294483533 0.124921071
Proportion of Variance 0.008672055 0.001560527
Cumulative Proportion 0.998439473 1.000000000
##----------------------------------------------------------------------
## Variâncias.
## Variância de cada componente.
screeplot(pca, type="lines", main=NULL)
## Proporção de variância acumulada.
## str(pca)
plot(cumsum(pca$sdev^2)/sum(pca$sdev^2), type="o",
xlab="Componente", ylab="Proporção de variância acumulada")
abline(h=0.75, lty=2)
##----------------------------------------------------------------------
## Gráficos biplot.
biplot(pca, choices=c(1,2))
biplot(pca, choices=c(1,3))
biplot(pca, choices=c(2,3))
##----------------------------------------------------------------------
## Agrupamento hierárquico.
di <- dist(pca$scores[,1:3], method="euclidean")
tree <- hclust(di, method="ward.D")
plot(tree, xlab="")
k <- 3
rect.hclust(tree, k=k, border="red")
## Pontos que pertencem a cada grupo.
g <- cutree(tree, k=k)
split(as.character(de$local), f=g)
$`1`
[1] "20" "7" "6" "36" "2" "21" "40" "35" "9" "25" "16" "10" "5"
[14] "39" "15" "14" "32" "18" "46" "45" "13" "8" "4" "38"
$`2`
[1] "37" "1" "44" "43" "42" "33" "50" "11" "30" "48" "49" "3" "17"
[14] "47" "34"
$`3`
[1] "19" "12" "41" "26" "22" "23" "31" "27" "24" "29" "28"
##----------------------------------------------------------------------
## Carregamentos.
pca$loadings[,1:3]
Comp.1 Comp.2 Comp.3
40-80:acc 0.2556601 0.13035258 -0.67884342
40-80:arg -0.3289008 -0.06738028 0.49360578
40-80:ca -0.3741251 0.35743081 -0.12474009
40-80:ctctot -0.3494005 0.33612876 -0.17421578
40-80:k -0.2006861 -0.47529938 -0.17943385
40-80:mg -0.3732657 -0.01893580 -0.16523367
40-80:mo -0.2537319 -0.13106693 -0.41760809
40-80:p -0.1318250 -0.70032507 -0.11908213
40-80:phh2o -0.3693664 0.02509679 0.01543191
40-80:sat -0.4081916 0.05654300 -0.03605322
apply(pca$loadings[,1:3], MARGIN=2, FUN=imp, f=0.25)
Comp.1 Comp.2 Comp.3
40-80:acc NA NA -0.6788434
40-80:arg NA NA 0.4936058
40-80:ca -0.3741251 NA NA
40-80:ctctot NA NA NA
40-80:k NA -0.4752994 NA
40-80:mg NA NA NA
40-80:mo NA NA NA
40-80:p NA -0.7003251 NA
40-80:phh2o NA NA NA
40-80:sat -0.4081916 NA NA
A camada de 0-5 cm não será considerada nessa análise por ela ser predominantemente resultado da deposição de material organico proveniente da floresta.
##----------------------------------------------------------------------
sel <- grep(x=names(de), pattern="0-5", invert=TRUE)
db <- merge(subset(de, select=sel),
subset(da, camada=="0-5", select=c("local","vol")))
str(db)
'data.frame': 50 obs. of 22 variables:
$ local : Factor w/ 50 levels "20","7","6","36",..: 18 12 38 25 30 17 16 11 47 20 ...
$ 40-80:acc : num 676 594 411 386 638 ...
$ 40-80:arg : num 286 380 588 572 324 ...
$ 40-80:ca : num 2.33 0.36 2.23 2 0.8 1.32 0.87 0.86 3.22 1.11 ...
$ 40-80:ctctot: num 4.52 4.54 5.91 5.84 3.47 4 3.86 3.68 5.71 3.97 ...
$ 40-80:k : num 7.23 7.23 25.8 26.22 13.11 ...
$ 40-80:mg : num 0.51 0.12 1.02 1.92 0.32 0.06 0.36 0.2 0.37 0.59 ...
$ 40-80:mo : num 9.7 8.8 8.4 11.4 10.9 7.6 9.7 10.3 12.2 9.5 ...
$ 40-80:p : num 0.01 0.1 0.01 5.29 5.56 0.1 0.01 0.01 0.01 0.01 ...
$ 40-80:phh2o : num 6.7 5.5 6.4 6.3 5.1 5.4 5.5 5.4 6.4 5.9 ...
$ 40-80:sat : num 63.2 11 56.1 68.2 33.2 ...
$ 5-40:acc : num 750 623 617 608 736 ...
$ 5-40:arg : num 215 314 347 311 263 ...
$ 5-40:ca : num 2.91 0.72 2.3 2.9 2.24 0.89 2.32 1.34 3.97 1.36 ...
$ 5-40:ctctot : num 6.57 3.86 5.71 7.48 4.7 4.73 5.3 4.27 9.38 5.51 ...
$ 5-40:k : num 13.4 10.3 71.2 30.6 16.4 ...
$ 5-40:mg : num 1.77 0.37 0.75 1.91 0.1 0.19 0.35 0.3 2.75 0.77 ...
$ 5-40:mo : num 25.6 6.9 14.2 22.1 10.7 9.6 12.5 12.7 21.4 13.6 ...
$ 5-40:p : num 0.83 1.04 0.21 5.99 5.9 0.29 0.59 0.69 1.46 0.31 ...
$ 5-40:phh2o : num 6.7 5 6 5.8 5.3 5.1 5.6 5.2 6.8 5.1 ...
$ 5-40:sat : num 71.7 28.9 56.6 65.3 50.7 ...
$ vol : num 0.271 0.176 0.433 0.344 0.372 0.258 0.255 0.171 0.544 0.316 ...
m0 <- lm(vol~., data=db[,-1])
par(mfrow=c(2,2)); plot(m0); layout(1)
## ## Fica mais adequado se considerar transformação sqrt().
## MASS::boxcox(m0)
## m0 <- update(m0, sqrt(.)~.)
## par(mfrow=c(2,2)); plot(m0); layout(1)
##----------------------------------------------------------------------
## Stepwise por BIC.
k <- log(nrow(db)); k ## Critério BIC.
[1] 3.912023
## Y original, camadas 5-40 e 40-80.
m1 <- step(lm(vol~., data=db[,-1]), k=k, trace=0)
summary(m1)
Call:
lm(formula = vol ~ `40-80:phh2o` + `40-80:sat` + `5-40:phh2o`,
data = db[, -1])
Residuals:
Min 1Q Median 3Q Max
-0.22670 -0.05807 0.01265 0.06111 0.22487
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.003077 0.195882 0.016 0.98754
`40-80:phh2o` -0.110869 0.044498 -2.492 0.01639 *
`40-80:sat` 0.004777 0.001269 3.766 0.00047 ***
`5-40:phh2o` 0.131077 0.033511 3.911 0.00030 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.0935 on 46 degrees of freedom
Multiple R-squared: 0.6355, Adjusted R-squared: 0.6117
F-statistic: 26.73 on 3 and 46 DF, p-value: 3.684e-10
## raíz de Y , camadas 5-40 e 40-80.
m1 <- step(lm(sqrt(vol)~., data=db[,-1]), k=k, trace=0)
summary(m1)
Call:
lm(formula = sqrt(vol) ~ `40-80:phh2o` + `40-80:sat` + `5-40:phh2o`,
data = db[, -1])
Residuals:
Min 1Q Median 3Q Max
-0.24880 -0.05996 0.02020 0.05424 0.21490
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.303020 0.183197 1.654 0.104924
`40-80:phh2o` -0.109009 0.041616 -2.619 0.011892 *
`40-80:sat` 0.004931 0.001187 4.156 0.000139 ***
`5-40:phh2o` 0.115363 0.031341 3.681 0.000608 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.08744 on 46 degrees of freedom
Multiple R-squared: 0.6439, Adjusted R-squared: 0.6207
F-statistic: 27.73 on 3 and 46 DF, p-value: 2.162e-10
## Y original, apenas camada 5-40.
a <- subset(db, select=grep(x=names(db),
pattern="(40-80|local)", invert=TRUE))
m1 <- step(lm(vol~., data=a), k=k, trace=0)
summary(m1)
Call:
lm(formula = vol ~ `5-40:sat`, data = a)
Residuals:
Min 1Q Median 3Q Max
-0.215261 -0.061699 -0.002625 0.067258 0.248292
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.0290336 0.0373747 0.777 0.441
`5-40:sat` 0.0056437 0.0006588 8.567 3.11e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.09533 on 48 degrees of freedom
Multiple R-squared: 0.6046, Adjusted R-squared: 0.5963
F-statistic: 73.39 on 1 and 48 DF, p-value: 3.106e-11
## raíz de Y, apenas camada 5-40.
a <- subset(db, select=grep(x=names(db),
pattern="(40-80|local)", invert=TRUE))
m1 <- step(lm(sqrt(vol)~., data=a), k=k, trace=0)
summary(m1)
Call:
lm(formula = sqrt(vol) ~ `5-40:sat`, data = a)
Residuals:
Min 1Q Median 3Q Max
-0.230061 -0.072021 -0.001073 0.065797 0.235749
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.2698080 0.0348826 7.735 5.55e-10 ***
`5-40:sat` 0.0053878 0.0006149 8.763 1.59e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.08897 on 48 degrees of freedom
Multiple R-squared: 0.6153, Adjusted R-squared: 0.6073
F-statistic: 76.78 on 1 and 48 DF, p-value: 1.59e-11
##----------------------------------------------------------------------
m1 <- step(m0, k=k)
Start: AIC=-180.47
vol ~ `40-80:acc` + `40-80:arg` + `40-80:ca` + `40-80:ctctot` +
`40-80:k` + `40-80:mg` + `40-80:mo` + `40-80:p` + `40-80:phh2o` +
`40-80:sat` + `5-40:acc` + `5-40:arg` + `5-40:ca` + `5-40:ctctot` +
`5-40:k` + `5-40:mg` + `5-40:mo` + `5-40:p` + `5-40:phh2o` +
`5-40:sat`
Df Sum of Sq RSS AIC
- `5-40:ca` 1 0.000107 0.26183 -184.36
- `5-40:sat` 1 0.000225 0.26195 -184.34
- `40-80:ca` 1 0.001077 0.26280 -184.18
- `40-80:k` 1 0.001119 0.26284 -184.17
- `5-40:mo` 1 0.001649 0.26337 -184.07
- `5-40:mg` 1 0.001871 0.26360 -184.03
- `5-40:ctctot` 1 0.004043 0.26577 -183.62
- `5-40:p` 1 0.004843 0.26657 -183.47
- `5-40:k` 1 0.004938 0.26666 -183.45
- `40-80:ctctot` 1 0.006725 0.26845 -183.12
- `40-80:p` 1 0.009241 0.27097 -182.65
- `5-40:arg` 1 0.010559 0.27228 -182.41
- `5-40:acc` 1 0.010588 0.27231 -182.40
- `40-80:mg` 1 0.012638 0.27436 -182.03
- `40-80:acc` 1 0.014563 0.27629 -181.68
0.26172 -180.47
- `40-80:sat` 1 0.023830 0.28555 -180.03
- `5-40:phh2o` 1 0.024089 0.28581 -179.98
- `40-80:mo` 1 0.024126 0.28585 -179.97
- `40-80:arg` 1 0.026591 0.28831 -179.55
- `40-80:phh2o` 1 0.063405 0.32513 -173.54
Step: AIC=-184.36
vol ~ `40-80:acc` + `40-80:arg` + `40-80:ca` + `40-80:ctctot` +
`40-80:k` + `40-80:mg` + `40-80:mo` + `40-80:p` + `40-80:phh2o` +
`40-80:sat` + `5-40:acc` + `5-40:arg` + `5-40:ctctot` + `5-40:k` +
`5-40:mg` + `5-40:mo` + `5-40:p` + `5-40:phh2o` + `5-40:sat`
Df Sum of Sq RSS AIC
- `5-40:sat` 1 0.001121 0.26295 -188.06
- `40-80:ca` 1 0.001145 0.26298 -188.06
- `40-80:k` 1 0.001279 0.26311 -188.03
- `5-40:mo` 1 0.001566 0.26340 -187.98
- `5-40:p` 1 0.004741 0.26657 -187.38
- `5-40:k` 1 0.004831 0.26666 -187.36
- `5-40:mg` 1 0.005387 0.26722 -187.26
- `40-80:ctctot` 1 0.008851 0.27068 -186.61
- `40-80:p` 1 0.009165 0.27100 -186.56
- `5-40:acc` 1 0.010505 0.27234 -186.31
- `5-40:arg` 1 0.010979 0.27281 -186.22
- `40-80:acc` 1 0.015156 0.27699 -185.46
- `40-80:mg` 1 0.016406 0.27824 -185.24
0.26183 -184.36
- `5-40:phh2o` 1 0.024095 0.28593 -183.87
- `5-40:ctctot` 1 0.025492 0.28732 -183.63
- `40-80:arg` 1 0.026926 0.28876 -183.38
- `40-80:mo` 1 0.028209 0.29004 -183.16
- `40-80:sat` 1 0.040284 0.30212 -181.12
- `40-80:phh2o` 1 0.070133 0.33196 -176.41
Step: AIC=-188.06
vol ~ `40-80:acc` + `40-80:arg` + `40-80:ca` + `40-80:ctctot` +
`40-80:k` + `40-80:mg` + `40-80:mo` + `40-80:p` + `40-80:phh2o` +
`40-80:sat` + `5-40:acc` + `5-40:arg` + `5-40:ctctot` + `5-40:k` +
`5-40:mg` + `5-40:mo` + `5-40:p` + `5-40:phh2o`
Df Sum of Sq RSS AIC
- `40-80:ca` 1 0.000704 0.26366 -191.84
- `5-40:mo` 1 0.001096 0.26405 -191.77
- `40-80:k` 1 0.001487 0.26444 -191.69
- `5-40:k` 1 0.005167 0.26812 -191.00
- `5-40:p` 1 0.005548 0.26850 -190.93
- `5-40:mg` 1 0.006271 0.26922 -190.80
- `40-80:ctctot` 1 0.007793 0.27075 -190.51
- `5-40:acc` 1 0.010360 0.27331 -190.04
- `40-80:p` 1 0.010459 0.27341 -190.02
- `5-40:arg` 1 0.011264 0.27422 -189.88
- `40-80:acc` 1 0.014595 0.27755 -189.27
- `40-80:mg` 1 0.015327 0.27828 -189.14
0.26295 -188.06
- `5-40:ctctot` 1 0.025905 0.28886 -187.28
- `40-80:arg` 1 0.026308 0.28926 -187.21
- `40-80:mo` 1 0.027521 0.29047 -187.00
- `40-80:sat` 1 0.044745 0.30770 -184.12
- `5-40:phh2o` 1 0.061432 0.32438 -181.48
- `40-80:phh2o` 1 0.099115 0.36207 -175.98
Step: AIC=-191.84
vol ~ `40-80:acc` + `40-80:arg` + `40-80:ctctot` + `40-80:k` +
`40-80:mg` + `40-80:mo` + `40-80:p` + `40-80:phh2o` + `40-80:sat` +
`5-40:acc` + `5-40:arg` + `5-40:ctctot` + `5-40:k` + `5-40:mg` +
`5-40:mo` + `5-40:p` + `5-40:phh2o`
Df Sum of Sq RSS AIC
- `5-40:mo` 1 0.000718 0.26437 -195.62
- `40-80:k` 1 0.001542 0.26520 -195.46
- `5-40:p` 1 0.005487 0.26914 -194.72
- `5-40:mg` 1 0.006715 0.27037 -194.50
- `5-40:k` 1 0.007500 0.27116 -194.35
- `5-40:acc` 1 0.009848 0.27350 -193.92
- `40-80:p` 1 0.010326 0.27398 -193.83
- `5-40:arg` 1 0.010561 0.27422 -193.79
- `40-80:acc` 1 0.016122 0.27978 -192.78
- `40-80:ctctot` 1 0.017595 0.28125 -192.52
- `40-80:mg` 1 0.019190 0.28285 -192.24
0.26366 -191.84
- `5-40:ctctot` 1 0.025362 0.28902 -191.16
- `40-80:arg` 1 0.026036 0.28969 -191.04
- `40-80:mo` 1 0.027438 0.29109 -190.80
- `5-40:phh2o` 1 0.061716 0.32537 -185.24
- `40-80:sat` 1 0.091015 0.35467 -180.93
- `40-80:phh2o` 1 0.099474 0.36313 -179.75
Step: AIC=-195.62
vol ~ `40-80:acc` + `40-80:arg` + `40-80:ctctot` + `40-80:k` +
`40-80:mg` + `40-80:mo` + `40-80:p` + `40-80:phh2o` + `40-80:sat` +
`5-40:acc` + `5-40:arg` + `5-40:ctctot` + `5-40:k` + `5-40:mg` +
`5-40:p` + `5-40:phh2o`
Df Sum of Sq RSS AIC
- `40-80:k` 1 0.001479 0.26585 -199.25
- `5-40:p` 1 0.005245 0.26962 -198.55
- `5-40:mg` 1 0.007518 0.27189 -198.13
- `5-40:k` 1 0.008083 0.27246 -198.02
- `5-40:acc` 1 0.009790 0.27416 -197.71
- `40-80:p` 1 0.009922 0.27430 -197.69
- `5-40:arg` 1 0.009948 0.27432 -197.68
- `40-80:acc` 1 0.015426 0.27980 -196.69
- `40-80:ctctot` 1 0.018422 0.28280 -196.16
- `40-80:mg` 1 0.018515 0.28289 -196.14
0.26437 -195.62
- `40-80:arg` 1 0.025320 0.28969 -194.96
- `5-40:ctctot` 1 0.031357 0.29573 -193.92
- `40-80:mo` 1 0.036901 0.30128 -193.00
- `5-40:phh2o` 1 0.061060 0.32543 -189.14
- `40-80:sat` 1 0.092695 0.35707 -184.50
- `40-80:phh2o` 1 0.099710 0.36408 -183.53
Step: AIC=-199.25
vol ~ `40-80:acc` + `40-80:arg` + `40-80:ctctot` + `40-80:mg` +
`40-80:mo` + `40-80:p` + `40-80:phh2o` + `40-80:sat` + `5-40:acc` +
`5-40:arg` + `5-40:ctctot` + `5-40:k` + `5-40:mg` + `5-40:p` +
`5-40:phh2o`
Df Sum of Sq RSS AIC
- `5-40:p` 1 0.004203 0.27006 -202.38
- `5-40:mg` 1 0.008143 0.27400 -201.65
- `40-80:p` 1 0.009014 0.27487 -201.49
- `5-40:acc` 1 0.009636 0.27549 -201.38
- `5-40:arg` 1 0.009905 0.27576 -201.33
- `40-80:acc` 1 0.016459 0.28231 -200.16
- `40-80:ctctot` 1 0.019473 0.28533 -199.63
- `40-80:mg` 1 0.019876 0.28573 -199.56
- `5-40:k` 1 0.020617 0.28647 -199.43
0.26585 -199.25
- `40-80:arg` 1 0.029776 0.29563 -197.85
- `5-40:ctctot` 1 0.032036 0.29789 -197.47
- `40-80:mo` 1 0.037936 0.30379 -196.49
- `5-40:phh2o` 1 0.060267 0.32612 -192.94
- `40-80:phh2o` 1 0.098231 0.36408 -187.44
- `40-80:sat` 1 0.100325 0.36618 -187.15
Step: AIC=-202.38
vol ~ `40-80:acc` + `40-80:arg` + `40-80:ctctot` + `40-80:mg` +
`40-80:mo` + `40-80:p` + `40-80:phh2o` + `40-80:sat` + `5-40:acc` +
`5-40:arg` + `5-40:ctctot` + `5-40:k` + `5-40:mg` + `5-40:phh2o`
Df Sum of Sq RSS AIC
- `40-80:p` 1 0.008115 0.27817 -204.81
- `5-40:acc` 1 0.008296 0.27835 -204.78
- `5-40:mg` 1 0.009743 0.27980 -204.52
- `5-40:arg` 1 0.013747 0.28380 -203.81
- `40-80:acc` 1 0.016550 0.28661 -203.31
- `40-80:ctctot` 1 0.018136 0.28819 -203.04
- `40-80:mg` 1 0.019502 0.28956 -202.80
- `5-40:k` 1 0.019534 0.28959 -202.80
0.27006 -202.38
- `5-40:ctctot` 1 0.032813 0.30287 -200.56
- `40-80:arg` 1 0.032860 0.30292 -200.55
- `40-80:mo` 1 0.036152 0.30621 -200.01
- `5-40:phh2o` 1 0.060791 0.33085 -196.14
- `40-80:phh2o` 1 0.094096 0.36415 -191.34
- `40-80:sat` 1 0.112285 0.38234 -188.91
Step: AIC=-204.81
vol ~ `40-80:acc` + `40-80:arg` + `40-80:ctctot` + `40-80:mg` +
`40-80:mo` + `40-80:phh2o` + `40-80:sat` + `5-40:acc` + `5-40:arg` +
`5-40:ctctot` + `5-40:k` + `5-40:mg` + `5-40:phh2o`
Df Sum of Sq RSS AIC
- `5-40:acc` 1 0.006208 0.28438 -207.62
- `5-40:mg` 1 0.007174 0.28535 -207.45
- `5-40:arg` 1 0.010547 0.28872 -206.86
- `40-80:ctctot` 1 0.011949 0.29012 -206.62
- `40-80:mg` 1 0.013339 0.29151 -206.38
- `40-80:acc` 1 0.018695 0.29687 -205.47
- `5-40:k` 1 0.022625 0.30080 -204.81
0.27817 -204.81
- `40-80:arg` 1 0.028275 0.30645 -203.88
- `5-40:ctctot` 1 0.029923 0.30810 -203.61
- `40-80:mo` 1 0.053270 0.33144 -199.96
- `5-40:phh2o` 1 0.066141 0.34431 -198.06
- `40-80:phh2o` 1 0.099795 0.37797 -193.39
- `40-80:sat` 1 0.111053 0.38922 -191.93
Step: AIC=-207.62
vol ~ `40-80:acc` + `40-80:arg` + `40-80:ctctot` + `40-80:mg` +
`40-80:mo` + `40-80:phh2o` + `40-80:sat` + `5-40:arg` + `5-40:ctctot` +
`5-40:k` + `5-40:mg` + `5-40:phh2o`
Df Sum of Sq RSS AIC
- `5-40:mg` 1 0.006723 0.29110 -210.36
- `40-80:ctctot` 1 0.008502 0.29288 -210.06
- `40-80:mg` 1 0.012677 0.29706 -209.35
- `5-40:k` 1 0.020946 0.30533 -207.98
0.28438 -207.62
- `5-40:ctctot` 1 0.024033 0.30841 -207.47
- `5-40:arg` 1 0.025762 0.31014 -207.19
- `40-80:arg` 1 0.044524 0.32890 -204.26
- `40-80:acc` 1 0.050005 0.33438 -203.43
- `40-80:mo` 1 0.052773 0.33715 -203.02
- `5-40:phh2o` 1 0.060237 0.34462 -201.92
- `40-80:phh2o` 1 0.093661 0.37804 -197.29
- `40-80:sat` 1 0.110419 0.39480 -195.13
Step: AIC=-210.36
vol ~ `40-80:acc` + `40-80:arg` + `40-80:ctctot` + `40-80:mg` +
`40-80:mo` + `40-80:phh2o` + `40-80:sat` + `5-40:arg` + `5-40:ctctot` +
`5-40:k` + `5-40:phh2o`
Df Sum of Sq RSS AIC
- `40-80:mg` 1 0.007148 0.29825 -213.06
- `40-80:ctctot` 1 0.008172 0.29928 -212.89
- `5-40:ctctot` 1 0.018635 0.30974 -211.17
- `5-40:k` 1 0.022188 0.31329 -210.60
0.29110 -210.36
- `5-40:arg` 1 0.026481 0.31758 -209.92
- `40-80:arg` 1 0.049427 0.34053 -206.43
- `40-80:mo` 1 0.052738 0.34384 -205.95
- `40-80:acc` 1 0.055324 0.34643 -205.57
- `5-40:phh2o` 1 0.065444 0.35655 -204.13
- `40-80:phh2o` 1 0.100051 0.39115 -199.50
- `40-80:sat` 1 0.110856 0.40196 -198.14
Step: AIC=-213.06
vol ~ `40-80:acc` + `40-80:arg` + `40-80:ctctot` + `40-80:mo` +
`40-80:phh2o` + `40-80:sat` + `5-40:arg` + `5-40:ctctot` +
`5-40:k` + `5-40:phh2o`
Df Sum of Sq RSS AIC
- `40-80:ctctot` 1 0.003688 0.30194 -216.36
- `5-40:k` 1 0.015646 0.31390 -214.41
0.29825 -213.06
- `5-40:ctctot` 1 0.025944 0.32419 -212.80
- `5-40:arg` 1 0.031315 0.32957 -211.98
- `40-80:arg` 1 0.044686 0.34294 -209.99
- `40-80:mo` 1 0.045938 0.34419 -209.81
- `40-80:acc` 1 0.049417 0.34767 -209.31
- `5-40:phh2o` 1 0.083206 0.38146 -204.67
- `40-80:phh2o` 1 0.092940 0.39119 -203.41
- `40-80:sat` 1 0.109792 0.40804 -201.30
Step: AIC=-216.36
vol ~ `40-80:acc` + `40-80:arg` + `40-80:mo` + `40-80:phh2o` +
`40-80:sat` + `5-40:arg` + `5-40:ctctot` + `5-40:k` + `5-40:phh2o`
Df Sum of Sq RSS AIC
- `5-40:k` 1 0.020431 0.32237 -217.00
- `5-40:ctctot` 1 0.022577 0.32452 -216.66
0.30194 -216.36
- `5-40:arg` 1 0.034339 0.33628 -214.88
- `40-80:arg` 1 0.041696 0.34363 -213.80
- `40-80:acc` 1 0.046178 0.34812 -213.15
- `40-80:mo` 1 0.048973 0.35091 -212.75
- `5-40:phh2o` 1 0.080933 0.38287 -208.40
- `40-80:phh2o` 1 0.091434 0.39337 -207.04
- `40-80:sat` 1 0.111723 0.41366 -204.53
Step: AIC=-217
vol ~ `40-80:acc` + `40-80:arg` + `40-80:mo` + `40-80:phh2o` +
`40-80:sat` + `5-40:arg` + `5-40:ctctot` + `5-40:phh2o`
Df Sum of Sq RSS AIC
- `5-40:arg` 1 0.022303 0.34467 -217.56
- `5-40:ctctot` 1 0.024537 0.34691 -217.24
- `40-80:arg` 1 0.024579 0.34695 -217.23
0.32237 -217.00
- `40-80:acc` 1 0.037557 0.35993 -215.40
- `40-80:mo` 1 0.041818 0.36419 -214.81
- `40-80:phh2o` 1 0.084474 0.40684 -209.27
- `40-80:sat` 1 0.109655 0.43202 -206.27
- `5-40:phh2o` 1 0.113570 0.43594 -205.82
Step: AIC=-217.56
vol ~ `40-80:acc` + `40-80:arg` + `40-80:mo` + `40-80:phh2o` +
`40-80:sat` + `5-40:ctctot` + `5-40:phh2o`
Df Sum of Sq RSS AIC
- `5-40:ctctot` 1 0.005038 0.34971 -220.75
- `40-80:arg` 1 0.011648 0.35632 -219.81
0.34467 -217.56
- `40-80:mo` 1 0.030116 0.37479 -217.29
- `40-80:acc` 1 0.035307 0.37998 -216.60
- `40-80:phh2o` 1 0.071134 0.41581 -212.09
- `40-80:sat` 1 0.092782 0.43745 -209.56
- `5-40:phh2o` 1 0.098687 0.44336 -208.89
Step: AIC=-220.75
vol ~ `40-80:acc` + `40-80:arg` + `40-80:mo` + `40-80:phh2o` +
`40-80:sat` + `5-40:phh2o`
Df Sum of Sq RSS AIC
- `40-80:arg` 1 0.014055 0.36377 -222.69
- `40-80:mo` 1 0.026508 0.37622 -221.01
0.34971 -220.75
- `40-80:acc` 1 0.033123 0.38283 -220.14
- `40-80:phh2o` 1 0.066609 0.41632 -215.94
- `40-80:sat` 1 0.090187 0.43990 -213.19
- `5-40:phh2o` 1 0.094898 0.44461 -212.66
Step: AIC=-222.69
vol ~ `40-80:acc` + `40-80:mo` + `40-80:phh2o` + `40-80:sat` +
`5-40:phh2o`
Df Sum of Sq RSS AIC
- `40-80:acc` 1 0.019911 0.38368 -223.94
- `40-80:mo` 1 0.022775 0.38654 -223.57
0.36377 -222.69
- `40-80:phh2o` 1 0.066663 0.43043 -218.19
- `40-80:sat` 1 0.076824 0.44059 -217.02
- `5-40:phh2o` 1 0.153534 0.51730 -209.00
Step: AIC=-223.94
vol ~ `40-80:mo` + `40-80:phh2o` + `40-80:sat` + `5-40:phh2o`
Df Sum of Sq RSS AIC
- `40-80:mo` 1 0.018436 0.40211 -225.50
0.38368 -223.94
- `40-80:phh2o` 1 0.054789 0.43847 -221.18
- `40-80:sat` 1 0.092741 0.47642 -217.03
- `5-40:phh2o` 1 0.139613 0.52329 -212.33
Step: AIC=-225.5
vol ~ `40-80:phh2o` + `40-80:sat` + `5-40:phh2o`
Df Sum of Sq RSS AIC
0.40211 -225.50
- `40-80:phh2o` 1 0.054267 0.45638 -223.09
- `40-80:sat` 1 0.123952 0.52606 -215.98
- `5-40:phh2o` 1 0.133744 0.53586 -215.06
## anova(m1)
anova(m1, m0)
Analysis of Variance Table
Model 1: vol ~ `40-80:phh2o` + `40-80:sat` + `5-40:phh2o`
Model 2: vol ~ `40-80:acc` + `40-80:arg` + `40-80:ca` + `40-80:ctctot` +
`40-80:k` + `40-80:mg` + `40-80:mo` + `40-80:p` + `40-80:phh2o` +
`40-80:sat` + `5-40:acc` + `5-40:arg` + `5-40:ca` + `5-40:ctctot` +
`5-40:k` + `5-40:mg` + `5-40:mo` + `5-40:p` + `5-40:phh2o` +
`5-40:sat`
Res.Df RSS Df Sum of Sq F Pr(>F)
1 46 0.40211
2 29 0.26172 17 0.14039 0.915 0.5654
par(mfrow=c(2,2)); plot(m1); layout(1)
## im <- influence.measures(m1)
## summary(im)
summary(m1)
Call:
lm(formula = vol ~ `40-80:phh2o` + `40-80:sat` + `5-40:phh2o`,
data = db[, -1])
Residuals:
Min 1Q Median 3Q Max
-0.22670 -0.05807 0.01265 0.06111 0.22487
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.003077 0.195882 0.016 0.98754
`40-80:phh2o` -0.110869 0.044498 -2.492 0.01639 *
`40-80:sat` 0.004777 0.001269 3.766 0.00047 ***
`5-40:phh2o` 0.131077 0.033511 3.911 0.00030 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.0935 on 46 degrees of freedom
Multiple R-squared: 0.6355, Adjusted R-squared: 0.6117
F-statistic: 26.73 on 3 and 46 DF, p-value: 3.684e-10
##----------------------------------------------------------------------
## Pacote rpart.
require(rpart)
dc <- db
names(dc) <- gsub(x=names(db),
pattern="(.*)-(.*):(.*)",
replacement="\\3_\\1_\\2")
ar1 <- rpart(vol~., data=dc[,-1], method="anova")
## printcp(ar1)
## plotcp(ar1)
summary(ar1)
Call:
rpart(formula = vol ~ ., data = dc[, -1], method = "anova")
n= 50
CP nsplit rel error xerror xstd
1 0.58461810 0 1.0000000 1.0359599 0.16739926
2 0.10189705 1 0.4153819 0.5482849 0.11461095
3 0.06389554 2 0.3134848 0.5434564 0.11673920
4 0.01000000 3 0.2495893 0.4292373 0.08690008
Variable importance
sat_5_40 sat_40_80 ca_5_40 phh2o_5_40 ca_40_80
19 17 16 15 14
phh2o_40_80 mo_5_40 acc_40_80 ctctot_40_80
14 3 1 1
Node number 1: 50 observations, complexity param=0.5846181
mean=0.32766, MSE=0.02206298
left son=2 (23 obs) right son=3 (27 obs)
Primary splits:
sat_5_40 < 53.17 to the left, improve=0.5846181, (0 missing)
phh2o_5_40 < 5.65 to the left, improve=0.5320022, (0 missing)
phh2o_40_80 < 6.05 to the left, improve=0.5194287, (0 missing)
sat_40_80 < 54.815 to the left, improve=0.5179540, (0 missing)
ca_40_80 < 2.005 to the left, improve=0.4866620, (0 missing)
Surrogate splits:
sat_40_80 < 54.815 to the left, agree=0.96, adj=0.913, (0 split)
phh2o_5_40 < 5.65 to the left, agree=0.96, adj=0.913, (0 split)
phh2o_40_80 < 6.05 to the left, agree=0.94, adj=0.870, (0 split)
ca_5_40 < 2.245 to the left, agree=0.92, adj=0.826, (0 split)
ca_40_80 < 1.385 to the left, agree=0.90, adj=0.783, (0 split)
Node number 2: 23 observations, complexity param=0.1018971
mean=0.2046087, MSE=0.01035963
left son=4 (10 obs) right son=5 (13 obs)
Primary splits:
mo_5_40 < 9.15 to the left, improve=0.4717629, (0 missing)
ca_5_40 < 0.825 to the left, improve=0.3711853, (0 missing)
sat_40_80 < 22.505 to the left, improve=0.3621908, (0 missing)
acc_5_40 < 641.93 to the right, improve=0.2864897, (0 missing)
sat_5_40 < 29.435 to the left, improve=0.2846545, (0 missing)
Surrogate splits:
sat_40_80 < 22.505 to the left, agree=0.870, adj=0.7, (0 split)
ca_5_40 < 0.825 to the left, agree=0.870, adj=0.7, (0 split)
sat_5_40 < 30.435 to the left, agree=0.870, adj=0.7, (0 split)
ca_40_80 < 0.48 to the left, agree=0.783, adj=0.5, (0 split)
acc_40_80 < 604.57 to the right, agree=0.739, adj=0.4, (0 split)
Node number 3: 27 observations, complexity param=0.06389554
mean=0.4324815, MSE=0.008146546
left son=6 (17 obs) right son=7 (10 obs)
Primary splits:
sat_5_40 < 72.085 to the left, improve=0.3204553, (0 missing)
ca_40_80 < 2.41 to the left, improve=0.2556067, (0 missing)
acc_40_80 < 391.955 to the left, improve=0.2370571, (0 missing)
mo_40_80 < 11.8 to the left, improve=0.2093786, (0 missing)
k_5_40 < 33.61 to the left, improve=0.1886836, (0 missing)
Surrogate splits:
ca_5_40 < 6.055 to the left, agree=0.852, adj=0.6, (0 split)
ca_40_80 < 2.545 to the left, agree=0.815, adj=0.5, (0 split)
sat_40_80 < 72.445 to the left, agree=0.815, adj=0.5, (0 split)
phh2o_5_40 < 6.75 to the left, agree=0.815, adj=0.5, (0 split)
ctctot_40_80 < 6.75 to the left, agree=0.778, adj=0.4, (0 split)
Node number 4: 10 observations
mean=0.1249, MSE=0.00237549
Node number 5: 13 observations
mean=0.2659231, MSE=0.007854533
Node number 6: 17 observations
mean=0.3932941, MSE=0.003174325
Node number 7: 10 observations
mean=0.4991, MSE=0.00955069
rsq.rpart(ar1)
Regression tree:
rpart(formula = vol ~ ., data = dc[, -1], method = "anova")
Variables actually used in tree construction:
[1] mo_5_40 sat_5_40
Root node error: 1.1031/50 = 0.022063
n= 50
CP nsplit rel error xerror xstd
1 0.584618 0 1.00000 1.03596 0.16740
2 0.101897 1 0.41538 0.54828 0.11461
3 0.063896 2 0.31348 0.54346 0.11674
4 0.010000 3 0.24959 0.42924 0.08690
pred <- factor(predict(ar1))
plot(ar1, uniform=TRUE, main="Árvore de regressão para volume")
text(ar1, use.n=TRUE, all=TRUE, cex=.8)
plot(mo_5_40~sat_5_40, data=dc, col=as.integer(pred), pch=NA)
with(dc, text(y=mo_5_40, x=sat_5_40,
labels=rownames(dc), col=as.integer(pred)))
xyplot(vol~sat_5_40, groups=pred, data=dc)