#-----------------------------------------------------------------------
# Uma senhora toma chĂĄ.
# Quantidade de maneiras de gerar dois grupos de 4 xĂcaras usando 8.
n_poss <- choose(8, 4)
n_poss
## [1] 70
# De 4 xĂcaras selecionadas para um grupo, X representa o nĂșmero de
# acertos.
n_acertar <- sapply(4:0,
FUN = function(i) {
# (formas de acertar x em 4) * (formas de errar x em 4).
choose(4, i) * choose(4, 4 - i)
})
n_acertar
## [1] 1 16 36 16 1
# Pr(Acerto total) = 1/70 < 5%.
cumsum(n_acertar)/n_poss
## [1] 0.01428571 0.24285714 0.75714286 0.98571429 1.00000000
# Comprovando por simulação.
v <- rep(0:1, each = 4)
mean(replicate(n = 100000,
expr = all(sample(v) == v)))
## [1] 0.01473
# Essa forma de fazer a conta Ă© mais realista mas dĂĄ na mesma.
mean(replicate(n = 100000,
expr = all(sample(v) == sample(v))))
## [1] 0.01409
#-----------------------------------------------------------------------
# Exemplo com teste para a diferença de médias.
# Comprimentos de crùnios de cães pré-históricos.
m <- c(120, 107, 110, 116, 114, 111, 113, 117, 114, 112)
f <- c(110, 111, 107, 108, 110, 105, 107, 106, 111, 111)
plot(ecdf(m), xlim = range(c(m, f)), col = "cyan")
lines(ecdf(f), col = "magenta")
rug(m, col = "cyan")
rug(f, col = "magenta")
# Diferença de média observada.
d <- mean(m) - mean(f)
d
## [1] 4.8
# Todos as combinaçÔes possĂveis.
choose(n = 20, k = 10)
## [1] 184756
#--------------------------------------------
# Com todas as combinaçÔes possĂveis (exaustĂŁo).
# Para construir todas as combinaçÔes possĂveis.
k <- combn(x = 1:20, m = 10)
dim(k)
## [1] 10 184756
# Vetor com os valores dos dois grupos concatenados.
mf <- c(m, f)
# Vetor cheio de zeros.
g <- integer(20)
# Calcula a diferença para todo arranjo possĂvel.
D <- apply(k,
MARGIN = 2,
FUN = function(i) {
# Alguns elementos do vetor convertidos para 1.
g[i] <- 1L
# CĂĄlculo da estatĂstica de teste.
-diff(tapply(mf, g, FUN = mean))
})
# Histograma da distribuição da estatĂstica sib H_0.
hist(D, col = "gray50")
rug(D)
abline(v = d, col = 2)
plot(ecdf(D), cex = 0)
rug(D)
abline(v = d, col = 2)
# P-valor do teste.
2 * sum(D >= d)/length(D)
## [1] 0.003334127
#--------------------------------------------
# Com simulação (não necessåriamente exaustivo).
# VariĂĄveis que indentifica os grupos.
g <- rep(1:2, each = 10)
# Apenas para conferir.
cbind(g, mf)
## g mf
## [1,] 1 120
## [2,] 1 107
## [3,] 1 110
## [4,] 1 116
## [5,] 1 114
## [6,] 1 111
## [7,] 1 113
## [8,] 1 117
## [9,] 1 114
## [10,] 1 112
## [11,] 2 110
## [12,] 2 111
## [13,] 2 107
## [14,] 2 108
## [15,] 2 110
## [16,] 2 105
## [17,] 2 107
## [18,] 2 106
## [19,] 2 111
## [20,] 2 111
# Replicando a diferença para grupos formados por aleatorização.
D <- replicate(9999, {
gg <- sample(g)
-diff(tapply(mf, gg, FUN = mean))
})
# Tem que juntar a estatĂstica observada com as simuladas.
D <- c(D, d)
hist(D, col = "gray50")
rug(D)
abline(v = d, col = 2)
plot(ecdf(D), cex = 0)
rug(D)
abline(v = d, col = 2)
# P-valor do teste.
2 * sum(D >= d)/length(D)
## [1] 0.003
#-----------------------------------------------------------------------
# Teste de aleatorização para a correlação.
# N = 5 para um par de medidas.
x <- c(4.1, 8.3, 2.9, 10.8, 9.5)
y <- c(3.7, 5.1, 1.0, 7.7, 8.9)
cbind(x, y)
## x y
## [1,] 4.1 3.7
## [2,] 8.3 5.1
## [3,] 2.9 1.0
## [4,] 10.8 7.7
## [5,] 9.5 8.9
plot(x, y)
# EstatĂstica de teste na amostra original.
r0 <- cor(x, y, method = "pearson")
r0
## [1] 0.9228669
# Todas as permutaçÔes possiveis: 5! = 120 do vetor x.
X <- gtools::permutations(n = length(x), r = length(x), v = x)
str(X)
## num [1:120, 1:5] 2.9 2.9 2.9 2.9 2.9 2.9 2.9 2.9 2.9 2.9 ...
head(X)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 2.9 4.1 8.3 9.5 10.8
## [2,] 2.9 4.1 8.3 10.8 9.5
## [3,] 2.9 4.1 9.5 8.3 10.8
## [4,] 2.9 4.1 9.5 10.8 8.3
## [5,] 2.9 4.1 10.8 8.3 9.5
## [6,] 2.9 4.1 10.8 9.5 8.3
# As 120 correlaçÔes obtidas para cada arranjo.
r <- apply(X, MARGIN = 1, FUN = cor, y = y, method = "spearman")
# P-valor do teste.
2 * sum(r >= r0)/length(r)
## [1] 0.01666667
#-----------------------------------------------------------------------
# Ăndice de Moran para medir dependĂȘncia espacial.
# Coordenadas dos eventos em uma malha regular.
x <- 1:8
y <- 1:8
# Para criar a matriz de pesos.
ind <- expand.grid(i = 1:length(x), j = 1:length(y))
f <- function(i, j) {
u <- min(3, sum(abs(ind[i, ] - ind[j, ])))
c(0, 1, sqrt(1/2), 0)[u + 1]
}
w <- matrix(0, nrow(ind), nrow(ind))
for (i in 1:nrow(ind)) {
for (j in 1:nrow(ind)) {
w[i, j] <- f(i, j)
}
}
w <- w/sum(w)
image(w)
# Ăndice de Moran.
moran <- function(x, weights) {
# Tamanho da amostra.
n <- length(x)
# Valores normalizados.
z <- as.vector((x - mean(x))/sd(x))
# Ăndice de Moran.
as.vector(z %*% weights %*% (z * sqrt(n/(n - 1))))
}
# Teste de permutação com gråfico.
ppt <- function(z, w, N = 10000, stat, ...) {
# Ăndice de Moran por reamostragem.
sim <- replicate(N,
moran(sample(z), w))
# Determina o p-valor.
p.value <- mean((all <- c(stat, sim)) >= stat)
# Histograma da distribuição empĂrica sob H_0.
hist(sim,
sub = paste("p =", round(p.value, 4)),
xlim = range(all),
...)
abline(v = stat, col = "#903030", lty = 3, lwd = 2)
return(p.value)
}
# ObservaçÔes simuladas.
set.seed(17)
par(mfrow = c(2, 3))
# Dados com dependĂȘncia espacial ---------------------------------------
# Indução de autocorrelação por meio de uma tendĂȘncia.
z <- matrix(rexp(length(x) * length(y),
outer(x, y^2)),
length(x))
image(log(z), main = "Com dependĂȘncia")
# Ăndice de Moran com dados originais.
stat <- moran(z, w)
stat
## [1] 0.06499871
hist(z)
ppt(z, w, stat = stat, main = "I de Moran", xlab = "I")
## [1] 0.01459854
# Dados sem dependĂȘncia espacial ---------------------------------------
# Geração de de um conjunto de dados sob hipótese nula.
z <- matrix(rnorm(length(x) * length(y), 0, 1/2), length(x))
image(z, main = "Sem dependĂȘncia")
# Ăndice de Moran com dados originais.
stat <- moran(z, w)
stat
## [1] -0.0121901
hist(z)
ppt(z, w, stat = stat, main = "I de Moran", xlab = "I")
## [1] 0.4478552
#-----------------------------------------------------------------------
# Aplicação de Jackknife em modelos de regressão não linear.
library(latticeExtra)
library(car)
library(alr3)
# Curva "palpite".
start <- list(th0 = 75, th1 = 0.5, th2 = 50)
xyplot(C ~ Temp, data = segreg) +
layer(panel.curve(th0 + th1 * (x - th2) * (x >= th2) +
0 * (x < th2), lty = 2),
data = start)
# Ajuste.
n0 <- nls(C ~ th0 + th1 * (Temp - th2) * (Temp >= th2) +
0 * (Temp < th2),
data = segreg,
start = start)
# Estimativas e medidas de ajuste.
summary(n0)
##
## Formula: C ~ th0 + th1 * (Temp - th2) * (Temp >= th2) + 0 * (Temp < th2)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## th0 74.6953 1.3433 55.607 < 2e-16 ***
## th1 0.5674 0.1006 5.641 2.10e-06 ***
## th2 41.9512 4.6583 9.006 9.43e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.373 on 36 degrees of freedom
##
## Number of iterations to convergence: 4
## Achieved convergence tolerance: 1.141e-08
# Observados e preditos.
xyplot(C ~ Temp, data = segreg) +
layer(panel.curve(th0 + th1 * (x - th2) * (x >= th2) +
0 * (x < th2)),
data = as.list(coef(n0)))
#-----------------------------------------------------------------------
# Estimativas Jackknife para os parĂąmetros.
theta <- coef(n0) # Estimativas com todos os dados.
n <- nrow(segreg) # NĂșmero de observaçÔes.
# Matriz vazia para armazenar os pseudo-valores.
thetaj <- matrix(0, nrow = n, ncol = length(theta))
colnames(thetaj) <- names(theta)
# Ajustes deixando uma observação de fora (leave-one-out).
for (i in 1:n) {
# Reajusta o modelo, i.e. estima com leave-one-out.
n1 <- nls(C ~ th0 + th1 * (Temp - th2) * (Temp >= th2) +
0 * (Temp < th2),
data = segreg[-i, ],
start = coef(n0))
# Calcula os pseudo valores.
thetaj[i, ] <- n * theta - (n - 1) * coef(n1)
}
# Os primeiros pseudo valores.
head(thetaj)
## th0 th1 th2
## [1,] 103.87671 0.5674297 93.37856
## [2,] 68.84780 0.5674297 31.64593
## [3,] 73.17904 0.5674297 39.27902
## [4,] 58.30584 0.5674297 13.06748
## [5,] 68.76598 0.5674297 31.50172
## [6,] 79.22636 0.5674297 49.93642
# Estimativas pontuais.
jk <- colMeans(thetaj)
cbind(MLE = theta, Jack = jk, VĂcio = theta - jk)
## MLE Jack VĂcio
## th0 74.6953375 74.413230 0.28210720
## th1 0.5674294 0.525906 0.04152343
## th2 41.9512279 40.057567 1.89366134
# Erros padrÔes.
cbind(MLE = summary(n0)$coefficients[, "Std. Error"],
Jack = apply(thetaj, MARGIN = 2, sd)/sqrt(n))
## MLE Jack
## th0 1.3432772 1.90986928
## th1 0.1005824 0.07637956
## th2 4.6582537 5.17480685
# Pacote com função pronta para Jackknife para modelos não lineares.
library(nlstools)
# ls("package:nlstools")
# Aplica Jackknife sobre o modelo.
j0 <- nlsJack(n0)
summary(j0)
##
## ------
## Jackknife statistics
## Estimates Bias
## th0 74.413230 0.28210720
## th1 0.525906 0.04152343
## th2 40.057567 1.89366134
##
## ------
## Jackknife confidence intervals
## Low Up
## th0 70.539836 78.2866247
## th1 0.371001 0.6808109
## th2 29.562572 50.5525613
##
## ------
## Influential values
## * Observation 1 is influential on th0
## * Observation 4 is influential on th0
## * Observation 7 is influential on th0
## * Observation 15 is influential on th0
## * Observation 15 is influential on th1
## * Observation 39 is influential on th1
## * Observation 15 is influential on th2
#-----------------------------------------------------------------------
# InferĂȘncia para a DL 50.
# library(labestData)
# data(PaulaTb3.12)
# str(PaulaTb3.12)
# help(PaulaTb3.12, h = "html")
# # Converte a resposta para binĂĄrio.
# PaulaTb3.12$resp <- as.integer(PaulaTb3.12$resp) - 1
# dput(PaulaTb3.12)
PaulaTb3.12 <-
structure(list(vol = c(3.7, 3.5, 1.25, 0.75, 0.8, 0.7, 0.6, 1.1,
0.9, 0.9, 0.8, 0.55, 0.6, 1.4, 0.75, 2.3, 3.2, 0.85, 1.7, 1.8,
0.4, 0.95, 1.35, 1.5, 1.6, 0.6, 1.8, 0.95, 1.9, 1.6, 2.7, 2.35,
1.1, 1.1, 1.2, 0.8, 0.95, 0.75, 1.3), razao = c(0.825, 1.09,
2.5, 1.5, 3.2, 3.5, 0.75, 1.7, 0.75, 0.45, 0.57, 2.75, 3, 2.33,
3.75, 1.64, 1.6, 1.415, 1.06, 1.8, 2, 1.36, 1.35, 1.36, 1.78,
1.5, 1.5, 1.9, 0.95, 0.4, 0.75, 0.03, 1.83, 2.2, 2, 3.33, 1.9,
1.9, 1.625), resp = c(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1,
1, 1, 0, 0, 1)), .Names = c("vol", "razao", "resp"), row.names = c(NA,
39L), class = "data.frame")
layout(1)
plot(resp ~ vol, data = PaulaTb3.12)
m0 <- glm(resp ~ vol,
data = PaulaTb3.12,
family = binomial)
summary(m0)
##
## Call:
## glm(formula = resp ~ vol, family = binomial, data = PaulaTb3.12)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8341 -1.0023 0.2719 1.1185 1.4978
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.6627 0.8123 -2.047 0.0407 *
## vol 1.3357 0.6162 2.168 0.0302 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 54.040 on 38 degrees of freedom
## Residual deviance: 46.989 on 37 degrees of freedom
## AIC: 50.989
##
## Number of Fisher Scoring iterations: 4
# DL_50.
dl <- -coef(m0)[1]/coef(m0)[2]
# str(m0$family)$link
plot(resp ~ vol, data = PaulaTb3.12)
curve(m0$family$linkinv(coef(m0)[1] + coef(m0)[2] * x),
add = TRUE)
abline(v = dl, h = 0.5, lty = 2)
n <- nrow(PaulaTb3.12)
i <- 1:n
# Estimativas por Jackknife.
DL <- sapply(i,
FUN = function(i) {
m0 <- glm(resp ~ vol,
data = PaulaTb3.12[-i, ],
family = binomial)
# Estimativa Parcial.
DL <- -coef(m0)[1]/coef(m0)[2]
# Pseudo-valor.
n * dl - (n - 1) * DL
})
m <- mean(DL)
e <- sd(DL)/sqrt(n)
cbind(Est = rbind(MLE = dl,
Jack = m),
StdErr = rbind(MLE = car::deltaMethod(m0, g = "-(Intercept)/vol")["SE"],
Jack = e))
## (Intercept) SE
## MLE 1.244812 0.2632587
## Jack 1.254201 0.2657998
plot(ecdf(DL))
rug(DL)
plot(density(DL))
rug(DL)
# NOTE: alguma pista sobre porque a distribuição fica bimodal?
#-----------------------------------------------------------------------
# Aplicação na Mediana (derivada não suave) -> problema.
# Ordena o vetor de valores.
x <- sort(precip)
# Calcula a mediana com os dados originais.
M <- median(x)
M
## [1] 36.6
n <- length(x)
i <- 1:length(x)
# Estimativas da mediana por Jackknife.
y <- sapply(i,
FUN = function(i) {
ep <- median(x[-i])
pv <- n * M - (n - 1) * ep
return(pv)
})
stem(y)
##
## The decimal point is 1 digit(s) to the right of the |
##
## 0 | 99999999999999999999999999999999999
## 1 |
## 2 |
## 3 |
## 4 |
## 5 |
## 6 | 44444444444444444444444444444444444
# NOTE: Explique porque o Jackknife para a mediana sĂł retornou 2
# valores? Considere que o tamanho da amostra Ă© dado abaixo.
length(x)
## [1] 70
#-----------------------------------------------------------------------
# Aplicação de bootstrap. Estudo de caso com modelos não lineares.
library(alr3)
str(turk0)
## 'data.frame': 35 obs. of 2 variables:
## $ A : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Gain: int 644 631 661 624 633 610 615 605 608 599 ...
# GrĂĄfico dos valores observados.
plot(Gain ~ A, data = turk0)
# Lista com os valores iniciais.
start <- list(Int = 625, Ass = 180, Mei = 0.1)
# Adiciona as curvas.
with(start, {
curve(Int + Ass * A/(Mei + A),
xname = "A", add = TRUE, col = 2)
curve(Int + Ass * (1 - 2^(-A/Mei)),
xname = "A", add = TRUE, col = 4)})
# Ajuste do primeiro modelo.
n0 <- nls(Gain ~ Int + Ass * A/(Mei + A),
data = turk0,
start = start)
# Ajuste do segundo modelo.
n1 <- nls(Gain ~ Int + Ass * (1 - 2^(-A/Mei)),
data = turk0,
start = start)
cbind(coef(n0), coef(n1))
## [,1] [,2]
## Int 622.2417493 622.95805415
## Ass 235.2174693 178.25190835
## Mei 0.1547585 0.09732193
#-----------------------------------------------------------------------
# Criando funçÔes que fazem reamostragem dos dados e ajustam o modelo.
n <- nrow(turk0)
i <- 1:n
myboot0 <- function(formula, start) {
n0 <- nls(formula = formula,
data = turk0[sample(i, n, replace = TRUE), ],
start = start)
coef(n0)
}
#-----------------------------------------------------------------------
# Replicando.
# B = 999 reamostragens.
b0 <- replicate(999,
myboot0(Gain ~ Int + Ass * A/(Mei + A), start))
b1 <- replicate(999,
myboot0(Gain ~ Int + Ass * (1 - 2^(-A/Mei)), start))
# Conjunto de 1000 estimativas bootstrap.
b0 <- cbind(b0, coef(n0))
b1 <- cbind(b1, coef(n1))
str(b0)
## num [1:3, 1:1000] 624.917 227.566 0.153 624.617 239.092 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:3] "Int" "Ass" "Mei"
## ..$ : NULL
# Calcula o vĂcio absoluto (bootstrap - original).
rbind(n0 = rowMeans(b0) - coef(n0),
n1 = rowMeans(b1) - coef(n1))
## Int Ass Mei
## n0 -0.1248847 4.643737 0.007294549
## n1 -0.2939218 2.378039 0.002272771
# VariĂąncia do estimador.
rbind(n0 = apply(b0, MARGIN = 1, var),
n1 = apply(b1, MARGIN = 1, var))
## Int Ass Mei
## n0 35.84423 943.7282 0.0023195062
## n1 31.90712 158.5418 0.0002190582
# Função para calcular o erro quadråtico médio.
eqm <- function(x) {
sum((x - tail(x, 1))^2)/length(x)
}
# Erro quadråtico médio.
rbind(n0 = apply(b0, 1, FUN = eqm),
n1 = apply(b1, 1, FUN = eqm))
## Int Ass Mei
## n0 35.82398 964.3487 0.0023703971
## n1 31.96160 164.0384 0.0002240046
#-----------------------------------------------------------------------
# Curvas ajustadas de onde Ă© possĂvel determinar uma banda de confiança.
bts <- cbind(rep(1:2, each = ncol(b0)),
as.data.frame(rbind(t(b0), t(b1))))
splom(bts[, -1], groups = bts[, 1])
par(mfrow = c(1, 2))
plot(Gain ~ A, data = turk0, type = "n")
apply(b0,
MARGIN = 2,
FUN = function(b) {
curve(b[1] + b[2] * x/(b[3] + x),
add = TRUE, col = rgb(0, 1, 1, 0.25), n = 51)
invisible()
})
## NULL
points(Gain ~ A, data = turk0)
abline(v = 0.2, lty = 2)
plot(Gain ~ A, data = turk0, type = "n")
apply(b1,
MARGIN = 2,
FUN = function(b) {
curve(b[1] + b[2] * (1 - 2^(-x/b[3])),
add = TRUE, col = rgb(1, 1, 0, 0.25), n = 51)
invisible()
})
## NULL
points(Gain ~ A, data = turk0)
abline(v = 0.2, lty = 2)
#-----------------------------------------------------------------------
# Estimativa do valor da função em x = 0.2.
p0 <- apply(b0, MARGIN = 2,
FUN = function(b){
b[1] + b[2] * 0.2/(b[3] + 0.2)
})
p1 <- apply(b1, MARGIN = 2,
FUN = function(b){
b[1] + b[2] * (1 - 2^(-0.2/b[3]))
})
cbind(Est = rbind(mean(p0), mean(p1)),
StdErr = rbind(sd(p0), sd(p1)))
## [,1] [,2]
## [1,] 754.9576 4.516552
## [2,] 758.4149 4.673427
boot
#-----------------------------------------------------------------------
# Usando o pacote boot.
library(boot)
library(latticeExtra)
# dput(as.list(coef(n1)))
start <- list(Int = 622.958054146589,
Ass = 178.251908347242,
Mei = 0.097321927254495)
# Criar uma função com dois argumentos: o data.frame original e um vetor
# que vai representar o Ăndice das linhas usado para reamostrar dos
# dados.
fitmodel <- function(dataset, index) {
n0 <- nls(Gain ~ Int + Ass * (1 - 2^(-A/Mei)),
data = dataset[index, ],
start = start)
c(coef(n0), s2 = deviance(n0)/df.residual(n0))
}
set.seed(321)
b0 <- boot(data = turk0,
statistic = fitmodel,
R = 1999)
class(b0)
## [1] "boot"
methods(class = class(b0))
## [1] as.data.frame c confint hist plot
## [6] print summary vcov
## see '?methods' for accessing help and source code
summary(b0)
## R original bootBias bootSE bootMed
## 1 1999 622.958054 0.0225743 5.726720 622.752517
## 2 1999 178.251908 1.9825733 15.015128 179.120315
## 3 1999 0.097322 0.0025534 0.018418 0.097733
## 4 1999 386.481790 -31.1163672 69.507327 353.375813
# Como sĂŁo obtidos.
theta <- b0$t
# Estimativas como conjunto de dados original.
b0$t0
## Int Ass Mei s2
## 622.95805415 178.25190835 0.09732193 386.48178994
# VĂcio.
apply(theta, MARGIN = 2, FUN = mean) - b0$t0
## Int Ass Mei s2
## 0.022574309 1.982573260 0.002553388 -31.116367189
# Desvio-padrĂŁo boostrap.
apply(theta, MARGIN = 2, FUN = sd)
## [1] 5.72671967 15.01512825 0.01841813 69.50732700
# Mediana bootstrap.
apply(theta, MARGIN = 2, FUN = median)
## [1] 622.75251653 179.12031530 0.09773343 353.37581349
st <- summary(b0)
str(st)
## Classes 'summary.boot' and 'data.frame': 4 obs. of 5 variables:
## $ R : num 1999 1999 1999 1999
## $ original: num 622.9581 178.2519 0.0973 386.4818
## $ bootBias: num 0.02257 1.98257 0.00255 -31.11637
## $ bootSE : num 5.7267 15.0151 0.0184 69.5073
## $ bootMed : num 622.7525 179.1203 0.0977 353.3758
# GrĂĄfico de pares.
pairs(b0)
vcov(b0)
## [,1] [,2] [,3] [,4]
## [1,] 32.79531815 -18.9667661 0.0398778540 69.43297130
## [2,] -18.96676614 225.4540763 0.2040272736 -4.43769918
## [3,] 0.03987785 0.2040273 0.0003392275 0.07502662
## [4,] 69.43297130 -4.4376992 0.0750266164 4831.26850663
cov2cor(vcov(b0))
## [,1] [,2] [,3] [,4]
## [1,] 1.0000000 -0.220576032 0.37807704 0.174433237
## [2,] -0.2205760 1.000000000 0.73775749 -0.004252049
## [3,] 0.3780770 0.737757494 1.00000000 0.058605614
## [4,] 0.1744332 -0.004252049 0.05860561 1.000000000
# Extrair os vetores de estativativas bootstrap.
str(b0)
## List of 11
## $ t0 : Named num [1:4] 622.9581 178.2519 0.0973 386.4818
## ..- attr(*, "names")= chr [1:4] "Int" "Ass" "Mei" "s2"
## $ t : num [1:1999, 1:4] 626 622 622 622 619 ...
## $ R : num 1999
## $ data :'data.frame': 35 obs. of 2 variables:
## ..$ A : num [1:35] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ Gain: int [1:35] 644 631 661 624 633 610 615 605 608 599 ...
## $ seed : int [1:626] 403 624 -333952915 -1819506614 -759241405 1506082216 -515332215 -1086775882 620038335 308851700 ...
## $ statistic:function (dataset, index)
## ..- attr(*, "srcref")=Class 'srcref' atomic [1:8] 15 13 20 1 13 1 15 20
## .. .. ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x9508920>
## $ sim : chr "ordinary"
## $ call : language boot(data = turk0, statistic = fitmodel, R = 1999)
## $ stype : chr "i"
## $ strata : num [1:35] 1 1 1 1 1 1 1 1 1 1 ...
## $ weights : num [1:35] 0.0286 0.0286 0.0286 0.0286 0.0286 ...
## - attr(*, "class")= chr "boot"
## - attr(*, "boot_type")= chr "boot"
colnames(b0$t) <- names(b0$t0)
best <- stack(as.data.frame(b0$t))
names(best)
## [1] "values" "ind"
st
## R original bootBias bootSE bootMed
## 1 1999 622.958054 0.0225743 5.726720 622.752517
## 2 1999 178.251908 1.9825733 15.015128 179.120315
## 3 1999 0.097322 0.0025534 0.018418 0.097733
## 4 1999 386.481790 -31.1163672 69.507327 353.375813
densityplot(~values | ind,
data = best,
scales = "free",
as.table = TRUE) +
layer(panel.abline(v = st$original[which.packet()] +
c(0, st$bootBias[which.packet()]),
col = c(1, 2),
lty = 2))
# Intervalos de confiança.
confint(b0, type = "norm")
## Bootstrap quantiles, type = normal
##
## 2.5 % 97.5 %
## Int 611.71131554 634.1596441
## Ass 146.84022450 205.6984457
## Mei 0.05866967 0.1308674
## s2 281.36629955 553.8300147
confint(b0, type = "basic")
## Bootstrap quantiles, type = basic
##
## 2.5 % 97.5 %
## Int 610.46165695 633.4336766
## Ass 150.61507624 197.9774289
## Mei 0.06082153 0.1180598
## s2 271.84196772 546.0135969
confint(b0, type = "perc")
## Bootstrap quantiles, type = percent
##
## 2.5 % 97.5 %
## Int 612.48243168 635.4544513
## Ass 158.52638777 205.8887405
## Mei 0.07658404 0.1338223
## s2 226.94998302 501.1216122
confint(b0, type = "bca")
## Bootstrap quantiles, type = bca
##
## 2.5 % 97.5 %
## Int 613.44795481 636.850427
## Ass 155.64022730 201.246278
## Mei 0.07575608 0.131569
## s2 289.07574993 624.324303
#-----------------------------------------------------------------------
# Teste para independĂȘncia de processo pontual.
plot(NULL, NULL, xlim = c(0, 1), ylim = c(0, 1), asp = 1)
lines(x = c(0, 1, 1, 0, 0), y = c(0, 0, 1, 1, 0))
# xy <- locator(n = 20, type = "p", pch = 19)
# dput(lapply(xy, round, digits = 3))
xy <- structure(list(x = c(0.204, 0.186, 0.529, 0.529, 0.385, 0.579,
0.918, 0.798, 0.634, 0.761, 0.704, 0.485,
0.291, 0.341, 0.402, 0.833, 0.972, 0.625,
0.732, 0.315),
y = c(0.829, 0.545, 0.526, 0.752, 0.674, 0.648,
0.792, 0.33, 0.121, 0.127, 0.332, 0.352,
0.188, 0.452, 0.221, 0.524, 0.957, 0.964,
0.755, 0.332)),
.Names = c("x", "y"))
#-----------------------------------------------------------------------
# Transforma a lista em matriz.
xy <- do.call(cbind, xy)
plot(xy,
NULL,
xlim = c(0, 1),
ylim = c(0, 1),
asp = 1,
axes = FALSE,
ann = FALSE)
lines(x = c(0, 1, 1, 0, 0),
y = c(0, 0, 1, 1, 0),
lty = 2)
# Calcula todas as distĂąncias (euclidianas).
d <- dist(xy)
d
## 1 2 3 4 5 6
## 2 0.28456985
## 3 0.44433546 0.34352584
## 4 0.33399701 0.40062202 0.22600000
## 5 0.23829813 0.23715396 0.20649455 0.16376813
## 6 0.41639645 0.40627331 0.13184840 0.11539497 0.19573451
## 7 0.71495804 0.77254967 0.47125046 0.39105115 0.54590567 0.36831644
## 8 0.77578154 0.64866709 0.33283179 0.50044480 0.53749884 0.38611527
## 9 0.82835017 0.61683061 0.41838977 0.63967648 0.60647341 0.52986225
## 10 0.89613224 0.71087903 0.46154631 0.66667008 0.66376577 0.55187408
## 11 0.70498865 0.56008303 0.26126806 0.45500000 0.46768045 0.33982495
## 12 0.55361539 0.35587919 0.17947702 0.40241272 0.33717058 0.31056722
## 13 0.64687711 0.37212095 0.41338602 0.61216011 0.49500707 0.54271908
## 14 0.40112093 0.18075951 0.20203960 0.35403955 0.22631836 0.30831802
## 15 0.63942787 0.38939954 0.33038462 0.54597619 0.45331887 0.46223154
## 16 0.69904649 0.64734071 0.30400658 0.38000000 0.47244471 0.28265173
## 17 0.77859360 0.88743450 0.61806958 0.48813318 0.65165789 0.49993000
## 18 0.44211537 0.60686242 0.44839715 0.23272301 0.37643060 0.31933055
## 19 0.53316039 0.58499231 0.30602287 0.20302217 0.35632850 0.18670297
## 20 0.50924454 0.24901807 0.28884598 0.47137671 0.34909025 0.41176692
## 7 8 9 10 11 12
## 2
## 3
## 4
## 5
## 6
## 7
## 8 0.47733007
## 9 0.72862679 0.26566332
## 10 0.68328179 0.20634437 0.12714165
## 11 0.50734209 0.09402127 0.22230834 0.21277688
## 12 0.61732406 0.31377221 0.27488543 0.35609128 0.21991135
## 13 0.87060037 0.52651021 0.34948247 0.47394198 0.43738427 0.25403149
## 14 0.66972308 0.47300423 0.44205203 0.53106026 0.38232055 0.17531686
## 15 0.76960834 0.41072740 0.25263412 0.37110241 0.32175301 0.15508062
## 16 0.28115654 0.19713194 0.44945523 0.40347615 0.23131148 0.38818552
## 17 0.17361164 0.65069578 0.90174276 0.85640002 0.68003603 0.77665565
## 18 0.33975432 0.65717958 0.84304804 0.84797700 0.63691836 0.62780889
## 19 0.18964440 0.43009418 0.64152942 0.62866923 0.42392570 0.47267113
## 20 0.75842534 0.48300414 0.38246830 0.49085741 0.38900000 0.17117243
## 13 14 15 16 17 18
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14 0.26869313
## 15 0.11580155 0.23891840
## 16 0.63769899 0.49724038 0.52684912
## 17 1.02719132 0.80819923 0.93091138 0.45476367
## 18 0.84482661 0.58549125 0.77574351 0.48668676 0.34707060
## 19 0.71831052 0.49466150 0.62773880 0.25211505 0.31369412 0.23479779
## 20 0.14598630 0.12278436 0.14103191 0.55243823 0.90679325 0.70393466
## 19
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20 0.59398485
# Obtém a menor distùncia.
m <- min(d)
# NĂșmero de pontos.
n <- nrow(xy)
# Geração de estatĂsticas por simulação do modelo assumido sob
# hipótese nula: localização uniforme no quadrado.
M <- replicate(9999, {
# As localizaçÔes de n pontos.
loc <- cbind(x = runif(n),
y = runif(n))
# A menor distĂąncia entre eles sob H_0.
min(dist(loc))
})
# Concatena as estatĂsticas simuladas com a observada.
M <- c(m, M)
# GrĂĄfico da distribuição acumulada empĂrica sob H_0.
plot(ecdf(M))
abline(h = c(0.025, 0.975), lty = 2, col = 2)
abline(v = m, col = 2)
# GrĂĄfico da distribuição acumulada empĂrica sob H_0.
plot(density(M))
abline(v = m, col = 2)
abline(v = quantile(M, c(0.025, 0.975)), lty = 2, col = 2)
rug(M)
# P-valor.
2 * sum(M > m)/length(M)
## [1] 0.0098
#-----------------------------------------------------------------------
# Moficando a estatĂstica de teste.
xy <- structure(list(x = c(0.088, 0.326, 0.577, 0.846, 0.857, 0.568,
0.306, 0.077, 0.077, 0.328, 0.597, 0.883,
0.863, 0.64, 0.337, 0.088, 0.077, 0.346,
0.654, 0.619),
y = c(0.92, 0.922, 0.916, 0.935, 0.737, 0.674,
0.67, 0.665, 0.452, 0.502, 0.461, 0.454,
0.256, 0.26, 0.26, 0.219, 0.045, 0.06, 0.058,
0.439)),
.Names = c("x", "y"))
# Transforma a lista em matriz.
xy <- do.call(cbind, xy)
plot(xy,
NULL,
xlim = c(0, 1),
ylim = c(0, 1),
asp = 1,
axes = FALSE,
ann = FALSE)
lines(x = c(0, 1, 1, 0, 0),
y = c(0, 0, 1, 1, 0),
lty = 2)
# Todas as distĂąncia entre os pontos (a estatĂstica de teste Ă© um vetor
# e nĂŁo um escalar).
d <- c(dist(xy))
d
## [1] 0.2380084 0.4890164 0.7581484 0.7904745 0.5393663 0.3316987 0.2552371
## [8] 0.4681293 0.4820000 0.6853919 0.9215102 1.0205494 0.8604092 0.7054084
## [15] 0.7010000 0.8750691 0.8978664 1.0312129 0.7164649 0.2510717 0.5201625
## [22] 0.5623042 0.3465083 0.2527924 0.3578407 0.5318844 0.4200048 0.5347541
## [29] 0.7275115 0.8555262 0.7326937 0.6620914 0.7421947 0.9116633 0.8622320
## [36] 0.9241645 0.5649230 0.2696702 0.3323266 0.2421673 0.3660014 0.5594649
## [43] 0.6821261 0.4831118 0.4554393 0.5541480 0.7193024 0.6590182 0.6985242
## [50] 0.8514282 1.0043112 0.8866211 0.8614482 0.4788455 0.1983053 0.3813201
## [57] 0.6015189 0.8150221 0.9081024 0.6751392 0.5354223 0.4824210 0.6792128
## [64] 0.7057344 0.8454029 1.0426984 1.1762062 1.0077822 0.8977711 0.5454769
## [71] 0.2957871 0.5550586 0.7833160 0.8304366 0.5788489 0.3791781 0.2841918
## [78] 0.4810374 0.5240401 0.7056408 0.9271920 1.0427195 0.8482040 0.7086960
## [85] 0.3813765 0.2620305 0.4910825 0.5388553 0.2952694 0.2149651 0.3842200
## [92] 0.5116141 0.4202142 0.4740854 0.6613811 0.7979486 0.6529012 0.6219743
## [99] 0.2404704 0.2290546 0.3161724 0.1694344 0.3582764 0.6161047 0.6940065
## [106] 0.5288251 0.4111703 0.5009241 0.6656320 0.6113101 0.7040227 0.3890116
## [113] 0.2130000 0.2992825 0.5585839 0.8331608 0.8860457 0.6935373 0.4812744
## [120] 0.4461356 0.6200000 0.6621072 0.8374831 0.5872308 0.2559316 0.5200779
## [127] 0.8060025 0.8100691 0.5948386 0.3232089 0.2332595 0.4070000 0.4754209
## [134] 0.6986881 0.5421559 0.2721066 0.5570718 0.5888472 0.3948519 0.2421673
## [141] 0.3710647 0.5213924 0.4423664 0.5508285 0.2977415 0.2860857 0.3358288
## [148] 0.2055480 0.3286351 0.5636000 0.6659249 0.4730772 0.4070111 0.0311127
## [155] 0.1990075 0.3109421 0.5794411 0.8290054 0.9038346 0.6660368 0.4574462
## [162] 0.2644258 0.2230359 0.5260152 0.7758827 0.8138286 0.5529060 0.2878976
## [169] 0.3050000 0.3030000 0.5535206 0.6026558 0.3555784 0.2024846 0.1802276
## [176] 0.2523529 0.3373796 0.2002024 0.3758896 0.3340135 0.1743474 0.3030594
## [183] 0.5884531 0.5747704 0.2694179 0.5771464 0.6700746 0.3080065 0.4670867
## [190] 0.3826042
# Gerando estatĂsticas de teste por simulação.
D <- replicate(499, {
dist(cbind(x = runif(n), y = runif(n)))
})
# Juntanto observado com simulado.
D <- cbind(d, D)
str(D)
## num [1:190, 1:500] 0.238 0.489 0.758 0.79 0.539 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:500] "d" "" "" "" ...
plot(ecdf(D[, 1]), cex = 0)
for (i in 2:ncol(D)) {
lines(ecdf(D[, i]), cex = 0, col = "gray50")
}
lines(ecdf(D[, 1]), cex = 0, col = 2, lwd = 2)
#-----------------------------------------------------------------------
# Amostragem por Conjuntos Ordenados.
# Extraà valores de uma população normal padrão.
rand <- function(n) {
rnorm(n, 0, 1)
}
rand(10)
## [1] -0.2458259 0.6357846 -0.4676933 -0.1834748 0.0672818 0.5451266
## [7] -0.3683698 -0.8494036 0.7576407 -0.9766793
# Ranked Set Sampling (Perfect Ranking). Calcula a média.
rss <- function(m = 3) {
x <- matrix(rand(m * m), nrow = m)
x <- apply(x, MARGIN = 1, sort)
mean(diag(x))
}
rss(m = 5)
## [1] -0.2394357
# Simple Random Sampling. Calcula a média.
srs <- function(m = 3) {
mean(rand(m))
}
rss(m = 5)
## [1] 0.09585034
# Ranked Set Sampling with Unperfect Ranking. Calcula a média.
rssur <- function(m = 3, rho) {
x <- MASS::mvrnorm(m * m,
mu = c(0, 0),
Sigma = matrix(c(1, rho, rho, 1), nrow = 2))
g <- gl(m, m)
b <- by(data = x,
INDICES = g,
FUN = function(y) {
o <- order(y[, 2])
# cbind(y[o, 1], y[o, 2])
y[o, 1]
}, simplify = TRUE)
mean(diag(do.call(rbind, b)))
}
rssur(m = 5, rho = 0.95)
## [1] 0.006458089
#-----------------------------------------------------------------------
# Simulação.
# Tamanho da amostra final.
m <- 10
# Distribuição da média na amostra aleatória simples.
system.time(m1 <- replicate(1000, srs(m = m)))
## user system elapsed
## 0.008 0.000 0.008
# Distribuição da média na amostra por conjuntos ordenados perfeito.
system.time(m2 <- replicate(1000, rss(m = m)))
## user system elapsed
## 0.332 0.000 0.332
# Distribuição da média na amostra por conjuntos ordenados imperfeito.
system.time(m3 <- replicate(1000, rssur(m = m, rho = 0.75)))
## user system elapsed
## 1.024 0.000 1.023
library(lattice)
library(latticeExtra)
densityplot(~m1 + m2 + m3,
auto.key = TRUE,
layout = c(NA, 1)) +
layer(panel.abline(v = 0, lty = 2))
ecdfplot(~m1 + m2 + m3,
auto.key = TRUE,
layout = c(NA, 1))
qqmath(~m1 + m2 + m3,
auto.key = TRUE,
layout = c(NA, 1))