Regressão com polinômios

Uma regressora

##=============================================================================
## Modelos de Regressão e aplicações no ambiente R
##
##   13 a 17 de Abril de 2015 - Manaus/AM
##   Fundação Oswaldo Cruz - FIOCRUZ
## 
##                                                  Prof. Dr. Walmes M. Zeviani
##                                                                LEG/DEST/UFPR
##=============================================================================

##-----------------------------------------------------------------------------
## Definições da sessão.

pkg <- c("lattice", "latticeExtra", "gridExtra", "car", "alr3",
         "plyr", "reshape", "multcomp", "ellipse", "rsm", "wzRfun")

sapply(pkg, require, character.only=TRUE)

##      lattice latticeExtra    gridExtra          car         alr3         plyr 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##      reshape     multcomp      ellipse          rsm       wzRfun 
##         TRUE         TRUE         TRUE         TRUE         TRUE

trellis.device(color=FALSE)

Modelos polinômiais são definidos por

\[ y = \beta_0 + \beta_1 x^1 + \beta_2 x^2 + \ldots + \beta_k x^k, \] em que \(k\) representa a ordem do polinômio. Considerando que se tem \(j\) valores únicos de \(x\) o maior grau de polinômio ajustável aos dados é \(k = j-1\).

##-----------------------------------------------------------------------------
## Dados.

## area: área das residências (ft²);
## consume: consumo mensal de de energia eletrica (KWh/mes).
## http://www.statsdirect.com/help/default.htm#regression_and_correlation/polynomial.htm

da <- 
"area   consu
1290    1182
1350    1172
1470    1264
1600    1493
1710    1571
1840    1711
1980    1804
2230    1840
2400    1956
2930    1954"
da <- read.table(textConnection(da), header=TRUE, sep="\t");
## closeAllConnections()
str(da)

## 'data.frame':    10 obs. of  2 variables:
##  $ area : num  1290 1350 1470 1600 1710 1840 1980 2230 2400 2930
##  $ consu: int  1182 1172 1264 1493 1571 1711 1804 1840 1956 1954

##-----------------------------------------------------------------------------
## Visualizar.

xyplot(consu~area, data=da, type=c("p","smooth"),
       xlab=expression("Área da residência"~(ft^2)),
       ylab=expression("Consumo de energia elétrica"~(kWh~mês^{-1})))

## xyplot(log(consu)~log(area), data=da, type=c("p","smooth"))
## xyplot(log(consu)~area, data=da, type=c("p","smooth"))

##-----------------------------------------------------------------------------
## Ajustar reta (polinômio de grau 1).
## E(y|X) = b0+b1*x.

m0 <- lm(consu~area, data=da)
par(mfrow=c(2,2)); plot(m0); layout(1)

## Clara falta de ajuste. É necessário "soltar" mais o modelo,
## flexibiliza-lo para acomodar melhor o sinal de média dos dados.

##-----------------------------------------------------------------------------
## Polinômio de grau 2.
## E(y|X) = b0+b1*x+b2*x^2.

## m1 <- lm(consu~area+I(area^2), data=da)
m1 <- update(m0, .~.+I(area^2))
par(mfrow=c(2,2)); plot(m1); layout(1)

##-----------------------------------------------------------------------------
## Inferências.

anova(m1)

## Analysis of Variance Table
## 
## Response: consu
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## area       1 703957  703957 321.388  4.15e-07 ***
## I(area^2)  1 127112  127112  58.032 0.0001244 ***
## Residuals  7  15333    2190                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(m1)

## 
## Call:
## lm(formula = consu ~ area + I(area^2), data = da)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -73.792 -22.426   5.886  31.689  52.436 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.216e+03  2.428e+02  -5.009 0.001550 ** 
## area         2.399e+00  2.458e-01   9.758 2.51e-05 ***
## I(area^2)   -4.500e-04  5.908e-05  -7.618 0.000124 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 46.8 on 7 degrees of freedom
## Multiple R-squared:  0.9819, Adjusted R-squared:  0.9767 
## F-statistic: 189.7 on 2 and 7 DF,  p-value: 8.001e-07

##-----------------------------------------------------------------------------
## Predição.

er <- extendrange(da$area, f=0.1)
pred <- data.frame(area=seq(er[1], er[2], length.out=30))
pred <- cbind(pred, predict(m1, newdata=pred, interval="confidence"))
head(pred)

##       area       fit       lwr      upr
## 1 1126.000  914.4563  808.2235 1020.689
## 2 1193.862 1006.4025  915.9764 1096.829
## 3 1261.724 1094.2037 1017.8144 1170.593
## 4 1329.586 1177.8597 1113.5312 1242.188
## 5 1397.448 1257.3707 1202.8473 1311.894
## 6 1465.310 1332.7365 1285.4440 1380.029

p1 <- xyplot(consu~area, data=da,
             xlab=expression("Área"~(ft^2)),
             ylab=expression("Consumo"~(kWh~mes^{-1})))
p2 <- xyplot(fit~area, data=pred, type="l",
             ly=pred$lwr, uy=pred$upr, cty="bands",
             prepanel=prepanel.cbH, panel=panel.cbH)

## p1+as.layer(p2, under=TRUE)

##-----------------------------------------------------------------------------
## Qual a equação?

summary(m1)$coeff

##                  Estimate   Std. Error   t value     Pr(>|t|)
## (Intercept) -1.216144e+03 2.428064e+02 -5.008698 1.550025e-03
## area         2.398930e+00 2.458356e-01  9.758270 2.513355e-05
## I(area^2)   -4.500402e-04 5.907662e-05 -7.617907 1.244152e-04

prettyNum(coef(m1))

##     (Intercept)            area       I(area^2) 
##     "-1216.144"       "2.39893" "-0.0004500402"

formatC(coef(m1), format="e")

##   (Intercept)          area     I(area^2) 
## "-1.2161e+03"  "2.3989e+00" "-4.5004e-04"

## Inclusão da expressão do modelo.
c0 <- coef(m1)
sinal <- ifelse(c0<0, "-", "+")
co <- prettyNum(c(abs(c0), 100*summary(m1)$r.squared))
co[seq_along(sinal)] <- paste(sinal, co[seq_along(sinal)], sep="")
names(co) <- c("b0","b1","b2","r2")

eq <- substitute(hat(y)==b0~b1*x~b2*x^2~~~~(R^2==r2), as.list(co))

## eq <- expression(hat(y)==-1216.1+2.40*x-0.00045*x^2)

p1+as.layer(p2, under=TRUE)+
    layer(grid.text(x=0.975, y=0.025, just=c(1,0), label=eq))

##-----------------------------------------------------------------------------
## Não poderia ajustar um polinômio de grau maior?

L <- list("2nd"=.~.+I(area^2),
          "3rd"=.~.+I(area^3),
          "4th"=.~.+I(area^4),
          "5th"=.~.+I(area^5))

mi <- m0
for(i in 1:length(L)){
    mi <- update(mi, L[[i]])
    print(summary(mi))
}

## 
## Call:
## lm(formula = consu ~ area + I(area^2), data = da)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -73.792 -22.426   5.886  31.689  52.436 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.216e+03  2.428e+02  -5.009 0.001550 ** 
## area         2.399e+00  2.458e-01   9.758 2.51e-05 ***
## I(area^2)   -4.500e-04  5.908e-05  -7.618 0.000124 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 46.8 on 7 degrees of freedom
## Multiple R-squared:  0.9819, Adjusted R-squared:  0.9767 
## F-statistic: 189.7 on 2 and 7 DF,  p-value: 8.001e-07
## 
## 
## Call:
## lm(formula = consu ~ area + I(area^2) + I(area^3), data = da)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -75.068 -22.133   6.907  30.633  55.594 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.414e+03  1.300e+03  -1.088    0.318
## area         2.707e+00  2.000e+00   1.354    0.225
## I(area^2)   -6.033e-04  9.876e-04  -0.611    0.564
## I(area^3)    2.436e-08  1.567e-07   0.156    0.882
## 
## Residual standard error: 50.45 on 6 degrees of freedom
## Multiple R-squared:  0.982,  Adjusted R-squared:  0.9729 
## F-statistic: 108.9 on 3 and 6 DF,  p-value: 1.276e-05
## 
## 
## Call:
## lm(formula = consu ~ area + I(area^2) + I(area^3) + I(area^4), 
##     data = da)
## 
## Residuals:
##       1       2       3       4       5       6       7       8       9      10 
##  32.625 -25.483 -49.587  40.004   1.782  18.420   5.678 -61.828  40.726  -2.336 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  8.483e+03  8.014e+03   1.059    0.338
## area        -1.812e+01  1.677e+01  -1.081    0.329
## I(area^2)    1.538e-02  1.282e-02   1.200    0.284
## I(area^3)   -5.276e-06  4.242e-06  -1.244    0.269
## I(area^4)    6.407e-10  5.125e-10   1.250    0.267
## 
## Residual standard error: 48.24 on 5 degrees of freedom
## Multiple R-squared:  0.9863, Adjusted R-squared:  0.9753 
## F-statistic: 89.69 on 4 and 5 DF,  p-value: 7.677e-05
## 
## 
## Call:
## lm(formula = consu ~ area + I(area^2) + I(area^3) + I(area^4) + 
##     I(area^5), data = da)
## 
## Residuals:
##         1         2         3         4         5         6         7         8         9 
##   1.18041  -0.07602 -12.41393  34.86760 -29.06376  -7.11343  19.49461 -10.39174   3.58575 
##        10 
##  -0.06948 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  1.065e+05  2.773e+04   3.841   0.0184 *
## area        -2.802e+02  7.378e+01  -3.798   0.0191 *
## I(area^2)    2.903e-01  7.710e-02   3.765   0.0197 *
## I(area^3)   -1.466e-04  3.955e-05  -3.707   0.0207 *
## I(area^4)    3.627e-08  9.955e-09   3.643   0.0219 *
## I(area^5)   -3.520e-12  9.833e-13  -3.580   0.0232 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.3 on 4 degrees of freedom
## Multiple R-squared:  0.9967, Adjusted R-squared:  0.9926 
## F-statistic: 243.9 on 5 and 4 DF,  p-value: 4.662e-05

Polinômios ortogonais

##-----------------------------------------------------------------------------
## Average claims paid per policy (y) for automobile insurance in New
## Brunswick in the years 1971-1980 (x).
## http://people.math.sfu.ca/~lockhart/richard/350/08_2/lectures/Polynomial/web.pdf

x <- 1971:1980
y <- c(45.13, 51.71, 60.17, 64.83, 65.24, 65.17, 67.65, 79.80, 96.13,
       115.19)

xyplot(y~x)

m0 <- lm(y~x+I(x^2)+I(x^3))
summary(m0) ## Ops!? NA?

## 
## Call:
## lm(formula = y ~ x + I(x^2) + I(x^3))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.4670 -5.0305 -0.1903  5.6711  7.7680 
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  2.923e+06  1.130e+06   2.587   0.0361 *
## x           -2.965e+03  1.144e+03  -2.592   0.0358 *
## I(x^2)       7.521e-01  2.895e-01   2.598   0.0355 *
## I(x^3)              NA         NA      NA       NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.652 on 7 degrees of freedom
## Multiple R-squared:  0.9213, Adjusted R-squared:  0.8988 
## F-statistic: 40.98 on 2 and 7 DF,  p-value: 0.0001366

X <- model.matrix(m0)
crossprod(X)

##             (Intercept)            x       I(x^2)       I(x^3)
## (Intercept)          10 1.975500e+04 3.902608e+07 7.709636e+10
## x                 19755 3.902608e+07 7.709636e+10 1.523048e+14
## I(x^2)         39026085 7.709636e+10 1.523048e+14 3.008807e+17
## I(x^3)      77096356875 1.523048e+14 3.008807e+17 5.943961e+20

## Scalonar ou trasladar x.
xr <- x-min(x)
m0 <- lm(y~xr+I(xr^2)+I(xr^3))
summary(m0)

## 
## Call:
## lm(formula = y ~ xr + I(xr^2) + I(xr^3))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8775 -1.0510  0.3872  1.1647  2.3132 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 43.97186    2.05676  21.379 6.83e-07 ***
## xr          13.43626    2.09362   6.418 0.000676 ***
## I(xr^2)     -3.30476    0.55921  -5.910 0.001044 ** 
## I(xr^3)      0.30051    0.04077   7.370 0.000320 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.266 on 6 degrees of freedom
## Multiple R-squared:  0.9922, Adjusted R-squared:  0.9883 
## F-statistic: 253.5 on 3 and 6 DF,  p-value: 1.046e-06

##-----------------------------------------------------------------------------
## Nessa situação em particular, os valores da covariável x são
## equidistantes. Com isso pode-se usar polinômios ortogonais.

m1 <- lm(y~poly(xr, degree=3, raw=TRUE))
m2 <- lm(y~poly(xr, degree=3, raw=FALSE))

X <- model.matrix(m1); dimnames(X) <- NULL; round(crossprod(X), 3)

##      [,1]  [,2]   [,3]   [,4]
## [1,]   10    45    285   2025
## [2,]   45   285   2025  15333
## [3,]  285  2025  15333 120825
## [4,] 2025 15333 120825 978405

X <- model.matrix(m2); dimnames(X) <- NULL; round(crossprod(X), 3)

##      [,1] [,2] [,3] [,4]
## [1,]   10    0    0    0
## [2,]    0    1    0    0
## [3,]    0    0    1    0
## [4,]    0    0    0    1

V <- vcov(m1); dimnames(V) <- NULL; round(V, 3)

##        [,1]   [,2]   [,3]   [,4]
## [1,]  4.230 -3.262  0.682 -0.042
## [2,] -3.262  4.383 -1.122  0.077
## [3,]  0.682 -1.122  0.313 -0.022
## [4,] -0.042  0.077 -0.022  0.002

V <- vcov(m2); dimnames(V) <- NULL; round(V, 3)

##       [,1]  [,2]  [,3]  [,4]
## [1,] 0.514 0.000 0.000 0.000
## [2,] 0.000 5.135 0.000 0.000
## [3,] 0.000 0.000 5.135 0.000
## [4,] 0.000 0.000 0.000 5.135

U <- 0*V
U[lower.tri(U)] <- 1:(prod(nrow(U)-0:1)/2)
U <- U[-1,-ncol(U)]

th <- t(combn(names(coef(m1)), 2))
layout(U)
par(mar=c(4,4,1,1))
for(i in 1:nrow(th)){
    plot(ellipse(m1, which=th[i,], level=0.95), type="l")
}
mtext(outer=TRUE, text="Polinômios ordinários", side=3, line=-2)

layout(1)

th <- t(combn(names(coef(m2)), 2))
layout(U)
par(mar=c(4,4,1,1))
for(i in 1:nrow(th)){
    plot(ellipse(m2, which=th[i,], level=0.95), type="l")
}
mtext(outer=TRUE, text="Polinômios ortogonais", side=3, line=-2)

layout(1)

summary(m1)

## 
## Call:
## lm(formula = y ~ poly(xr, degree = 3, raw = TRUE))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8775 -1.0510  0.3872  1.1647  2.3132 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       43.97186    2.05676  21.379 6.83e-07 ***
## poly(xr, degree = 3, raw = TRUE)1 13.43626    2.09362   6.418 0.000676 ***
## poly(xr, degree = 3, raw = TRUE)2 -3.30476    0.55921  -5.910 0.001044 ** 
## poly(xr, degree = 3, raw = TRUE)3  0.30051    0.04077   7.370 0.000320 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.266 on 6 degrees of freedom
## Multiple R-squared:  0.9922, Adjusted R-squared:  0.9883 
## F-statistic: 253.5 on 3 and 6 DF,  p-value: 1.046e-06

summary(m2)

## 
## Call:
## lm(formula = y ~ poly(xr, degree = 3, raw = FALSE))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8775 -1.0510  0.3872  1.1647  2.3132 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         71.1020     0.7166  99.221 7.06e-11 ***
## poly(xr, degree = 3, raw = FALSE)1  57.6916     2.2661  25.459 2.42e-07 ***
## poly(xr, degree = 3, raw = FALSE)2  17.2816     2.2661   7.626 0.000265 ***
## poly(xr, degree = 3, raw = FALSE)3  16.7013     2.2661   7.370 0.000320 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.266 on 6 degrees of freedom
## Multiple R-squared:  0.9922, Adjusted R-squared:  0.9883 
## F-statistic: 253.5 on 3 and 6 DF,  p-value: 1.046e-06

##-----------------------------------------------------------------------------
## Polinomios ortogonais vistos mais de perto.

x <- 1:9
po <- poly(x, degree=length(x)-1)
round(po, 5)

##             1        2        3        4        5        6        7        8
##  [1,] -0.5164  0.53182 -0.44495  0.31289 -0.18490  0.08989 -0.03414  0.00881
##  [2,] -0.3873  0.13295  0.22247 -0.46934  0.50848 -0.38205  0.20484 -0.07052
##  [3,] -0.2582 -0.15195  0.41317 -0.24584 -0.18490  0.49441 -0.47795  0.24681
##  [4,] -0.1291 -0.32289  0.28604  0.20115 -0.41603  0.02247  0.47795 -0.49363
##  [5,]  0.0000 -0.37987  0.00000  0.40229  0.00000 -0.44947  0.00000  0.61703
##  [6,]  0.1291 -0.32289 -0.28604  0.20115  0.41603  0.02247 -0.47795 -0.49363
##  [7,]  0.2582 -0.15195 -0.41317 -0.24584  0.18490  0.49441  0.47795  0.24681
##  [8,]  0.3873  0.13295 -0.22247 -0.46934 -0.50848 -0.38205 -0.20484 -0.07052
##  [9,]  0.5164  0.53182  0.44495  0.31289  0.18490  0.08989  0.03414  0.00881
## attr(,"degree")
## [1] 1 2 3 4 5 6 7 8
## attr(,"coefs")
## attr(,"coefs")$alpha
## [1] 5 5 5 5 5 5 5 5
## 
## attr(,"coefs")$norm2
##  [1]      1.000      9.000     60.000    308.000   1425.600   5883.429  20800.000
##  [8]  58909.091 118422.378 126317.203
## 
## attr(,"class")
## [1] "poly"   "matrix"

round(t(po)%*%po, digits=3)

##   1 2 3 4 5 6 7 8
## 1 1 0 0 0 0 0 0 0
## 2 0 1 0 0 0 0 0 0
## 3 0 0 1 0 0 0 0 0
## 4 0 0 0 1 0 0 0 0
## 5 0 0 0 0 1 0 0 0
## 6 0 0 0 0 0 1 0 0
## 7 0 0 0 0 0 0 1 0
## 8 0 0 0 0 0 0 0 1

pg <- po
colnames(pg) <- paste0("grau ", colnames(po))
pg <- cbind(x=x, as.data.frame(pg))
pg <- melt(pg, id="x")
str(pg)

## 'data.frame':    72 obs. of  3 variables:
##  $ x       : int  1 2 3 4 5 6 7 8 9 1 ...
##  $ variable: Factor w/ 8 levels "grau 1","grau 2",..: 1 1 1 1 1 1 1 1 1 2 ...
##  $ value   : num  -5.16e-01 -3.87e-01 -2.58e-01 -1.29e-01 -7.17e-18 ...

xyplot(value~x|variable, data=pg, type="b", as.table=TRUE)+
    layer(panel.abline(h=0, lty=2))

##-----------------------------------------------------------------------------
## Só se pode usar com níveis equidistantes?

## x <- c(0, 2^(0:6))
## po <- poly(x, degree=length(x)-1)
## round(po, 5)
## round(poly(order(x), degree=length(x)-1))
## 
## round(t(po)%*%po, digits=3)
## 
## pg <- po
## colnames(pg) <- paste0("grau ", colnames(po))
## pg <- cbind(x=x, ox=order(x), as.data.frame(pg))
## pg <- melt(pg, id=c("x","ox"))
##
## xyplot(value~x|variable, data=pg, type="b", as.table=TRUE)+
##     layer(panel.abline(h=0, lty=2))
## 
## xyplot(value~ox|variable, data=pg, type="b", as.table=TRUE)+
##     layer(panel.abline(h=0, lty=2))

Duas regressoras (superfície de resposta)

##-----------------------------------------------------------------------------

rm(list=ls())

str(lathe1)

## 'data.frame':    20 obs. of  3 variables:
##  $ Feed : num  -1 -1 1 1 -1 -1 1 1 0 0 ...
##  $ Speed: num  -1 -1 -1 -1 1 ...
##  $ Life : num  54.5 66 11.8 14 5.2 3 0.8 0.5 86.5 0.4 ...

## Variáveis padronizadas:
## (speed-900)/300,
## (feed-13)/6.

xyplot(Feed~Speed, data=lathe1, jitter.x=TRUE, jitter.y=TRUE, aspect="iso")

## É um delineamento composto central. Repetições no ponto central e nos
## fatoriais.

## cloud(Life~Feed+Speed, data=lathe1)
## cloud(log(Life)~Feed+Speed, data=lathe1)

##-----------------------------------------------------------------------------
## Esse delineamento permite ajustar efeitos quadrados para os dois
## fatores. Esse é o modelo maximal.

m0 <- lm(Life~Feed+Speed+Feed:Speed+I(Feed^2)+I(Speed^2),
         data=lathe1)

## Diagnóstico.
par(mfrow=c(2,2)); plot(m0); layout(1)

MASS::boxcox(m0)

## Variável transformada.
m0 <- update(m0, log(Life)~.)
par(mfrow=c(2,2)); plot(m0); layout(1)

## summary() e anova() com os mesmos p-valores! Por que? Ortogonalidade!
summary(m0)

## 
## Call:
## lm(formula = log(Life) ~ Feed + Speed + I(Feed^2) + I(Speed^2) + 
##     Feed:Speed, data = lathe1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.43349 -0.14576 -0.02494  0.16748  0.47992 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.18809    0.10508  11.307 2.00e-08 ***
## Feed        -0.79023    0.08580  -9.210 2.56e-07 ***
## Speed       -1.58902    0.08580 -18.520 3.04e-11 ***
## I(Feed^2)    0.41851    0.10063   4.159 0.000964 ***
## I(Speed^2)   0.28808    0.10063   2.863 0.012529 *  
## Feed:Speed  -0.07286    0.10508  -0.693 0.499426    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2972 on 14 degrees of freedom
## Multiple R-squared:  0.9702, Adjusted R-squared:  0.9596 
## F-statistic: 91.24 on 5 and 14 DF,  p-value: 3.551e-10

anova(m0)

## Analysis of Variance Table
## 
## Response: log(Life)
##            Df  Sum Sq Mean Sq  F value    Pr(>F)    
## Feed        1  7.4928  7.4928  84.8237 2.563e-07 ***
## Speed       1 30.2967 30.2967 342.9820 3.041e-11 ***
## I(Feed^2)   1  1.7400  1.7400  19.6985 0.0005619 ***
## I(Speed^2)  1  0.7239  0.7239   8.1956 0.0125294 *  
## Feed:Speed  1  0.0425  0.0425   0.4807 0.4994256    
## Residuals  14  1.2367  0.0883                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Quadro de anova com partição da SQR em erro puro e falta de ajuste.
pureErrorAnova(m0)

## Analysis of Variance Table
## 
## Response: log(Life)
##              Df  Sum Sq Mean Sq  F value    Pr(>F)    
## Feed          1  7.4928  7.4928 140.8736 1.302e-07 ***
## Speed         1 30.2967 30.2967 569.6177 7.965e-11 ***
## I(Feed^2)     1  1.7400  1.7400  32.7149 0.0001342 ***
## I(Speed^2)    1  0.7239  0.7239  13.6112 0.0035673 ** 
## Feed:Speed    1  0.0425  0.0425   0.7984 0.3906971    
## Residuals    14  1.2367  0.0883                       
##  Lack of fit  3  0.6516  0.2172   4.0836 0.0355805 *  
##  Pure Error  11  0.5851  0.0532                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

##-----------------------------------------------------------------------------
## Predição.

extendseq <- function(x, f=0.1, length.out=20){
    er <- extendrange(x, f=f)
    seq(er[1], er[2], length.out=length.out)
}

## expand.grid(1:3, 3:5)

pred <- with(lathe1,
             expand.grid(
                 Feed=extendseq(Feed),
                 Speed=extendseq(Speed)
             ))

pred$y <- predict(m0, newdata=pred)

wireframe(y~Feed+Speed, data=pred, scales=list(arrows=FALSE),
          screen=list(x=-105, z=-15, y=-140))

##-----------------------------------------------------------------------------
## Dando um visual melhor.

colr <- brewer.pal(11, "Spectral")
colr <- colorRampPalette(colr, space="rgb")

wireframe(y~Feed+Speed, data=pred, scales=list(arrows=FALSE),
          screen=list(x=-105, z=-15, y=-140),
          zlim=c(min(pred$y), 12),
          levels=c(-1:5),
          panel.3d.wireframe=panel.3d.contour,
          nlevels=18,# col="gray30",
          type=c("on", "top"),
          col.regions=colr(101),  drape=TRUE,
          par.box=list(lwd=1, lty=c(1,1,1,2,1,1,2,2)),
          scpos=list(x=9,y=5,z=2))

##-----------------------------------------------------------------------------
## Considerar as funções do pacote rsm também permitem dividir a SQR em
## erro puro e falta de ajuste além de calcular os pontos críticos.

## m1 <- lm(log(Life)~SO(Feed, Speed), data=lathe1)
m1 <- rsm(log(Life)~SO(Feed, Speed), data=lathe1)
summary(m1)

## 
## Call:
## rsm(formula = log(Life) ~ SO(Feed, Speed), data = lathe1)
## 
##              Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)  1.188093   0.105079  11.3066 2.000e-08 ***
## Feed        -0.790228   0.085801  -9.2100 2.563e-07 ***
## Speed       -1.589019   0.085801 -18.5198 3.041e-11 ***
## Feed:Speed  -0.072858   0.105079  -0.6934 0.4994256    
## Feed^2       0.418509   0.100627   4.1590 0.0009645 ***
## Speed^2      0.288075   0.100627   2.8628 0.0125294 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Multiple R-squared:  0.9702, Adjusted R-squared:  0.9596 
## F-statistic: 91.24 on 5 and 14 DF,  p-value: 3.551e-10
## 
## Analysis of Variance Table
## 
## Response: log(Life)
##                  Df Sum Sq Mean Sq  F value    Pr(>F)
## FO(Feed, Speed)   2 37.789 18.8947 213.9029 3.208e-11
## TWI(Feed, Speed)  1  0.042  0.0425   0.4807 0.4994256
## PQ(Feed, Speed)   2  2.464  1.2320  13.9471 0.0004654
## Residuals        14  1.237  0.0883                   
## Lack of fit       3  0.652  0.2172   4.0836 0.0355805
## Pure error       11  0.585  0.0532                   
## 
## Stationary point of response surface:
##     Feed    Speed 
## 1.197347 2.909405 
## 
## Eigenanalysis:
## $values
## [1] 0.4279935 0.2785907
## 
## $vectors
##             [,1]       [,2]
## Feed  -0.9677379 -0.2519591
## Speed  0.2519591 -0.9677379

## FO: first order, ~FO(x1,x2) = ~x1+x2;
## TWI: two way interactions, ~TWI(x1,x2) = ~x1:x2;
## PQ: pure quadratic, ~PQ(x1,x2) = ~I(x1^2)+I(x2^2);
## SO: second order, SO = FO+TWI+PQ,
## ~SO(x1+x2) = ~x1+x2+x1:x2+I(x1^2)+I(x2^2);

Ajustar com a média ou com os dados?

##-----------------------------------------------------------------------------
## Ganho de peso de perus em função de metionina na dieta.

str(turk0)

## 'data.frame':    35 obs. of  2 variables:
##  $ A   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Gain: int  644 631 661 624 633 610 615 605 608 599 ...

## help(turk0, help_type="html")

## A
##     Amount of methionine supplement (percent of diet)
## Gain
##     Pen weight increase (g)

## Diagrama de dispersão com linha de tendência.
xyplot(Gain~A, data=turk0, type=c("p","smooth"),
       xlab="Metionina (% da dieta)",
       ylab="Ganho de peso (g)",
       jitter.x=TRUE)

xtabs(~A, data=turk0)

## A
##    0 0.04  0.1 0.16 0.28 0.44 
##   10    5    5    5    5    5

##-----------------------------------------------------------------------------
## Ajuste do modelo.

## Ajuste do modelo de regressão linear simples: b0+b1*x+b2*x^2;
m0 <- lm(Gain~A, turk0)

## Observados vs ajustados.
xyplot(Gain~A, data=turk0,
       xlab="Metionina (% da dieta)",
       ylab="Ganho de peso (g)")+
    layer(panel.abline(m0))

## Resíduos.
par(mfrow=c(2,2)); plot(m0); layout(1)

## Indiscutível falta de ajuste. Considerar um modelo que permita
## curvatura na função.

##-----------------------------------------------------------------------------
## Ajuste do modelo saturado (considerando A como fator). Considerar A
## como fator ocupa o mesmo espaço vetorial que ajustar um polinômio de
## grau k-1, em que k é o número de níveis. A equação da reta é o
## polinômio de grau um.

turk0$Afat <- factor(turk0$A)
m1 <- lm(Gain~poly(A, nlevels(Afat)-1), turk0)
anova(m1)

## Analysis of Variance Table
## 
## Response: Gain
##                            Df Sum Sq Mean Sq F value    Pr(>F)    
## poly(A, nlevels(Afat) - 1)  5 150041 30008.2  88.587 < 2.2e-16 ***
## Residuals                  29   9824   338.7                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

m1 <- lm(Gain~Afat, turk0)
anova(m1)

## Analysis of Variance Table
## 
## Response: Gain
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Afat       5 150041 30008.2  88.587 < 2.2e-16 ***
## Residuals 29   9824   338.7                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Note que embora os modelos tenham especificações diferentes o quadro
## de anova é o mesmo. Isso porque ambos exploram o mesmo espaço
## vetorial de k-1 dimensões, apenas os vetores que formam a base desse
## espaço é que são diferentes. O conjunto de vetores que formam a base
## são as colunas da matriz do modelo, X.

## Resíduos.
par(mfrow=c(2,2)); plot(m1); layout(1)

## Resíduos ok.

##-----------------------------------------------------------------------------
## Médias amostrais e os valores preditos.

## Médias amostrais.
aggregate(Gain~A, data=turk0, mean)

##      A  Gain
## 1 0.00 623.0
## 2 0.04 668.4
## 3 0.10 715.6
## 4 0.16 732.0
## 5 0.28 794.0
## 6 0.44 785.4

## Valores preditos pelo modelo saturado.
predict(m1, newdata=list(Afat=levels(turk0$Afat)))

##     1     2     3     4     5     6 
## 623.0 668.4 715.6 732.0 794.0 785.4

## Valores preditos pelo modelo m0
predict(m0, newdata=list(A=unique(turk0$A)))

##        1        2        3        4        5        6 
## 648.4899 664.2535 687.8988 711.5441 758.8348 821.8890

##-----------------------------------------------------------------------------
## Teste de falta de ajuste para o modelo m0.

anova(m0, m1)

## Analysis of Variance Table
## 
## Model 1: Gain ~ A
## Model 2: Gain ~ Afat
##   Res.Df   RSS Df Sum of Sq      F    Pr(>F)    
## 1     33 35176                                  
## 2     29  9824  4     25352 18.711 1.062e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

pureErrorAnova(m0)

## Analysis of Variance Table
## 
## Response: Gain
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## A             1 124689  124689 368.090 < 2.2e-16 ***
## Residuals    33  35176    1066                      
##  Lack of fit  4  25353    6338  18.711 1.062e-07 ***
##  Pure Error  29   9824     339                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Não passou na falta de ajuste conforme já se havia antecipado.

##-----------------------------------------------------------------------------
## Ajuste do modelo de segundo grau: b0+b1*x+b2*x^2;

m2 <- lm(Gain~A+I(A^2), data=turk0)
## m2 <- lm(Gain~poly(A, 2), turk0)

## Resíduos.
par(mfrow=c(2,2)); plot(m2); layout(1)

##-----------------------------------------------------------------------------
## Teste de falta de ajuste para o modelo m2.

anova(m1, m2)

## Analysis of Variance Table
## 
## Model 1: Gain ~ Afat
## Model 2: Gain ~ A + I(A^2)
##   Res.Df     RSS Df Sum of Sq     F Pr(>F)
## 1     29  9823.6                          
## 2     32 11339.9 -3   -1516.2 1.492 0.2374

pureErrorAnova(m2)

## Analysis of Variance Table
## 
## Response: Gain
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## A             1 124689  124689 368.090 < 2.2e-16 ***
## I(A^2)        1  23836   23836  70.366 3.028e-09 ***
## Residuals    32  11340     354                      
##  Lack of fit  3   1516     505   1.492    0.2374    
##  Pure Error  29   9824     339                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Não apresentou falta de ajuste. Então é um modelo que pode ser mantido.

##-----------------------------------------------------------------------------
## Gráfico dos valores preditos com bandas de confiança.

## range(turk0$A)
pred2 <- data.frame(A=seq(0, 0.45, by=0.025))
a <- predict(m2, newdata=pred2, interval="confidence")
pred2 <- cbind(pred2, a)
str(pred2)

## 'data.frame':    19 obs. of  4 variables:
##  $ A  : num  0 0.025 0.05 0.075 0.1 0.125 0.15 0.175 0.2 0.225 ...
##  $ fit: num  626 649 670 690 708 ...
##  $ lwr: num  615 640 663 682 700 ...
##  $ upr: num  636 657 678 698 717 ...

xyplot(Gain~A, data=turk0,
     xlab="Metionina (% da dieta)",
     ylab="Ganho de peso (g)")+
    as.layer(xyplot(fit~A, data=pred2, type="l",
                    ly=pred2$lwr, uy=pred2$upr, cty="bands",
                    prepanel=prepanel.cbH, panel=panel.cbH))

## Sobrepondo as estimativas de médias do modelo m1.
pred1 <- data.frame(Afat=levels(turk0$Afat))
a <- predict(m1, newdata=pred1, interval="confidence")
pred1 <- cbind(pred1, a)
pred1$A <- as.numeric(as.character(pred1$Afat))
## pred1

xyplot(Gain~A, data=turk0,
     xlab="Metionina (% da dieta)",
     ylab="Ganho de peso (g)")+
    as.layer(xyplot(fit~A, data=pred2, type="l",
                    ly=pred2$lwr, uy=pred2$upr, cty="bands",
                    prepanel=prepanel.cbH, panel=panel.cbH))+
    as.layer(xyplot(fit~(A+0.005), data=pred1, pch=19,
                    ly=pred1$lwr, uy=pred1$upr, cty="bars",
                    prepanel=prepanel.cbH, panel=panel.cbH))

##-----------------------------------------------------------------------------
## Uso de pesos no ajuste do modelo de regressão.

## Imagine que não tem os dados completos, apenas as médias e
## frequências.

tu <- ddply(turk0, .(A), summarise,
            mGain=mean(Gain), nGain=length(Gain))
tu

##      A mGain nGain
## 1 0.00 623.0    10
## 2 0.04 668.4     5
## 3 0.10 715.6     5
## 4 0.16 732.0     5
## 5 0.28 794.0     5
## 6 0.44 785.4     5

m3 <- lm(mGain~A+I(A^2), data=tu, weights=nGain)
summary(m3)

## 
## Call:
## lm(formula = mGain ~ A + I(A^2), data = tu, weights = nGain)
## 
## Weighted Residuals:
##       1       2       3       4       5       6 
##  -8.037  14.442  16.171 -29.042  11.611  -1.816 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   625.542      6.243 100.201 2.19e-06 ***
## A             964.471     86.763  11.116  0.00156 ** 
## I(A^2)      -1362.069    198.338  -6.867  0.00632 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.48 on 3 degrees of freedom
## Multiple R-squared:  0.9899, Adjusted R-squared:  0.9832 
## F-statistic: 146.9 on 2 and 3 DF,  p-value: 0.001016

summary(m2)

## 
## Call:
## lm(formula = Gain ~ A + I(A^2), data = turk0)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.988 -16.542   2.193  12.788  36.059 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   625.542      5.227 119.665  < 2e-16 ***
## A             964.471     72.651  13.275 1.46e-14 ***
## I(A^2)      -1362.069    166.077  -8.201 2.28e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.82 on 32 degrees of freedom
## Multiple R-squared:  0.9291, Adjusted R-squared:  0.9246 
## F-statistic: 209.6 on 2 and 32 DF,  p-value: < 2.2e-16

## Observe as estimativas dos parâmetros. Por que o valor pontual é
## igual nos dois modelos? Por que o erro padrão é maior com o uso das
## médias? Por que o R² é maior com o uso das médias? O que iria
## acontecer se não fossem usados os pesos?

##-----------------------------------------------------------------------------
## Sobrepondo os valores preditos com bandas de confiança obtido com o
## ajuste com as médias.

pred3 <- data.frame(A=seq(0, 0.45, by=0.025))
a <- predict(m3, newdata=pred3, interval="confidence")
pred3 <- cbind(pred3, a)
## pred3

xyplot(Gain~A, data=turk0,
     xlab="Metionina (% da dieta)",
     ylab="Ganho de peso (g)")+
    as.layer(xyplot(fit~A, data=pred2, type="l",
                    ly=pred2$lwr, uy=pred2$upr, cty="bands",
                    prepanel=prepanel.cbH, panel=panel.cbH))+
    as.layer(xyplot(fit~A, data=pred3, type="l",
                    ly=pred3$lwr, uy=pred3$upr, cty="bands",
                    prepanel=prepanel.cbH, panel=panel.cbH))

Modelo emípirico vs modelo mecanístico

##-----------------------------------------------------------------------------
## Explorar os dados.

## Estrutura do Arquivo.
str(cars)

## 'data.frame':    50 obs. of  2 variables:
##  $ speed: num  4 4 7 7 8 9 10 10 10 11 ...
##  $ dist : num  2 10 4 22 16 10 18 26 34 17 ...

## Visualização.
xyplot(dist~speed, data=cars, type=c("p","smooth"))

##-----------------------------------------------------------------------------
## Usando a lm().

## y ~ Normal(modelo linear, sigma²)
## modelo linear: b0+b1*x.

m0 <- lm(dist~speed, data=cars)
## m0 <- lm(dist~0+speed, data=cars)

c0 <- coef(m0)
c0

## (Intercept)       speed 
##  -17.579095    3.932409

## Sobrepondo ajuste às observações.
xyplot(dist~speed, data=cars, xlim=c(0,NA), ylim=c(c0[1],NA))+
    layer(panel.abline(a=c0[1], b=c0[2], col=2))

## Diagnóstico.
par(mfrow=c(2,2)); plot(m0); layout(1)

## Falta de ajuste. Extender para um modelo que permita curvatura da
## função.

##-----------------------------------------------------------------------------
## Ajustar polinômio de segundo grau.

## modelo linear: b0+b1*x+b2*x^2.
m1 <- lm(dist~speed+I(speed^2), data=cars)

## Diagnóstico.
par(mfrow=c(2,2)); plot(m1); layout(1)

## Mostra uma possível relação média~variância. 

## Testa o abandono do termo extra por meio da mudança na soma de
## quadrados.
anova(m1, m0)

## Analysis of Variance Table
## 
## Model 1: dist ~ speed + I(speed^2)
## Model 2: dist ~ speed
##   Res.Df   RSS Df Sum of Sq     F Pr(>F)
## 1     47 10825                          
## 2     48 11354 -1   -528.81 2.296 0.1364

summary(m1)

## 
## Call:
## lm(formula = dist ~ speed + I(speed^2), data = cars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.720  -9.184  -3.188   4.628  45.152 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  2.47014   14.81716   0.167    0.868
## speed        0.91329    2.03422   0.449    0.656
## I(speed^2)   0.09996    0.06597   1.515    0.136
## 
## Residual standard error: 15.18 on 47 degrees of freedom
## Multiple R-squared:  0.6673, Adjusted R-squared:  0.6532 
## F-statistic: 47.14 on 2 and 47 DF,  p-value: 5.852e-12

pureErrorAnova(m1)

## Analysis of Variance Table
## 
## Response: dist
##              Df  Sum Sq Mean Sq F value    Pr(>F)    
## speed         1 21185.5 21185.5 97.0836 4.558e-11 ***
## I(speed^2)    1   528.8   528.8  2.4233    0.1297    
## Residuals    47 10824.7   230.3                      
##  Lack of fit 16  4059.9   253.7  1.1628    0.3476    
##  Pure Error  31  6764.8   218.2                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Verifica se cabe uma tranformação.
MASS::boxcox(m1); abline(v=0.5, col=2)

## Usar lambda=0.5 como valor para transformar.
xyplot(sqrt(dist)~speed, data=cars)

##-----------------------------------------------------------------------------
## Com a variável transformada.

## Modelo quadrático com transformação na resposta.
m2 <- lm(sqrt(dist)~speed+I(speed^2), data=cars)

## Diagnóstico.
par(mfrow=c(2,2)); plot(m2); layout(1)

## Teste para as estimativas, h0: beta==0.
summary(m2)

## 
## Call:
## lm(formula = sqrt(dist) ~ speed + I(speed^2), data = cars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0734 -0.7260 -0.1833  0.6369  3.1159 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.990337   1.086712   0.911    0.367  
## speed        0.365587   0.149193   2.450    0.018 *
## I(speed^2)  -0.001429   0.004838  -0.295    0.769  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.113 on 47 degrees of freedom
## Multiple R-squared:   0.71,  Adjusted R-squared:  0.6976 
## F-statistic: 57.52 on 2 and 47 DF,  p-value: 2.334e-13

##-----------------------------------------------------------------------------
## Volta pro modelo mais simples.

## Modelo que abandona b2, ou seja, b2==0.
m3 <- lm(sqrt(dist)~speed, data=cars)

summary(m3)

## 
## Call:
## lm(formula = sqrt(dist) ~ speed, data = cars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0684 -0.6983 -0.1799  0.5909  3.1534 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.27705    0.48444   2.636   0.0113 *  
## speed        0.32241    0.02978  10.825 1.77e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.102 on 48 degrees of freedom
## Multiple R-squared:  0.7094, Adjusted R-squared:  0.7034 
## F-statistic: 117.2 on 1 and 48 DF,  p-value: 1.773e-14

## Teste para o abandono de b2 (o mesmo que o t do summary).
anova(m3, m2)

## Analysis of Variance Table
## 
## Model 1: sqrt(dist) ~ speed
## Model 2: sqrt(dist) ~ speed + I(speed^2)
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     48 58.334                           
## 2     47 58.226  1   0.10814 0.0873 0.7689

## Diagnóstico para o modelo final.
par(mfrow=c(2,2)); plot(m3); layout(1)

##-----------------------------------------------------------------------------
## Representa os resultados.

## Gráfico do modelo final.
c3 <- coef(m3)
xyplot(sqrt(dist)~speed, data=cars)+
    layer(panel.abline(a=c3[1], b=c3[2], col=2))

## Na escala natural.
xyplot(dist~speed, data=cars, xlim=c(0,NA), ylim=c(0,NA))+
    layer(panel.curve((c3[1]+c3[2]*x)^2, col=2))

##-----------------------------------------------------------------------------
## Com bandas de confiança.

pred <- data.frame(speed=seq(0, 30, length.out=30))
pred <- cbind(pred, predict(m3, newdata=pred, interval="confidence"))
str(pred)

## 'data.frame':    30 obs. of  4 variables:
##  $ speed: num  0 1.03 2.07 3.1 4.14 ...
##  $ fit  : num  1.28 1.61 1.94 2.28 2.61 ...
##  $ lwr  : num  0.303 0.695 1.086 1.477 1.867 ...
##  $ upr  : num  2.25 2.53 2.8 3.08 3.35 ...

xyplot(sqrt(dist)~speed, data=cars)+
    as.layer(xyplot(fit~speed, data=pred, type="l",
                    ly=pred$lwr, uy=pred$upr, cty="bands",
                    prepanel=prepanel.cbH, panel=panel.cbH))

##-----------------------------------------------------------------------------
## Na escala original.

i <- c("fit","lwr","upr")
pred[,i] <- pred[,i]^2

xyplot(dist~speed, data=cars, xlim=c(0,NA), ylim=c(0,NA))+
    as.layer(xyplot(fit~speed, data=pred, type="l",
                    ly=pred$lwr, uy=pred$upr, cty="bands",
                    prepanel=prepanel.cbH, panel=panel.cbH))

##-----------------------------------------------------------------------------
## Modelo sem intercepto.

m4 <- lm(sqrt(dist)~0+speed, data=cars)
summary(m4)

## 
## Call:
## lm(formula = sqrt(dist) ~ 0 + speed, data = cars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2781 -0.6972  0.0208  0.7965  3.3898 
## 
## Coefficients:
##       Estimate Std. Error t value Pr(>|t|)    
## speed  0.39675    0.01015   39.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.167 on 49 degrees of freedom
## Multiple R-squared:  0.9689, Adjusted R-squared:  0.9683 
## F-statistic:  1528 on 1 and 49 DF,  p-value: < 2.2e-16

##-----------------------------------------------------------------------------
## Com bandas de confiança.

pred <- data.frame(speed=seq(0, 30, length.out=30))
pred <- cbind(pred, predict(m4, newdata=pred, interval="confidence"))
str(pred)

## 'data.frame':    30 obs. of  4 variables:
##  $ speed: num  0 1.03 2.07 3.1 4.14 ...
##  $ fit  : num  0 0.41 0.821 1.231 1.642 ...
##  $ lwr  : num  0 0.389 0.779 1.168 1.557 ...
##  $ upr  : num  0 0.432 0.863 1.295 1.726 ...

xyplot(sqrt(dist)~speed, data=cars)+
    as.layer(xyplot(fit~speed, data=pred, type="l",
                    ly=pred$lwr, uy=pred$upr, cty="bands",
                    prepanel=prepanel.cbH, panel=panel.cbH))

##-----------------------------------------------------------------------------
## Na escala original.

i <- c("fit","lwr","upr")
pred[,i] <- pred[,i]^2

xyplot(dist~speed, data=cars, xlim=c(0,NA), ylim=c(0,NA))+
    as.layer(xyplot(fit~speed, data=pred, type="l",
                    ly=pred$lwr, uy=pred$upr, cty="bands",
                    prepanel=prepanel.cbH, panel=panel.cbH))

Praticar

##-----------------------------------------------------------------------------
## http://udel.edu/~mcdonald/statcurvreg.html
## Data on tortoise carapace length and clutch size from Ashton et
## al. (2007).

tur <- "length clutch
284     3
290     2
290     7
290     7
298     11
299     12
302     10
306     8
306     8
309     9
310     10
311     13
317     7
317     9
320     6
323     13
334     2
334     8"
tur <- read.table(textConnection(tur), header=TRUE)
tur

xyplot(clutch~length, data=tur, type=c("p","smooth"))

##-----------------------------------------------------------------------------
## http://www.theanalysisfactor.com/r-tutorial-4/

geiger <-
    structure(list(Time=c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                       14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
                       27, 28, 29, 30),
                   Counts=c(750, 725.2, 695, 708.5,
                       725, 690.9, 691.5, 678.6, 686.3,
                       668.9, 665.3, 669.7, 657.5,
                       661.6, 665.1, 648.1, 664.9,
                       647.9, 660, 643, 646.2, 653,
                       646.9, 639.3, 638.7, 636.8,
                       650.2, 633.2, 642.2, 649.3,
                       642.7)),
              .Names = c("Time", "Counts"),
              row.names = c(NA, -31L), class = "data.frame") 

xyplot(Counts~Time, data=geiger)

##-----------------------------------------------------------------------------
## Dados de postássio liberado em função do tempo.
 
klib <- data.frame(k=c(51.03, 57.76, 26.60, 60.65, 87.07, 64.67,
                     91.28, 105.22, 72.74, 81.88, 97.62, 90.14,
                     89.88, 113.22, 90.91, 115.39, 112.63, 87.51,
                     104.69, 120.58, 114.32, 130.07, 117.65, 111.69,
                     128.54, 126.88, 127.00, 134.17, 149.66, 118.25,
                     132.67, 154.48, 129.11, 151.83, 147.66, 127.30),
                   t=rep(c(15, 30, 45, 60, 75, 90,
                     120, 150, 180, 210, 240, 270), each=3))

xyplot(k~t, data=klib)

##-----------------------------------------------------------------------------
## Dados de índice agronômico de milho por cultivar.
 
da <- read.table("http://www.leg.ufpr.br/~walmes/cursoR/anovareg.txt",
                 header=TRUE, sep="\t")
str(da)

##-----------------------------------------------------------------------------
## http://www.itl.nist.gov/div898/handbook/pri/section4/pri473.htm

cci <- 
structure(list(pressure = c(80, 42, 68.87, 15.13, 4, 42, 15.13, 
42, 68.87, 42, 42), H2WF6 = c(6, 6, 3.17, 8.83, 6, 6, 3.17, 2, 
8.83, 10, 6), Unif = c(4.6, 6.2, 3.4, 6.9, 7.3, 6.4, 8.6, 6.3, 
5.1, 5.4, 5), Stress = c(8.04, 7.78, 7.58, 7.27, 6.49, 7.69, 
6.66, 7.16, 8.33, 8.19, 7.9), x1 = c(1, 0, 0.71, -0.71, -1, 0, 
-0.71, 0, 0.71, 0, 0), x2 = c(0, 0, -0.71, 0.71, 0, 0, -0.71, 
-1, 0.71, 1, 0)), .Names = c("pressure", "H2WF6", "Unif", "Stress", 
"x1", "x2"), row.names = c(NA, -11L), class = "data.frame")

str(cci)

## Unif(ormidade) e Stress são as respostas medidas para níveis dos
## fatores pressure e H2WF6. As variáveis x1 e x2 são as mesmas
## codificadas.

xyplot(x1~x2, data=cci, jitter.x=TRUE, jitter.y=TRUE, aspect="iso")

##-----------------------------------------------------------------------------
## Incluir o efeito de bloco e codificar as variáveis.

## Weisberg, página 118.
str(cakes)

xyplot(X1~X2, data=cakes, jitter.x=TRUE, jitter.y=TRUE, aspect=1)

##-----------------------------------------------------------------------------
## http://www.leg.ufpr.br/~walmes/analises/MESerafim/maracuja/maracujaplanta.html

##-----------------------------------------------------------------------------
## Informações da sessão.

Sys.time()

## [1] "2015-04-15 15:44:33 BRT"

sessionInfo()

## R version 3.1.2 (2014-10-31)
## Platform: i686-pc-linux-gnu (32-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=pt_BR.UTF-8       
##  [4] LC_COLLATE=en_US.UTF-8     LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
## [10] LC_TELEPHONE=C             LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] splines   grid      stats     graphics  grDevices utils     datasets  base     
## 
## other attached packages:
##  [1] wzRfun_0.4          rsm_2.07            ellipse_0.3-8       multcomp_1.3-7     
##  [5] TH.data_1.0-5       survival_2.37-7     mvtnorm_1.0-1       reshape_0.8.5      
##  [9] plyr_1.8.1          alr3_2.0.5          car_2.0-22          gridExtra_0.9.1    
## [13] latticeExtra_0.6-26 RColorBrewer_1.0-5  lattice_0.20-29     rmarkdown_0.3.3    
## [17] knitr_1.8          
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.4    doBy_4.5-12     evaluate_0.5.5  formatR_1.0     htmltools_0.2.6
##  [6] MASS_7.3-37     Matrix_1.1-5    methods_3.1.2   nnet_7.3-8      Rcpp_0.11.3    
## [11] sandwich_2.3-2  stringr_0.6.2   tools_3.1.2     yaml_2.1.13     zoo_1.7-11

Modelos de Regressão e aplicações no ambiente R
13 a 17 de Abril de 2015 - Manaus - AM	Prof. Dr. Walmes M. Zeviani
Fundação Oswaldo Cruz - FIOCRUZ	Lab. de Estatística e Geoinformação - LEG
	Departamento de Estatística - UFPR