Aplicação de modelos de regressão linear e não linear em ciências agrárias

09 à 11 de Dezembro de 2014 - Goiânia - GO
Prof. Dr. Walmes M. Zeviani
Embrapa Arroz e Feijão
Lab. de Estatística e Geoinformação - LEG
Departamento de Estatística - UFPR


Modelos semiparamétricos

##=============================================================================
## Aplicação de modelos de regressão linear e
##   não linear em ciências agrárias
##
##   09 à 11 de Dezembro de 2014 - Goiânia/GO
##   Embrapa Arroz e Feijão
## 
##                                                  Prof. Dr. Walmes M. Zeviani
##                                                                LEG/DEST/UFPR
##=============================================================================

##-----------------------------------------------------------------------------
## Definições da sessão.

pkg <- c("lattice", "latticeExtra", "gridExtra", "car", "alr3",
         "plyr", "reshape", "splines", "mgcv", "wzRfun")

sapply(pkg, require, character.only=TRUE)
##      lattice latticeExtra    gridExtra          car         alr3         plyr 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##      reshape      splines         mgcv       wzRfun 
##         TRUE         TRUE         TRUE         TRUE
trellis.device(color=FALSE)
##-----------------------------------------------------------------------------
## "~/Dropbox/Consultorias/Leonardo Seno/suino_pre_abate"
## 
## Ver: http://cran.r-project.org/web/packages/lmeSplines/lmeSplines.pdf
## 
## É possível fazer!
## http://stackoverflow.com/questions/24750002/slopes-for-lme-linear-b-spline-model
## http://www.ats.ucla.edu/stat/r/examples/alda/ch6.htm

##-----------------------------------------------------------------------------
## Leitura.

sui <- read.table("http://www.leg.ufpr.br/~walmes/data/preabate.txt",
                  header=TRUE, dec=",")
sui$trat <- factor(-1*sui$trat)
levels(sui$trat) <- c("Sem aspersão","Com aspersão")
str(sui)
## 'data.frame':    384 obs. of  5 variables:
##  $ trat: Factor w/ 2 levels "Sem aspersão",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ asp : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hora: int  0 0 0 0 5 5 5 5 10 10 ...
##  $ rep : int  1 2 3 4 1 2 3 4 1 2 ...
##  $ temp: num  29 30.9 34.5 31.3 29.6 ...
xyplot(temp~hora, groups=trat, data=sui,
       xlab="Minutos à partir das 07:00 da manhã",
       ylab="Temperatura corporal (ºC)",
       auto.key=TRUE)+
  glayer(panel.smoother(..., span=0.4))

xyplot(temp~hora|trat, groups=asp, data=sui, type=c("p","smooth"))

## Efeito de animal?
xyplot(temp~hora|trat, groups=rep, data=sui, type=c("b"))


Regressão local

##-----------------------------------------------------------------------------
## Loess.

str(sui)
## 'data.frame':    384 obs. of  5 variables:
##  $ trat: Factor w/ 2 levels "Sem aspersão",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ asp : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hora: int  0 0 0 0 5 5 5 5 10 10 ...
##  $ rep : int  1 2 3 4 1 2 3 4 1 2 ...
##  $ temp: num  29 30.9 34.5 31.3 29.6 ...
ca <- subset(sui, trat=="Com aspersão")

## m0 <- loess(temp~hora, data=ca)
m0 <- loess(temp~hora, data=ca, span=0.25)
## m0 <- loess(temp~hora, data=ca, span=0.25, degree=1)

## Medidas de ajuste.
summary(m0)
## Call:
## loess(formula = temp ~ hora, data = ca, span = 0.25)
## 
## Number of Observations: 192 
## Equivalent Number of Parameters: 11.87 
## Residual Standard Error: 1.307 
## Trace of smoother matrix: 13.12 
## 
## Control settings:
##   normalize:  TRUE 
##   span       :  0.25 
##   degree   :  2 
##   family   :  gaussian
##   surface  :  interpolate      cell = 0.2
pred <- data.frame(hora=seq(min(ca$hora), max(ca$hora), l=200))
aux <- predict(m0, newdata=pred, se=TRUE)
aux$me <- outer(aux$se.fit, qnorm(0.975)*c(lwr=-1,fit=0,upr=1), FUN="*")
aux <- sweep(aux$me, MARGIN=1, STATS=aux$fit, FUN="+")
pred <- cbind(pred, aux)
str(pred)
## 'data.frame':    200 obs. of  4 variables:
##  $ hora: num  0 1.18 2.36 3.54 4.72 ...
##  $ lwr : num  30.5 30.2 30 29.8 29.5 ...
##  $ fit : num  31.5 31.2 30.8 30.5 30.2 ...
##  $ upr : num  32.5 32.1 31.7 31.3 30.9 ...
p1 <- xyplot(temp~hora, data=ca,
             ylab="Temperatura do corpo",
             xlab="Minutos a partir das 7h00")
p2 <- xyplot(fit~hora, data=pred, type="l",
             ly=pred$lwr, uy=pred$upr, cty="bands",
             prepanel=prepanel.cbH, panel=panel.cbH)
p1+as.layer(p2)


Splines

##-----------------------------------------------------------------------------
## Splines.

## m0 <- lm(temp~bs(hora), data=ca)
## m0 <- lm(temp~bs(hora, degree=13, df=1), data=ca)
m0 <- lm(temp~ns(hora, df=13), data=ca)

summary(m0)
## 
## Call:
## lm(formula = temp ~ ns(hora, df = 13), data = ca)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7022 -0.9033 -0.0045  0.7134  3.8193 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         31.66367    0.55157  57.406  < 2e-16 ***
## ns(hora, df = 13)1  -2.27264    0.85325  -2.664 0.008442 ** 
## ns(hora, df = 13)2  -1.22015    0.97361  -1.253 0.211771    
## ns(hora, df = 13)3  -3.37650    0.89413  -3.776 0.000217 ***
## ns(hora, df = 13)4  -1.89242    0.98766  -1.916 0.056959 .  
## ns(hora, df = 13)5   1.24033    0.91510   1.355 0.177005    
## ns(hora, df = 13)6  -2.62154    0.92759  -2.826 0.005249 ** 
## ns(hora, df = 13)7  -3.10635    0.96947  -3.204 0.001605 ** 
## ns(hora, df = 13)8   2.75124    0.92410   2.977 0.003314 ** 
## ns(hora, df = 13)9   0.08984    0.92098   0.098 0.922400    
## ns(hora, df = 13)10 -4.65297    0.96546  -4.819 3.07e-06 ***
## ns(hora, df = 13)11  1.10806    0.80262   1.381 0.169149    
## ns(hora, df = 13)12 -3.62265    1.42213  -2.547 0.011701 *  
## ns(hora, df = 13)13  3.78342    0.66592   5.682 5.36e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.304 on 178 degrees of freedom
## Multiple R-squared:  0.5533, Adjusted R-squared:  0.5207 
## F-statistic: 16.96 on 13 and 178 DF,  p-value: < 2.2e-16
pred <- data.frame(hora=seq(min(ca$hora), max(ca$hora), l=200))
aux <- predict(m0, newdata=pred, interval="confidence")
pred <- cbind(pred, aux)
str(pred)
## 'data.frame':    200 obs. of  4 variables:
##  $ hora: num  0 1.18 2.36 3.54 4.72 ...
##  $ fit : num  31.7 31.3 31 30.6 30.3 ...
##  $ lwr : num  30.6 30.3 30.1 29.8 29.6 ...
##  $ upr : num  32.8 32.3 31.8 31.4 31 ...
p1 <- xyplot(temp~hora, data=ca,
             ylab="Temperatura do corpo",
             xlab="Minutos a partir das 7h00")
p2 <- xyplot(fit~hora, data=pred, type="l",
             ly=pred$lwr, uy=pred$upr, cty="bands",
             prepanel=prepanel.cbH, panel=panel.cbH)
p1+as.layer(p2)


Modelos aditivos generalizados (gam)

##-----------------------------------------------------------------------------
## Ajustar gam.

m0 <- gam(temp~s(hora), data=ca)
summary(m0)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## temp ~ s(hora)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  30.2432     0.0951     318   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##          edf Ref.df     F p-value    
## s(hora) 8.92  8.998 22.55  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.511   Deviance explained = 53.3%
## GCV = 1.8311  Scale est. = 1.7365    n = 192
pred <- data.frame(hora=seq(min(ca$hora), max(ca$hora), l=200))
aux <- predict(m0, newdata=pred, se=TRUE)
aux$me <- outer(aux$se.fit, c(lwr=-1,fit=0,upr=1)*qnorm(0.975), "*")
aux <- sweep(aux$me, MARGIN=1, STATS=aux$fit, FUN="+")
pred <- cbind(pred, aux)
str(pred)
## 'data.frame':    200 obs. of  4 variables:
##  $ hora: num  0 1.18 2.36 3.54 4.72 ...
##  $ lwr : num  30.5 30.3 30.1 29.8 29.6 ...
##  $ fit : num  31.5 31.2 30.9 30.6 30.3 ...
##  $ upr : num  32.5 32.1 31.7 31.4 31 ...
p1 <- xyplot(temp~hora, data=ca,
             ylab="Temperatura do corpo",
             xlab="Minutos a partir das 7h00")
p2 <- xyplot(fit~hora, data=pred, type="l",
             ly=pred$lwr, uy=pred$upr, cty="bands",
             prepanel=prepanel.cbH, panel=panel.cbH)
p1+as.layer(p2)

##-----------------------------------------------------------------------------
## Ajuste para os dois níveis de tratamento.

## Modelo sem interação trat com hora.
g1 <- gam(temp~trat+s(hora), data=sui)

## Modelo em que em cada trat a hora tem um efeito diferente.
g0 <- gam(temp~trat+s(hora, by=trat), data=sui)

## Para hipótese de interação nula.
anova(g0, g1, test="F")
## Analysis of Deviance Table
## 
## Model 1: temp ~ trat + s(hora, by = trat)
## Model 2: temp ~ trat + s(hora)
##   Resid. Df Resid. Dev      Df Deviance      F    Pr(>F)    
## 1    368.77     562.50                                      
## 2    373.20     824.09 -4.4341  -261.59 38.677 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Resumo do ajuste.
summary(g0)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## temp ~ trat + s(hora, by = trat)
## 
## Parametric coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      34.76667    0.08913  390.06   <2e-16 ***
## tratCom aspersão -4.52344    0.12605  -35.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                            edf Ref.df     F p-value    
## s(hora):tratSem aspersão 4.302  5.295 92.57  <2e-16 ***
## s(hora):tratCom aspersão 8.930  8.999 25.76  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.84   Deviance explained = 84.5%
## GCV = 1.5883  Scale est. = 1.5253    n = 384
pred <- expand.grid(hora=seq(min(sui$hora), max(sui$hora), l=100),
                    trat=levels(sui$trat))
aux <- predict(g0, newdata=pred, se=TRUE)
aux$me <- outer(aux$se.fit, c(lwr=-1,fit=0,upr=1)*qnorm(0.975), "*")
aux <- sweep(aux$me, MARGIN=1, STATS=aux$fit, FUN="+")
pred <- cbind(pred, aux)
str(pred)
## 'data.frame':    200 obs. of  5 variables:
##  $ hora: num  0 2.37 4.75 7.12 9.49 ...
##  $ trat: Factor w/ 2 levels "Sem aspersão",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ lwr : num  30.3 30.4 30.6 30.7 30.8 ...
##  $ fit : num  31 31 31.1 31.2 31.3 ...
##  $ upr : num  31.6 31.7 31.7 31.7 31.8 ...
xmn <- seq(0, 4*60, by=30)
xhr <- seq(as.POSIXct("07:00", format="%H:%M"),
           as.POSIXct("11:00", format="%H:%M"), "30 min")
format(xhr, "%H:%M")
## [1] "07:00" "07:30" "08:00" "08:30" "09:00" "09:30" "10:00" "10:30" "11:00"
p1 <- xyplot(temp~hora, groups=trat, data=sui,
             xlab="Minutos à partir das 07:00 da manhã",
             ylab="Temperatura corporal (ºC)",
             scales=list(x=list(at=xmn, labels=format(xhr, "%H:%M"))),
             auto.key=list(columns=2),
             panel=function(...){
                 panel.abline(v=(0:9)*30, lty=3, col="gray50")
                 panel.rect((0:3)*60, 22, ((0:3)+0.5)*60, 40,
                            col="gray55", border="transparent", alpha=0.1)
                 panel.xyplot(...)           
             })

p2 <- xyplot(fit~hora, groups=trat, data=pred, type="l",
             prepanel=prepanel.cbH, cty="bands",
             ly=pred$lwr, uy=pred$upr, alpha=0.1, fill=1,
             panel.groups=panel.cbH,
             panel=panel.superpose)

p1+as.layer(p2)


##-----------------------------------------------------------------------------
## Informações da sessão.

Sys.time()
## [1] "2014-12-11 21:00:38 BRST"
sessionInfo()
## R version 3.1.2 (2014-10-31)
## Platform: i686-pc-linux-gnu (32-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=pt_BR.UTF-8       
##  [4] LC_COLLATE=en_US.UTF-8     LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
## [10] LC_TELEPHONE=C             LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] splines   grid      stats     graphics  grDevices utils     datasets  base     
## 
## other attached packages:
##  [1] wzRfun_0.4          mgcv_1.8-3          nlme_3.1-118        reshape_0.8.5      
##  [5] plyr_1.8.1          alr3_2.0.5          car_2.0-22          gridExtra_0.9.1    
##  [9] latticeExtra_0.6-26 RColorBrewer_1.0-5  lattice_0.20-29     rmarkdown_0.3.3    
## [13] knitr_1.8          
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.4    doBy_4.5-12     evaluate_0.5.5  formatR_1.0     htmltools_0.2.6
##  [6] MASS_7.3-35     Matrix_1.1-4    methods_3.1.2   multcomp_1.3-7  mvtnorm_1.0-1  
## [11] nnet_7.3-8      Rcpp_0.11.3     sandwich_2.3-2  stringr_0.6.2   survival_2.37-7
## [16] TH.data_1.0-5   tools_3.1.2     yaml_2.1.13     zoo_1.7-11