Modelos de Regressão e aplicações no ambiente R

13 a 17 de Abril de 2015 - Manaus - AM
Prof. Dr. Walmes M. Zeviani
Fundação Oswaldo Cruz - FIOCRUZ
Lab. de Estatística e Geoinformação - LEG
Departamento de Estatística - UFPR


Medidas de diagnóstico e influência

http://web.stanford.edu/class/stats191/notebooks/Diagnostics%20for%20multiple%20regression.pdf


Diagnóstico visual

##=============================================================================
## Modelos de Regressão e aplicações no ambiente R
##
##   13 a 17 de Abril de 2015 - Manaus/AM
##   Fundação Oswaldo Cruz - FIOCRUZ
## 
##                                                  Prof. Dr. Walmes M. Zeviani
##                                                                LEG/DEST/UFPR
##=============================================================================

##-----------------------------------------------------------------------------
## Session setup.

## Packages attached for the rest of the script.
pkg <- c(
    "lattice", "latticeExtra", "gridExtra", "car",
    "alr3", "asbio", "plyr", "wzRfun"
)

## Attach every package by name. The printed result is a named logical
## vector with one entry per package: TRUE when it was attached, FALSE
## when require() could not load it.
vapply(pkg, require, logical(1), character.only = TRUE)
##      lattice latticeExtra    gridExtra          car         alr3        asbio 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##         plyr       wzRfun 
##         TRUE         TRUE
## Open a lattice graphics device with the non-colour settings.
trellis.device(color = FALSE)
##-----------------------------------------------------------------------------
## Data.

## Address of the data file, assembled from two whitespace-free pieces so
## the long URL fits the line width without a line break inside the
## string literal. A newline embedded in the URL would have to be removed
## again before the download, and a strwrap()-based reassembly only does
## that when the wrapping width (0.9 * getOption("width")) is small enough
## to force a break between the two halves; with a wider console the
## halves come back joined by a space and the address is invalid.
## strwrap() leaves a whitespace-free string untouched, so this value is
## safe to pass through it at any console width.
url <- paste0("http://www.leg.ufpr.br/~walmes/data/",
              "business_economics_dataset/EXAMPLES/REALESTA.DAT")

## Read the data file (no header row). strwrap() splits `url` at
## whitespace wherever the wrapping width forces a break, and paste0()
## glues the resulting pieces back into a single address.
da <- read.table(paste0(strwrap(url), collapse = ""), header = FALSE)

## Drop the first column and name the remaining four.
da <- da[-1]
names(da) <- c("saleprice", "landvalue", "improvvalue", "area")

## Structure and numeric summary of the data.
str(da)
## 'data.frame':    20 obs. of  4 variables:
##  $ saleprice  : int  68900 48500 55500 62000 116500 45000 38000 83000 59000 47500 ...
##  $ landvalue  : int  5960 9000 9500 10000 18000 8500 8000 23000 8100 9000 ...
##  $ improvvalue: int  44967 27860 31439 39592 72827 27317 29856 47752 39117 29349 ...
##  $ area       : int  1873 928 1126 1265 2214 912 899 1803 1204 1725 ...
summary(da)
##    saleprice        landvalue      improvvalue         area     
##  Min.   : 22400   Min.   : 1500   Min.   : 5779   Min.   : 899  
##  1st Qu.: 40800   1st Qu.: 6965   1st Qu.:26351   1st Qu.:1054  
##  Median : 49250   Median : 8050   Median :31559   Median :1188  
##  Mean   : 56660   Mean   : 9213   Mean   :35311   Mean   :1383  
##  3rd Qu.: 63725   3rd Qu.: 9625   3rd Qu.:41366   3rd Qu.:1744  
##  Max.   :116500   Max.   :23000   Max.   :72827   Max.   :2455

## Scatterplot matrices of all four variables: base graphics first, then
## the scatterplotMatrix() version.
pairs(da)

scatterplotMatrix(da)

## Divide every column by 1000 so the values are easier to handle.
da <- da / 1000

##-----------------------------------------------------------------------------
## Visual inspection.

## Alternative kept for reference: one plot per predictor, combined with
## grid.arrange().
## p1 <- xyplot(saleprice~landvalue, data=da, type=c("p","smooth"))
## p2 <- xyplot(saleprice~improvvalue, data=da, type=c("p","smooth"))
## p3 <- xyplot(saleprice~area, data=da, type=c("p","smooth"))
## grid.arrange(p1, p2, p3)

## Sale price against each predictor, one panel per formula, laid out in
## three columns and one row. Panels share the y scale but not the x
## scale, and each panel shows the points plus a smooth curve.
xyplot.list(
    list(saleprice ~ improvvalue,
         saleprice ~ landvalue,
         saleprice ~ area),
    data = da,
    type = c("p", "smooth"),
    layout = c(3, 1),
    y.same = TRUE,
    x.same = FALSE,
    ylab = "Sales price (/1000)",
    xlab = list(c("Improvvalue", "Landvalue", "Area"))
)

##-----------------------------------------------------------------------------
## Model fit.

## Multiple linear regression of sale price on the three predictors.
m0 <- lm(saleprice ~ landvalue + improvvalue + area, data = da)

## List the methods available for objects of class "lm".
## class(m0)
methods(class = "lm")
##  [1] add1.lm*                alias.lm*               anova.lm*              
##  [4] Anova.lm*               avPlot.lm*              bootCase.lm*           
##  [7] Boot.lm*                boxCox.lm*              case.names.lm*         
## [10] ceresPlot.lm*           confidenceEllipse.lm*   confint.lm             
## [13] cooks.distance.lm*      crPlot.lm*              deltaMethod.lm*        
## [16] deviance.lm*            dfbeta.lm*              dfbetaPlots.lm*        
## [19] dfbetas.lm*             dfbetasPlots.lm*        drop1.lm*              
## [22] dummy.coef.lm           durbinWatsonTest.lm*    effects.lm*            
## [25] extractAIC.lm*          family.lm*              formula.lm*            
## [28] hatvalues.lm*           hccm.lm*                infIndexPlot.lm*       
## [31] influence.lm*           influencePlot.lm*       inverseResponsePlot.lm*
## [34] kappa.lm                labels.lm*              leveneTest.lm*         
## [37] leveragePlot.lm*        linearHypothesis.lm*    logLik.lm*             
## [40] mcPlot.lm*              mmp.lm*                 model.frame.lm*        
## [43] model.matrix.lm         ncvTest.lm*             nextBoot.lm*           
## [46] nobs.lm*                outlierTest.lm*         plot.lm*               
## [49] pod.lm*                 powerTransform.lm*      predict.lm             
## [52] print.lm*               proj.lm*                pureErrorAnova.lm*     
## [55] qqPlot.lm*              qr.lm*                  randomLinComb.lm*      
## [58] residualPlot.lm*        residualPlots.lm*       residuals.lm           
## [61] rstandard.lm*           rstudent.lm*            sigmaHat.lm*           
## [64] simulate.lm*            spreadLevelPlot.lm*     summary.lm             
## [67] variable.names.lm*      vcov.lm*               
## 
##    Non-visible functions are asterisked
## Diagnostics.

## Default diagnostic plots of the fit on a 2 x 2 grid; layout(1) then
## puts the device back to a single panel.
par(mfrow = c(2, 2))
plot(m0)
layout(1)

## Diagnostic plots 1 to 6 on a 2 x 3 grid, again resetting the device
## to a single panel afterwards.
par(mfrow = c(2, 3))
plot(m0, which = 1:6)
layout(1)

## plot(m0, which=2)

## Residual plots of the fitted model.
residualPlots(m0)