Curso de estatística experimental com aplicações em R

12 a 14 de novembro de 2014 - Manaus - AM
Prof. Dr. Walmes M. Zeviani
Embrapa Amazônia Ocidental
Lab. de Estatística e Geoinformação - LEG
Departamento de Estatística - UFPR

Análise exploratória

##-----------------------------------------------------------------------------
## Dados de carros Duster à venda no webmotors em 26/03/2014.

## Read the Duster listings (tab-separated, with a header row) from the
## course server.
## - The encoding is spelled "UTF-8", which is one of the values documented
##   for read.table()/scan(); the lowercase spelling is not.
## - stringsAsFactors=TRUE keeps the text columns as factors. The code
##   further down (levels(), nlevels(), indexing colours by a factor) depends
##   on that, and since R 4.0 the default is FALSE.
dus <-
    read.table("http://www.leg.ufpr.br/~walmes/data/duster_venda_260314.txt",
               header=TRUE, sep="\t", encoding="UTF-8",
               stringsAsFactors=TRUE)

## dus <-
##     read.table("/home/walmes/Dropbox/XML-leituras/carros/duster_venda_260314.txt",
##                header=TRUE, sep="\t", encoding="utf-8")

## Strip a trailing "/" followed by four digits from `ano` and store the
## result as a factor. The pattern is anchored at the end of the string, so
## it can match at most once.
dus$ano <- factor(sub(pattern="/\\d{4}$",
                      replacement="",
                      x=as.character(dus$ano)))
str(object=dus)
## 'data.frame':    699 obs. of  10 variables:
##  $ modelo: Factor w/ 11 levels "RENAULT DUSTER 1.6 4X2 16V FLEX 4P MANUAL",..: 3 1 2 2 1 1 2 3 6 2 ...
##  $ cor   : Factor w/ 9 levels "Azul","Branco",..: 5 1 5 5 5 8 6 6 6 6 ...
##  $ km    : int  31442 40800 56000 NA 45000 50000 44000 30000 41000 55000 ...
##  $ ano   : Factor w/ 4 levels "2011","2012",..: 1 1 1 1 1 2 2 2 1 1 ...
##  $ valor : num  41990 42500 42900 42990 43800 ...
##  $ cambio: Factor w/ 2 levels "AUTOMÁTICO","MANUAL": 2 2 2 2 2 2 2 2 2 2 ...
##  $ poten : num  1.6 1.6 1.6 1.6 1.6 1.6 1.6 1.6 2 1.6 ...
##  $ trac  : Factor w/ 2 levels "4X2","4X4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ cat   : Factor w/ 5 levels " "," DYNAMIQUE ",..: 3 1 2 2 1 1 2 3 2 2 ...
##  $ novo  : Factor w/ 2 levels "novo","usado": 2 2 2 2 2 2 2 2 2 2 ...
## Number of missing values (NA) in each column.
vapply(dus, function(column) sum(is.na(column)), integer(1))
## modelo    cor     km    ano  valor cambio  poten   trac    cat   novo 
##      0      0    132      0      0      0      0      0      0      0
## Drop every record that has at least one NA.
dus <- na.omit(object=dus)
str(object=dus)
## 'data.frame':    567 obs. of  10 variables:
##  $ modelo: Factor w/ 11 levels "RENAULT DUSTER 1.6 4X2 16V FLEX 4P MANUAL",..: 3 1 2 1 1 2 3 6 2 1 ...
##  $ cor   : Factor w/ 9 levels "Azul","Branco",..: 5 1 5 5 8 6 6 6 6 5 ...
##  $ km    : int  31442 40800 56000 45000 50000 44000 30000 41000 55000 60000 ...
##  $ ano   : Factor w/ 4 levels "2011","2012",..: 1 1 1 1 2 2 2 1 1 1 ...
##  $ valor : num  41990 42500 42900 43800 43999 ...
##  $ cambio: Factor w/ 2 levels "AUTOMÁTICO","MANUAL": 2 2 2 2 2 2 2 2 2 2 ...
##  $ poten : num  1.6 1.6 1.6 1.6 1.6 1.6 1.6 2 1.6 1.6 ...
##  $ trac  : Factor w/ 2 levels "4X2","4X4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ cat   : Factor w/ 5 levels " "," DYNAMIQUE ",..: 3 1 2 1 1 2 3 2 2 1 ...
##  $ novo  : Factor w/ 2 levels "novo","usado": 2 2 2 2 2 2 2 2 2 2 ...
##  - attr(*, "na.action")=Class 'omit'  Named int [1:132] 4 15 16 26 34 36 39 47 52 53 ...
##   .. ..- attr(*, "names")= chr [1:132] "4" "15" "16" "26" ...
##-----------------------------------------------------------------------------
## Gráfico de barras e setores.

## One-way frequency table of the transmission type (`cambio`).
x <- table(dus$cambio)
class(x)
## [1] "table"
## A table built with xtabs() also inherits from class `table`.
x <- xtabs(~cambio, data=dus)
class(x)
## [1] "xtabs" "table"
## barplot(x)
## Vertical bars: categories on the x axis, counts on the y axis.
barplot(x,
        xlab="Tipo de câmbio",
        ylab="Frequência absoluta",
        col=c("seagreen", "yellowgreen"))

## Horizontal bars: with horiz=TRUE the counts run along the x axis and the
## categories along the y axis, so the two axis labels are swapped relative
## to the vertical version.
barplot(x, horiz=TRUE,
        xlab="Frequência absoluta",
        ylab="Tipo de câmbio",
        col=c("seagreen", "yellowgreen"))
box(bty="L")

## Every colour name known to R; the names that contain `green` are picked
## out of this vector further below with grep().
colors()
##   [1] "white"                "aliceblue"            "antiquewhite"        
##   [4] "antiquewhite1"        "antiquewhite2"        "antiquewhite3"       
##   [7] "antiquewhite4"        "aquamarine"           "aquamarine1"         
##  [10] "aquamarine2"          "aquamarine3"          "aquamarine4"         
##  [13] "azure"                "azure1"               "azure2"              
##  [16] "azure3"               "azure4"               "beige"               
##  [19] "bisque"               "bisque1"              "bisque2"             
##  [22] "bisque3"              "bisque4"              "black"               
##  [25] "blanchedalmond"       "blue"                 "blue1"               
##  [28] "blue2"                "blue3"                "blue4"               
##  [31] "blueviolet"           "brown"                "brown1"              
##  [34] "brown2"               "brown3"               "brown4"              
##  [37] "burlywood"            "burlywood1"           "burlywood2"          
##  [40] "burlywood3"           "burlywood4"           "cadetblue"           
##  [43] "cadetblue1"           "cadetblue2"           "cadetblue3"          
##  [46] "cadetblue4"           "chartreuse"           "chartreuse1"         
##  [49] "chartreuse2"          "chartreuse3"          "chartreuse4"         
##  [52] "chocolate"            "chocolate1"           "chocolate2"          
##  [55] "chocolate3"           "chocolate4"           "coral"               
##  [58] "coral1"               "coral2"               "coral3"              
##  [61] "coral4"               "cornflowerblue"       "cornsilk"            
##  [64] "cornsilk1"            "cornsilk2"            "cornsilk3"           
##  [67] "cornsilk4"            "cyan"                 "cyan1"               
##  [70] "cyan2"                "cyan3"                "cyan4"               
##  [73] "darkblue"             "darkcyan"             "darkgoldenrod"       
##  [76] "darkgoldenrod1"       "darkgoldenrod2"       "darkgoldenrod3"      
##  [79] "darkgoldenrod4"       "darkgray"             "darkgreen"           
##  [82] "darkgrey"             "darkkhaki"            "darkmagenta"         
##  [85] "darkolivegreen"       "darkolivegreen1"      "darkolivegreen2"     
##  [88] "darkolivegreen3"      "darkolivegreen4"      "darkorange"          
##  [91] "darkorange1"          "darkorange2"          "darkorange3"         
##  [94] "darkorange4"          "darkorchid"           "darkorchid1"         
##  [97] "darkorchid2"          "darkorchid3"          "darkorchid4"         
## [100] "darkred"              "darksalmon"           "darkseagreen"        
## [103] "darkseagreen1"        "darkseagreen2"        "darkseagreen3"       
## [106] "darkseagreen4"        "darkslateblue"        "darkslategray"       
## [109] "darkslategray1"       "darkslategray2"       "darkslategray3"      
## [112] "darkslategray4"       "darkslategrey"        "darkturquoise"       
## [115] "darkviolet"           "deeppink"             "deeppink1"           
## [118] "deeppink2"            "deeppink3"            "deeppink4"           
## [121] "deepskyblue"          "deepskyblue1"         "deepskyblue2"        
## [124] "deepskyblue3"         "deepskyblue4"         "dimgray"             
## [127] "dimgrey"              "dodgerblue"           "dodgerblue1"         
## [130] "dodgerblue2"          "dodgerblue3"          "dodgerblue4"         
## [133] "firebrick"            "firebrick1"           "firebrick2"          
## [136] "firebrick3"           "firebrick4"           "floralwhite"         
## [139] "forestgreen"          "gainsboro"            "ghostwhite"          
## [142] "gold"                 "gold1"                "gold2"               
## [145] "gold3"                "gold4"                "goldenrod"           
## [148] "goldenrod1"           "goldenrod2"           "goldenrod3"          
## [151] "goldenrod4"           "gray"                 "gray0"               
## [154] "gray1"                "gray2"                "gray3"               
## [157] "gray4"                "gray5"                "gray6"               
## [160] "gray7"                "gray8"                "gray9"               
## [163] "gray10"               "gray11"               "gray12"              
## [166] "gray13"               "gray14"               "gray15"              
## [169] "gray16"               "gray17"               "gray18"              
## [172] "gray19"               "gray20"               "gray21"              
## [175] "gray22"               "gray23"               "gray24"              
## [178] "gray25"               "gray26"               "gray27"              
## [181] "gray28"               "gray29"               "gray30"              
## [184] "gray31"               "gray32"               "gray33"              
## [187] "gray34"               "gray35"               "gray36"              
## [190] "gray37"               "gray38"               "gray39"              
## [193] "gray40"               "gray41"               "gray42"              
## [196] "gray43"               "gray44"               "gray45"              
## [199] "gray46"               "gray47"               "gray48"              
## [202] "gray49"               "gray50"               "gray51"              
## [205] "gray52"               "gray53"               "gray54"              
## [208] "gray55"               "gray56"               "gray57"              
## [211] "gray58"               "gray59"               "gray60"              
## [214] "gray61"               "gray62"               "gray63"              
## [217] "gray64"               "gray65"               "gray66"              
## [220] "gray67"               "gray68"               "gray69"              
## [223] "gray70"               "gray71"               "gray72"              
## [226] "gray73"               "gray74"               "gray75"              
## [229] "gray76"               "gray77"               "gray78"              
## [232] "gray79"               "gray80"               "gray81"              
## [235] "gray82"               "gray83"               "gray84"              
## [238] "gray85"               "gray86"               "gray87"              
## [241] "gray88"               "gray89"               "gray90"              
## [244] "gray91"               "gray92"               "gray93"              
## [247] "gray94"               "gray95"               "gray96"              
## [250] "gray97"               "gray98"               "gray99"              
## [253] "gray100"              "green"                "green1"              
## [256] "green2"               "green3"               "green4"              
## [259] "greenyellow"          "grey"                 "grey0"               
## [262] "grey1"                "grey2"                "grey3"               
## [265] "grey4"                "grey5"                "grey6"               
## [268] "grey7"                "grey8"                "grey9"               
## [271] "grey10"               "grey11"               "grey12"              
## [274] "grey13"               "grey14"               "grey15"              
## [277] "grey16"               "grey17"               "grey18"              
## [280] "grey19"               "grey20"               "grey21"              
## [283] "grey22"               "grey23"               "grey24"              
## [286] "grey25"               "grey26"               "grey27"              
## [289] "grey28"               "grey29"               "grey30"              
## [292] "grey31"               "grey32"               "grey33"              
## [295] "grey34"               "grey35"               "grey36"              
## [298] "grey37"               "grey38"               "grey39"              
## [301] "grey40"               "grey41"               "grey42"              
## [304] "grey43"               "grey44"               "grey45"              
## [307] "grey46"               "grey47"               "grey48"              
## [310] "grey49"               "grey50"               "grey51"              
## [313] "grey52"               "grey53"               "grey54"              
## [316] "grey55"               "grey56"               "grey57"              
## [319] "grey58"               "grey59"               "grey60"              
## [322] "grey61"               "grey62"               "grey63"              
## [325] "grey64"               "grey65"               "grey66"              
## [328] "grey67"               "grey68"               "grey69"              
## [331] "grey70"               "grey71"               "grey72"              
## [334] "grey73"               "grey74"               "grey75"              
## [337] "grey76"               "grey77"               "grey78"              
## [340] "grey79"               "grey80"               "grey81"              
## [343] "grey82"               "grey83"               "grey84"              
## [346] "grey85"               "grey86"               "grey87"              
## [349] "grey88"               "grey89"               "grey90"              
## [352] "grey91"               "grey92"               "grey93"              
## [355] "grey94"               "grey95"               "grey96"              
## [358] "grey97"               "grey98"               "grey99"              
## [361] "grey100"              "honeydew"             "honeydew1"           
## [364] "honeydew2"            "honeydew3"            "honeydew4"           
## [367] "hotpink"              "hotpink1"             "hotpink2"            
## [370] "hotpink3"             "hotpink4"             "indianred"           
## [373] "indianred1"           "indianred2"           "indianred3"          
## [376] "indianred4"           "ivory"                "ivory1"              
## [379] "ivory2"               "ivory3"               "ivory4"              
## [382] "khaki"                "khaki1"               "khaki2"              
## [385] "khaki3"               "khaki4"               "lavender"            
## [388] "lavenderblush"        "lavenderblush1"       "lavenderblush2"      
## [391] "lavenderblush3"       "lavenderblush4"       "lawngreen"           
## [394] "lemonchiffon"         "lemonchiffon1"        "lemonchiffon2"       
## [397] "lemonchiffon3"        "lemonchiffon4"        "lightblue"           
## [400] "lightblue1"           "lightblue2"           "lightblue3"          
## [403] "lightblue4"           "lightcoral"           "lightcyan"           
## [406] "lightcyan1"           "lightcyan2"           "lightcyan3"          
## [409] "lightcyan4"           "lightgoldenrod"       "lightgoldenrod1"     
## [412] "lightgoldenrod2"      "lightgoldenrod3"      "lightgoldenrod4"     
## [415] "lightgoldenrodyellow" "lightgray"            "lightgreen"          
## [418] "lightgrey"            "lightpink"            "lightpink1"          
## [421] "lightpink2"           "lightpink3"           "lightpink4"          
## [424] "lightsalmon"          "lightsalmon1"         "lightsalmon2"        
## [427] "lightsalmon3"         "lightsalmon4"         "lightseagreen"       
## [430] "lightskyblue"         "lightskyblue1"        "lightskyblue2"       
## [433] "lightskyblue3"        "lightskyblue4"        "lightslateblue"      
## [436] "lightslategray"       "lightslategrey"       "lightsteelblue"      
## [439] "lightsteelblue1"      "lightsteelblue2"      "lightsteelblue3"     
## [442] "lightsteelblue4"      "lightyellow"          "lightyellow1"        
## [445] "lightyellow2"         "lightyellow3"         "lightyellow4"        
## [448] "limegreen"            "linen"                "magenta"             
## [451] "magenta1"             "magenta2"             "magenta3"            
## [454] "magenta4"             "maroon"               "maroon1"             
## [457] "maroon2"              "maroon3"              "maroon4"             
## [460] "mediumaquamarine"     "mediumblue"           "mediumorchid"        
## [463] "mediumorchid1"        "mediumorchid2"        "mediumorchid3"       
## [466] "mediumorchid4"        "mediumpurple"         "mediumpurple1"       
## [469] "mediumpurple2"        "mediumpurple3"        "mediumpurple4"       
## [472] "mediumseagreen"       "mediumslateblue"      "mediumspringgreen"   
## [475] "mediumturquoise"      "mediumvioletred"      "midnightblue"        
## [478] "mintcream"            "mistyrose"            "mistyrose1"          
## [481] "mistyrose2"           "mistyrose3"           "mistyrose4"          
## [484] "moccasin"             "navajowhite"          "navajowhite1"        
## [487] "navajowhite2"         "navajowhite3"         "navajowhite4"        
## [490] "navy"                 "navyblue"             "oldlace"             
## [493] "olivedrab"            "olivedrab1"           "olivedrab2"          
## [496] "olivedrab3"           "olivedrab4"           "orange"              
## [499] "orange1"              "orange2"              "orange3"             
## [502] "orange4"              "orangered"            "orangered1"          
## [505] "orangered2"           "orangered3"           "orangered4"          
## [508] "orchid"               "orchid1"              "orchid2"             
## [511] "orchid3"              "orchid4"              "palegoldenrod"       
## [514] "palegreen"            "palegreen1"           "palegreen2"          
## [517] "palegreen3"           "palegreen4"           "paleturquoise"       
## [520] "paleturquoise1"       "paleturquoise2"       "paleturquoise3"      
## [523] "paleturquoise4"       "palevioletred"        "palevioletred1"      
## [526] "palevioletred2"       "palevioletred3"       "palevioletred4"      
## [529] "papayawhip"           "peachpuff"            "peachpuff1"          
## [532] "peachpuff2"           "peachpuff3"           "peachpuff4"          
## [535] "peru"                 "pink"                 "pink1"               
## [538] "pink2"                "pink3"                "pink4"               
## [541] "plum"                 "plum1"                "plum2"               
## [544] "plum3"                "plum4"                "powderblue"          
## [547] "purple"               "purple1"              "purple2"             
## [550] "purple3"              "purple4"              "red"                 
## [553] "red1"                 "red2"                 "red3"                
## [556] "red4"                 "rosybrown"            "rosybrown1"          
## [559] "rosybrown2"           "rosybrown3"           "rosybrown4"          
## [562] "royalblue"            "royalblue1"           "royalblue2"          
## [565] "royalblue3"           "royalblue4"           "saddlebrown"         
## [568] "salmon"               "salmon1"              "salmon2"             
## [571] "salmon3"              "salmon4"              "sandybrown"          
## [574] "seagreen"             "seagreen1"            "seagreen2"           
## [577] "seagreen3"            "seagreen4"            "seashell"            
## [580] "seashell1"            "seashell2"            "seashell3"           
## [583] "seashell4"            "sienna"               "sienna1"             
## [586] "sienna2"              "sienna3"              "sienna4"             
## [589] "skyblue"              "skyblue1"             "skyblue2"            
## [592] "skyblue3"             "skyblue4"             "slateblue"           
## [595] "slateblue1"           "slateblue2"           "slateblue3"          
## [598] "slateblue4"           "slategray"            "slategray1"          
## [601] "slategray2"           "slategray3"           "slategray4"          
## [604] "slategrey"            "snow"                 "snow1"               
## [607] "snow2"                "snow3"                "snow4"               
## [610] "springgreen"          "springgreen1"         "springgreen2"        
## [613] "springgreen3"         "springgreen4"         "steelblue"           
## [616] "steelblue1"           "steelblue2"           "steelblue3"          
## [619] "steelblue4"           "tan"                  "tan1"                
## [622] "tan2"                 "tan3"                 "tan4"                
## [625] "thistle"              "thistle1"             "thistle2"            
## [628] "thistle3"             "thistle4"             "tomato"              
## [631] "tomato1"              "tomato2"              "tomato3"             
## [634] "tomato4"              "turquoise"            "turquoise1"          
## [637] "turquoise2"           "turquoise3"           "turquoise4"          
## [640] "violet"               "violetred"            "violetred1"          
## [643] "violetred2"           "violetred3"           "violetred4"          
## [646] "wheat"                "wheat1"               "wheat2"              
## [649] "wheat3"               "wheat4"               "whitesmoke"          
## [652] "yellow"               "yellow1"              "yellow2"             
## [655] "yellow3"              "yellow4"              "yellowgreen"
## Only the colour names that contain `green`.
grep(pattern="green", x=colors(), value=TRUE)
##  [1] "darkgreen"         "darkolivegreen"    "darkolivegreen1"   "darkolivegreen2"  
##  [5] "darkolivegreen3"   "darkolivegreen4"   "darkseagreen"      "darkseagreen1"    
##  [9] "darkseagreen2"     "darkseagreen3"     "darkseagreen4"     "forestgreen"      
## [13] "green"             "green1"            "green2"            "green3"           
## [17] "green4"            "greenyellow"       "lawngreen"         "lightgreen"       
## [21] "lightseagreen"     "limegreen"         "mediumseagreen"    "mediumspringgreen"
## [25] "palegreen"         "palegreen1"        "palegreen2"        "palegreen3"       
## [29] "palegreen4"        "seagreen"          "seagreen1"         "seagreen2"        
## [33] "seagreen3"         "seagreen4"         "springgreen"       "springgreen1"     
## [37] "springgreen2"      "springgreen3"      "springgreen4"      "yellowgreen"
## Pie chart of the table currently stored in `x`.
pie(x,
    main="Tipo de câmbio",
    col=c("seagreen", "yellowgreen"))

## Frequency of each car colour (`cor`).
x <- xtabs(~cor, data=dus)
levels(dus$cor)
## [1] "Azul"       "Branco"     "Cinza"      "Indefinida" "Prata"      "Preto"     
## [7] "Verde"      "Vermelho"   "Vinho"
## Widen the left margin so the horizontal category labels fit. par()
## returns the previous settings, which are restored after the plot so the
## wide margin does not leak into the plots that follow.
op <- par(mar=c(4.1,7.1,2.1,2.1))
## One fill colour per level, in the same order as levels(dus$cor).
barplot(x, horiz=TRUE, las=1,
        col=c("blue", "white", "gray50", "Yellow", "gray90", "black",
            "green4", "red", "red4"))
## Axis titles are written by hand in the margins.
mtext(side=2, text="Cor", line=5)
mtext(side=1, text="Frequência absoluta", line=2)
box(bty="L")
par(op)

##-----------------------------------------------------------------------------
## Gráficos de barras empilhadas (stacked) e lado a lado.

## Two-way table: `ano` in the rows, `cambio` in the columns.
x <- xtabs(~ano+cambio, data=dus)
x
##       cambio
## ano    AUTOMÁTICO MANUAL
##   2011         45     61
##   2012         31    145
##   2013         90    162
##   2014          4     29
## Stacked bars: one bar per column of `x`, stacked by row.
barplot(x, ylab="Frequência absoluta", xlab="Câmbio")

## Fill colours, one per transmission type.
colcamb <- c("seagreen", "yellowgreen")
## Transposing the table gives one bar per year, stacked by transmission.
barplot(t(x),
        col=colcamb,
        ylab="Frequência absoluta",
        xlab="Ano")
legend("topleft", bty="n", fill=colcamb,
       legend=levels(dus$cambio))

## Side-by-side bars instead of stacked ones.
barplot(t(x), beside=TRUE,
        col=colcamb,
        ylab="Frequência absoluta", xlab="Ano")
legend("topleft", bty="n", fill=colcamb,
       legend=levels(dus$cambio))

##-----------------------------------------------------------------------------
## Anotações nas barras.

## Two-way table: `cambio` in the rows, `poten` in the columns.
x <- xtabs(~cambio+poten, data=dus)
x
##             poten
## cambio       1.6   2
##   AUTOMÁTICO   0 170
##   MANUAL     290 107
## Fill colours for the bars.
cols <- c("seagreen", "yellowgreen")

## Side-by-side bars. The value returned by barplot() holds the x position
## of each bar and is kept to place the labels later.
bp <- barplot(t(x), col=cols, beside=TRUE,
              ylab="Frequência absoluta", xlab="Tipo de câmbio")

bp
##      [,1] [,2]
## [1,]  1.5  4.5
## [2,]  2.5  5.5
## Height of a piece of text measured on the y scale of the current plot.
sh <- strheight("um texto qualquer")
sh
## [1] 10.75165
## New upper y limit: top of the current plot region plus three text heights.
lim <- par("usr")[4]+3*sh

## Redraw the plot leaving room for the text above the bars.
barplot(t(x), col=cols, beside=TRUE, ylim=c(0, lim),
        ylab="Frequência absoluta", xlab="Tipo de câmbio")
legend("topleft", title="Potência",
       bty="n", fill=cols, legend=c("1.6","2.0"))
## Write each count just above (pos=3) its bar.
text(x=c(bp), y=t(x), labels=t(x), pos=3)
box()

##-----------------------------------------------------------------------------
## Gráficos de mosaico.

## Two-way table: `ano` in the rows, `cambio` in the columns.
x <- xtabs(~ano+cambio, data=dus)
x
##       cambio
## ano    AUTOMÁTICO MANUAL
##   2011         45     61
##   2012         31    145
##   2013         90    162
##   2014          4     29
## Mosaic plot of the table as is, then of its transpose.
mosaicplot(x, xlab="Ano", ylab="Tipo de câmbio")

mosaicplot(t(x), ylab="Ano", xlab="Tipo de câmbio")

## Two-way table: `novo` in the rows, `poten` in the columns.
x <- xtabs(~novo+poten, data=dus)
x
##        poten
## novo    1.6   2
##   novo  104  99
##   usado 186 178
## No dependence between the two variables.
mosaicplot(x, col=c("#009054","#900039"),
           xlab="Condição", ylab="Potência")

## Mosaic plot of the three-way HairEyeColor table.
mosaicplot(HairEyeColor)

## Sum over the third dimension to get a two-way table, then plot it and
## its transpose.
a <- apply(HairEyeColor, MARGIN=c(1,2), FUN=sum)
mosaicplot(a)

mosaicplot(t(a))

## Pode-se especificar cores com a trinca RGB (red, green, blue),
## pode-se usar o padrão hexadecimal html para cores.

## Visite estes sites para pegar cores.
## browseURL("http://www.w3schools.com/html/html_colors.asp")
## browseURL("http://html-color-codes.info/")

##-----------------------------------------------------------------------------
## Histograma.

## Histogram of the price with every default.
hist(dus$valor)

## Same histogram with axis labels and a fill colour, plus a rug with one
## tick per observation.
hist(dus$valor, col="orange",
     ylab="Frequência absoluta",
     xlab="Preço de venda (R$)")
rug(dus$valor)

## When `breaks` is a single number it is taken as a *suggestion* for the
## number of classes.
hist(dus$valor,
     col="orange",
     breaks=15,
     ylab="Frequência absoluta",
     xlab="Preço de venda (R$)")
rug(dus$valor)

## When `breaks` is a vector its values are the class limits used to bin
## the observations.
hist(dus$valor, breaks=seq(from=35000, to=75000, by=2500),
     main=NULL, sub="Amplitude de classe de R$ 2500",
     col="#7700B7", ylab="Frequência absoluta",
     xlab="Preço de venda (R$)")

## Histogram whose bar height is the density rather than the frequency.
## freq=FALSE is spelled out instead of relying on `prob` partially matching
## the `probability` argument, and the y label now names what is plotted.
hist(dus$valor, freq=FALSE, breaks=seq(35000, 75000, 2500),
     xlab="Preço de venda (R$)",
     ylab="Densidade", col="#7700B7",
     sub="Amplitude de classe de R$ 2500", main=NULL)

## In this plot the areas of the rectangles add up to 1, because class
## width times density is the relative frequency and the relative
## frequencies sum to 1.

## Same plot with classes 2000 wide; the subtitle states that width.
hist(dus$valor, freq=FALSE, breaks=seq(35000, 75000, 2000),
     xlab="Preço de venda (R$)",
     ylab="Densidade", col="#6E0039",
     sub="Amplitude de classe de R$ 2000", main=NULL)
rug(dus$valor) ## Draws small tick marks on the x axis.

##-----------------------------------------------------------------------------
## Anotações sobre um histograma.

## With a good command of R one can build spectacular plots, such as the
## one with varying colour tone shown later.

## Density histogram with classes 2000 wide; the returned object (breaks,
## counts, ...) is kept in `ht` for the annotations below. The y label and
## the subtitle match what is drawn (density, width 2000).
ht <- hist(dus$valor, freq=FALSE, breaks=seq(35000, 75000, 2000),
           xlab="Preço de venda (R$)",
           ylab="Densidade", col="#6E0039",
           sub="Amplitude de classe de R$ 2000")
rug(dus$valor) ## Draws small tick marks on the x axis.

## Highlight the bar of the modal class with a different colour.
wm <- which.max(ht$counts)
cols <- rep("yellow", length(ht$counts))
cols[wm] <- "red"
cols
##  [1] "yellow" "yellow" "yellow" "yellow" "yellow" "yellow" "yellow" "red"    "yellow"
## [10] "yellow" "yellow" "yellow" "yellow" "yellow" "yellow" "yellow" "yellow" "yellow"
## [19] "yellow" "yellow"
plot(ht, col=cols)

## Draw the two segments whose crossing gives the interpolated mode.
## First: from the top-left corner of the modal bar to the top of the next
## class at the right break.
ycoor <- with(ht, counts[wm+0:1])
xcoor <- with(ht, breaks[wm+0:1])
segments(xcoor[1], ycoor[1], xcoor[2], ycoor[2])

## Second: from the top of the previous class at the left break to the
## top-right corner of the modal bar.
ycoor <- with(ht, counts[wm-1:0])
xcoor <- with(ht, breaks[wm+0:1])
segments(xcoor[1], ycoor[1], xcoor[2], ycoor[2])

## By similar triangles the mode is the lower limit of the modal class plus
## the class width times d[1]/(d[1]+d[2]), where d holds the absolute count
## differences to the previous and to the next class.
ac <- with(ht, diff(breaks[1:2]))
d <- with(ht, abs(diff(counts[wm+(-1:1)])))
xmoda <- with(ht, breaks[wm]+(ac*d[1])/sum(d)); xmoda
## [1] 49666.67
## Vertical lines at the sample mean and at the interpolated mode.
abline(v=mean(dus$valor))
abline(v=xmoda, col="yellow")

## How to polish a histogram.
## Invisible first pass (no fill, no lines, no annotation, no axes) that
## only sets up the coordinate system.
plot(ht, axes=FALSE, ann=FALSE, lty=0, col=NULL)
## Dashed horizontal reference lines, drawn before the bars.
abline(lty=2, h=seq(from=0, to=100, by=10))
## The bars are added on top of the reference lines.
plot(ht, add=TRUE, axes=FALSE, ann=FALSE, col=cols)
rug(dus$valor)
## Axes with hand-picked tick positions.
axis(side=1, at=seq(from=35000, to=75000, by=5000))
axis(side=2, at=seq(from=0, to=100, by=10))
box(bty="L")
## Titles and axis labels.
title(xlab="Valor (R$)",
      ylab="Frequência absoluta",
      main="Histograma do valor (R$)",
      sub="Dados retirados do webmotors.com")
## Sample size written in the top margin.
mtext(text=paste("Amostra de tamanho", length(dus$valor)),
      side=3, line=0)
## Authorship note in the right outer margin.
mtext(text="Feito por Walmes Zeviani",
      side=4, line=-1, outer=TRUE, adj=0, col="gray70")
legend("topright", bty="n", legend="Classe modal", fill="red")

## Another variation of a histogram.
## Compute the histogram without drawing it.
ht <- hist(dus$valor, breaks=seq(35000, 75000, 2000), plot=FALSE)
nc <- length(ht$mids)             ## Number of classes.
ac <- diff(ht$breaks[1:2])        ## Class width.
ma <- mean(dus$valor)             ## Sample mean.
md <- median(dus$valor)           ## Sample median.
qts <- fivenum(dus$valor)[c(2,4)] ## Lower and upper hinges (about Q1, Q3).
modal <- which.max(ht$counts)     ## Index of the modal class.
## Midpoint and count of the modal class.
modal <- list(x=ht$mids[modal], y=ht$counts[modal])
## One colour per class, with the green component going from 0.1 to 0.9.
colseq <- rgb(red=0.25, blue=0.7,
              green=seq(0.1, 0.9, length.out=nc))

## The y range is extended by one text height so the count written above
## the modal bar fits inside the plot.
plot(ht, col=colseq, ylim=c(0, modal$y+strheight("1")),
     xlab="Preço de venda (R$)",
     ylab="Frequência absoluta",
     sub=paste("Amplitude de classe de R$", ac),
     main=NULL, border="gray50")
text(x=modal$x, y=modal$y, labels=modal$y, pos=3)
rug(dus$valor)
## Arrow pointing at the mean on the x axis, with its label on top.
arrows(ma, 0, ma, modal$y/3, code=1, length=0.15)
text(ma, modal$y/3, labels=paste("Média:", round(ma,2)), pos=3)
## Arrow pointing at the median on the x axis. Its label is anchored at the
## median (not at the mean) and put on the side facing away from the mean
## arrow: left when the median is below the mean, right otherwise.
arrows(md, 0, md, modal$y/6, code=1, length=0.15)
text(md, modal$y/6, labels=paste("Mediana:", round(md,1)),
     pos=if (md<ma) 2 else 4)
box()

## Responda: o que de informação foi acrescentado com as barras mudando
## de cor? Alguns não gostam de distração ou poluição visual, outros
## acreditam que isso atrai o leitor.

##-----------------------------------------------------------------------------
## Gráficos de densidade.

## Kernel density estimate of the price. The triangular-kernel result is
## overwritten right away by the rectangular-kernel one with bandwidth 3000.
den <- density(x=dus$valor, kernel="triangular")
den <- density(x=dus$valor, bw=3000, kernel="rectangular")
str(object=den)
## List of 7
##  $ x        : num [1:512] 29000 29103 29205 29308 29411 ...
##  $ y        : num [1:512] 0 0 0 0 0 ...
##  $ bw       : num 3000
##  $ n        : int 567
##  $ call     : language density.default(x = dus$valor, bw = 3000, kernel = "rectangular")
##  $ data.name: chr "dus$valor"
##  $ has.na   : logi FALSE
##  - attr(*, "class")= chr "density"
## Default plot of the density estimate.
plot(den)

## Structure of the density object once more, for reference.
str(den)
## List of 7
##  $ x        : num [1:512] 29000 29103 29205 29308 29411 ...
##  $ y        : num [1:512] 0 0 0 0 0 ...
##  $ bw       : num 3000
##  $ n        : int 567
##  $ call     : language density.default(x = dus$valor, bw = 3000, kernel = "rectangular")
##  $ data.name: chr "dus$valor"
##  $ has.na   : logi FALSE
##  - attr(*, "class")= chr "density"
## Point of highest estimated density: its x position and its height.
modal <- which.max(den$y)
modal <- list(x=den$x[modal], y=den$y[modal])

## Empty frame first (type="n"); the y range is extended by one text height
## so the label above the peak fits inside the plot.
plot(den, type="n", xlab="Preço de venda (R$)", ylab="Densidade",
     ylim=c(0, modal$y+strheight("1")), main="",
     sub=paste("Bandwidth:", round(den$bw,3)))
## Filled density curve, a vertical line at the peak and its x value on top.
with(den, polygon(x, y, col="gray90"))
with(modal, segments(x, 0, x, y, col=2))
with(modal, text(x, y, labels=round(x, 2), pos=3))
## `ma` and `md` are the sample mean and median computed earlier in the
## script.
arrows(ma, 0, ma, modal$y/3, code=1, length=0.15)
text(ma, modal$y/3, labels=paste("Média:", round(ma,2)), pos=3)
## The median label is anchored at the median (not at the mean) and put on
## the side facing away from the mean arrow.
arrows(md, 0, md, modal$y/6, code=1, length=0.15)
text(md, modal$y/6, labels=paste("Mediana:", round(md,1)),
     pos=if (md<ma) 2 else 4)
rug(dus$valor)

## Empirical cumulative distribution function of the price.
y <- ecdf(dus$valor)
plot(y)

## Same plot with axis labels, no title, vertical steps drawn and the
## point size set to NA.
plot(y, main=NULL, verticals=TRUE, cex=NA,
     ylab="Frequência relativa acumulada",
     xlab="Preço de venda (R$)")

## Highlight the share of vehicles priced from 50 to 60 thousand.
lim <- c(50000,60000)
## Relative frequency of the three classes delimited by `lim`.
ptbl <- prop.table(table(cut(dus$valor,
                             breaks=c(-Inf,lim,Inf))))
## Cumulative relative frequency at each of the two limits.
cs <- cumsum(ptbl)[seq_along(lim)]

plot(y, xlab="Preço de venda (R$)",
     ylab="Frequência relativa acumulada",
     cex=NA, verticals=TRUE, main=NULL)
## Vertical guides from the x axis up to the curve.
segments(lim, 0, lim, cs, lty=2)
## Horizontal guides from the curve to the left edge of the plot region.
## par("usr") is c(x1, x2, y1, y2), so the left edge is element 1; element 3
## is a y limit and must not be used as an x coordinate.
segments(lim, cs, par("usr")[1], cs, lty=2)
## Double-headed arrow spanning the two cumulative frequencies, labelled
## with the relative frequency of the middle class.
arrows(lim[1], cs[1], lim[1], cs[2], code=3, length=0.15)
text(lim[1], median(cs), labels=round(ptbl[2], 4),
     srt=90, adj=c(0.5,-0.5))
rug(dus$valor)

##-----------------------------------------------------------------------------
## Diagrama de dispersão.

## Price against distance travelled, with every default.
plot(valor~km, data=dus)

## Same scatter plot with axis labels.
plot(valor~km, data=dus,
     ylab="Preço de venda (R$)",
     xlab="Distância percorrida (km)")

## Add a smooth trend line (lowess).
plot(valor~km, data=dus,
     ylab="Preço de venda (R$)",
     xlab="Distância percorrida (km)")
lines(lowess(x=dus$km, y=dus$valor), lwd=2)

## Usar cores diferentes para identificar o tipo de câmbio, com linhas
## de tendência e grid.
plot(valor~km, data=dus, col=c(2,4)[dus$cambio],
     xlab="Distância percorrida (km)",
     ylab="Preço de venda (R$)")
with(subset(dus, cambio=="AUTOMÁTICO"),
     lines(lowess(x=km, y=valor), col=2, lwd=1.5,))
with(subset(dus, cambio=="MANUAL"),
     lines(lowess(x=km, y=valor), col=4, lwd=1.5,))
legend("topright", lty=1, col=c(2,4), lwd=1.5,
       legend=levels(dus$cambio), bty="n")
grid()

##-----------------------------------------------------------------------------

## Sale price by model year.
boxplot(valor ~ ano, data = dus)

## Sale price by trim level, still with the raw factor labels.
boxplot(valor ~ cat, data = dus)

## Show the raw labels before relabelling.
levels(dus$cat)
## [1] " "              " DYNAMIQUE "    " EXPRESSION "   " TECH ROAD "    " TECH ROAD II "
## Relabel positionally, following the level order printed above.
levels(dus$cat) <- c(
  "?",
  "Dynamique",
  "Expression",
  "Tech Road I",
  "Tech Road II"
)

boxplot(valor ~ cat, data = dus,
        ylab = "Preço de venda (R$)", xlab = "Modelo")

## Box widths proportional to the square root of each group's size.
pal <- c("#583882", "#35165F", "#43256C", "#705199", "#8A71AA")
boxplot(valor ~ cat, data = dus,
        col = pal, varwidth = TRUE, pars = list(boxwex = 1.25),
        ylab = "Preço de venda (R$)", xlab = "Modelo")

## Number of vehicles in each trim level.
table(dus$cat)
## 
##            ?    Dynamique   Expression  Tech Road I Tech Road II 
##           27          355           38          145            2
## Mark the mean of each group on top of the boxplots.
mds <- tapply(dus$valor, dus$cat, mean)

boxplot(valor ~ cat, data = dus,
        ylab = "Preço de venda (R$)", xlab = "Modelo")
points(x = seq_along(mds), y = mds, cex = 1.5, pch = 15)

##-----------------------------------------------------------------------------
## Plots showing the mean of each group with an error bar of one
## standard deviation on each side.

## One row per category; `valor` becomes a two-column matrix holding the
## mean (m) and the standard deviation (s).
res <- aggregate(valor ~ cat, data = dus,
                 FUN = function(v) c(m = mean(v), s = sd(v)))

## Lower and upper limits of the error bars, plus the integer code of
## each category used as its x position.
res$lwr <- res$valor[, "m"] - res$valor[, "s"]
res$upr <- res$valor[, "m"] + res$valor[, "s"]
res$catf <- as.integer(res$cat)
res
##            cat   valor.m   valor.s      lwr      upr catf
## 1            ? 47241.814  3635.043 43606.77 50876.86    1
## 2    Dynamique 53107.021  5065.612 48041.41 58172.63    2
## 3   Expression 47610.658  4800.317 42810.34 52410.98    3
## 4  Tech Road I 59625.931  3896.752 55729.18 63522.68    4
## 5 Tech Road II 69245.000  4603.265 64641.73 73848.27    5
## Empty canvas sized to hold every error bar, without an x axis.
matplot(x = res$catf, y = cbind(res$lwr, res$upr),
        type = "n", ann = FALSE, xaxt = "n",
        xlim = extendrange(x = res$catf, f = 0.1),
        ylim = extendrange(x = c(res$lwr, res$upr), f = 0.1))
grid()
## Means as solid dots, error bars as flat-capped double arrows.
points(x = res$catf, y = res$valor[, "m"], pch = 19)
arrows(res$catf, res$lwr, res$catf, res$upr,
       code = 3, angle = 90, length = 0.1)
## Category names on the x axis.
axis(side = 1, at = res$catf, labels = as.character(res$cat))
title(xlab = "Categoria", ylab = "Valor (R$)")
mtext(side = 3, line = 0,
      text = expression("Barras de erro representam "*bar(x) %+-% 1*s))

## Same plot with a narrow boxplot drawn beside each error bar.
## The y range also covers the raw prices so the whiskers and outliers
## of the boxplots fit inside the plot region.
matplot(x = res$catf, y = cbind(res$lwr, res$upr),
        type = "n", ann = FALSE, xaxt = "n",
        xlim = extendrange(x = res$catf, f = 0.1),
        ylim = extendrange(x = c(res$lwr, res$upr, dus$valor), f = 0.1))
grid()
## Means as solid dots, error bars as flat-capped double arrows.
points(x = res$catf, y = res$valor[, "m"], pch = 19)
arrows(res$catf, res$lwr, res$catf, res$upr,
       code = 3, angle = 90, length = 0.1)
axis(side = 1, at = res$catf, labels = as.character(res$cat))
title(xlab = "Categoria", ylab = "Valor (R$)")
mtext(side = 3, line = 0,
      text = expression("Barras de erro representam "*bar(x) %+-% 1*s))
## Boxplots overlaid on the existing plot, shifted 0.2 to the right.
boxplot(valor ~ cat, data = dus,
        add = TRUE, ann = FALSE, axes = FALSE,
        at = seq_len(nlevels(dus$cat)) + 0.2,
        col = "gray45", pars = list(boxwex = 0.1))

## Jittered raw points on the left of each error bar and a boxplot on
## the right, plus a narrow right-hand panel showing the marginal
## distribution of the price (histogram and kernel density, rotated).

## Axis limits shared by both panels so the y scales line up.
xlim <- extendrange(x=seq_len(nlevels(dus$cat)), f=0.1)
ylim <- extendrange(x=c(res$lwr, res$upr, dus$valor), f=0.1)

## Keep the previous margins so they can be restored at the end.
op <- par(mar=c(5.1,4.1,4.1,0))
layout(matrix(c(1,2), ncol=2), widths=c(0.85,0.15))
## Left panel: raw prices, jittered and shifted 0.2 to the left.
with(dus,
     plot.default(
         x=jitter(as.integer(cat), factor=0.25)-0.2,
         y=valor, xaxt="n", ann=FALSE, col="gray50",
         xlim=xlim, ylim=ylim))
grid()
with(res, points(x=catf, y=valor[,"m"], pch=19))
with(res, arrows(catf, lwr, catf, upr, code=3,
                 angle=90, length=0.05))
with(res, axis(side=1, at=catf, labels=as.character(cat), cex.axis=0.95))
title(xlab="Categoria", ylab="Valor (R$)")
mtext(side=3, line=0,
      text=expression("Barras de erro representam "*bar(x) %+-% 1*s))
## Boxplots overlaid on the same panel, shifted 0.2 to the right.
boxplot(valor~cat,
        at=seq_len(nlevels(dus$cat))+0.2, col="gray45",
        data=dus, add=TRUE, ann=FALSE, axes=FALSE,
        pars=list(boxwex=0.1))
## Right panel: almost no left margin so it sits against the main panel.
par(mar=c(5.1,0.1,4.1,1))
## Histogram computed but not drawn; its bars are drawn sideways below.
yhist <- hist(dus$valor, plot=FALSE, breaks=20)
str(yhist)
## List of 6
##  $ breaks  : num [1:19] 38000 40000 42000 44000 46000 48000 50000 52000 54000 56000 ...
##  $ counts  : int [1:18] 2 4 15 15 62 91 53 54 68 50 ...
##  $ density : num [1:18] 1.76e-06 3.53e-06 1.32e-05 1.32e-05 5.47e-05 ...
##  $ mids    : num [1:18] 39000 41000 43000 45000 47000 49000 51000 53000 55000 57000 ...
##  $ xname   : chr "dus$valor"
##  $ equidist: logi TRUE
##  - attr(*, "class")= chr "histogram"
## Empty canvas: price on the y axis (same `ylim` as the left panel),
## density on the x axis.
with(yhist,
     plot(x=NULL, y=NULL, ann=FALSE, axes=FALSE,
          ylim=ylim, xlim=c(0, max(density))))
rug(side=2, dus$valor)
## One horizontal bar per histogram class.
snc <- seq_along(yhist$mids)
with(yhist,
     rect(0, breaks[snc],
          density[snc], breaks[snc+1],
          col="gray70"))
## Kernel density with the axes swapped to match the rotated histogram.
den <- density(dus$valor)
with(den, lines(x=y, y=x, col="red", lwd=2))

## Restore the single-panel layout and the previous margins so that
## later plots on this device are not affected.
layout(1)
par(op)


## Record the R version, platform and package versions used to produce
## the output shown in this file (locale information is omitted).
print(sessionInfo(), locale=FALSE)
## R version 3.1.1 (2014-07-10)
## Platform: i686-pc-linux-gnu (32-bit)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  base     
## 
## other attached packages:
## [1] rmarkdown_0.3.3 knitr_1.7      
## 
## loaded via a namespace (and not attached):
## [1] digest_0.6.4    evaluate_0.5.5  formatR_1.0     htmltools_0.2.6 stringr_0.6.2  
## [6] tools_3.1.1     yaml_2.1.13
## Date and time at which the script was run.
Sys.time()
## [1] "2014-11-12 22:22:12 BRST"