## Análise exploratória
##-----------------------------------------------------------------------------
## Dados de carros Duster à venda no webmotors em 26/03/2014.
## Read the Duster listings (tab-separated file with a header row).
## `stringsAsFactors=TRUE` is requested explicitly: since R 4.0.0 the
## default is FALSE, and the rest of this script calls levels() and
## nlevels() on the text columns and indexes colour vectors by them.
## `encoding` is spelled "UTF-8", the value documented for read.table().
dus <-
    read.table("http://www.leg.ufpr.br/~walmes/data/duster_venda_260314.txt",
               header=TRUE, sep="\t", encoding="UTF-8",
               stringsAsFactors=TRUE)
## dus <-
##     read.table("/home/walmes/Dropbox/XML-leituras/carros/duster_venda_260314.txt",
##                header=TRUE, sep="\t", encoding="utf-8")
## Strip a trailing "/dddd" suffix from `ano` and store the result as a
## factor.
dus$ano <- factor(sub(pattern="/\\d{4}$", replacement="",
                      x=as.character(dus$ano)))
str(dus)
## 'data.frame': 699 obs. of 10 variables:
## $ modelo: Factor w/ 11 levels "RENAULT DUSTER 1.6 4X2 16V FLEX 4P MANUAL",..: 3 1 2 2 1 1 2 3 6 2 ...
## $ cor : Factor w/ 9 levels "Azul","Branco",..: 5 1 5 5 5 8 6 6 6 6 ...
## $ km : int 31442 40800 56000 NA 45000 50000 44000 30000 41000 55000 ...
## $ ano : Factor w/ 4 levels "2011","2012",..: 1 1 1 1 1 2 2 2 1 1 ...
## $ valor : num 41990 42500 42900 42990 43800 ...
## $ cambio: Factor w/ 2 levels "AUTOMÁTICO","MANUAL": 2 2 2 2 2 2 2 2 2 2 ...
## $ poten : num 1.6 1.6 1.6 1.6 1.6 1.6 1.6 1.6 2 1.6 ...
## $ trac : Factor w/ 2 levels "4X2","4X4": 1 1 1 1 1 1 1 1 1 1 ...
## $ cat : Factor w/ 5 levels " "," DYNAMIQUE ",..: 3 1 2 2 1 1 2 3 2 2 ...
## $ novo : Factor w/ 2 levels "novo","usado": 2 2 2 2 2 2 2 2 2 2 ...
## Number of NA values in each column. colSums() over is.na() counts
## them directly, without apply() first coercing the whole data frame
## to a character matrix.
colSums(is.na(dus))
## modelo cor km ano valor cambio poten trac cat novo
## 0 0 132 0 0 0 0 0 0 0
## Drop every record that has at least one NA.
dus <- na.omit(dus)
str(dus)
## 'data.frame': 567 obs. of 10 variables:
## $ modelo: Factor w/ 11 levels "RENAULT DUSTER 1.6 4X2 16V FLEX 4P MANUAL",..: 3 1 2 1 1 2 3 6 2 1 ...
## $ cor : Factor w/ 9 levels "Azul","Branco",..: 5 1 5 5 8 6 6 6 6 5 ...
## $ km : int 31442 40800 56000 45000 50000 44000 30000 41000 55000 60000 ...
## $ ano : Factor w/ 4 levels "2011","2012",..: 1 1 1 1 2 2 2 1 1 1 ...
## $ valor : num 41990 42500 42900 43800 43999 ...
## $ cambio: Factor w/ 2 levels "AUTOMÁTICO","MANUAL": 2 2 2 2 2 2 2 2 2 2 ...
## $ poten : num 1.6 1.6 1.6 1.6 1.6 1.6 1.6 2 1.6 1.6 ...
## $ trac : Factor w/ 2 levels "4X2","4X4": 1 1 1 1 1 1 1 1 1 1 ...
## $ cat : Factor w/ 5 levels " "," DYNAMIQUE ",..: 3 1 2 1 1 2 3 2 2 1 ...
## $ novo : Factor w/ 2 levels "novo","usado": 2 2 2 2 2 2 2 2 2 2 ...
## - attr(*, "na.action")=Class 'omit' Named int [1:132] 4 15 16 26 34 36 39 47 52 53 ...
## .. ..- attr(*, "names")= chr [1:132] "4" "15" "16" "26" ...
##-----------------------------------------------------------------------------
## Bar charts and pie charts.
x <- table(dus$cambio)
class(x)
## [1] "table"
## A result coming from xtabs() also carries class `table`.
x <- xtabs(~cambio, data=dus)
class(x)
## [1] "xtabs" "table"
## barplot(x)
barplot(x,
        xlab="Tipo de câmbio",
        ylab="Frequência absoluta",
        col=c("seagreen", "yellowgreen"))
## With horiz=TRUE the counts run along the x axis and the categories
## along the y axis, so the two axis labels trade places relative to
## the vertical version above.
barplot(x, horiz=TRUE,
        xlab="Frequência absoluta",
        ylab="Tipo de câmbio",
        col=c("seagreen", "yellowgreen"))
box(bty="L")
## Colours with `green` in the name: start from the complete list of
## colour names.
grDevices::colors()
## [1] "white" "aliceblue" "antiquewhite"
## [4] "antiquewhite1" "antiquewhite2" "antiquewhite3"
## [7] "antiquewhite4" "aquamarine" "aquamarine1"
## [10] "aquamarine2" "aquamarine3" "aquamarine4"
## [13] "azure" "azure1" "azure2"
## [16] "azure3" "azure4" "beige"
## [19] "bisque" "bisque1" "bisque2"
## [22] "bisque3" "bisque4" "black"
## [25] "blanchedalmond" "blue" "blue1"
## [28] "blue2" "blue3" "blue4"
## [31] "blueviolet" "brown" "brown1"
## [34] "brown2" "brown3" "brown4"
## [37] "burlywood" "burlywood1" "burlywood2"
## [40] "burlywood3" "burlywood4" "cadetblue"
## [43] "cadetblue1" "cadetblue2" "cadetblue3"
## [46] "cadetblue4" "chartreuse" "chartreuse1"
## [49] "chartreuse2" "chartreuse3" "chartreuse4"
## [52] "chocolate" "chocolate1" "chocolate2"
## [55] "chocolate3" "chocolate4" "coral"
## [58] "coral1" "coral2" "coral3"
## [61] "coral4" "cornflowerblue" "cornsilk"
## [64] "cornsilk1" "cornsilk2" "cornsilk3"
## [67] "cornsilk4" "cyan" "cyan1"
## [70] "cyan2" "cyan3" "cyan4"
## [73] "darkblue" "darkcyan" "darkgoldenrod"
## [76] "darkgoldenrod1" "darkgoldenrod2" "darkgoldenrod3"
## [79] "darkgoldenrod4" "darkgray" "darkgreen"
## [82] "darkgrey" "darkkhaki" "darkmagenta"
## [85] "darkolivegreen" "darkolivegreen1" "darkolivegreen2"
## [88] "darkolivegreen3" "darkolivegreen4" "darkorange"
## [91] "darkorange1" "darkorange2" "darkorange3"
## [94] "darkorange4" "darkorchid" "darkorchid1"
## [97] "darkorchid2" "darkorchid3" "darkorchid4"
## [100] "darkred" "darksalmon" "darkseagreen"
## [103] "darkseagreen1" "darkseagreen2" "darkseagreen3"
## [106] "darkseagreen4" "darkslateblue" "darkslategray"
## [109] "darkslategray1" "darkslategray2" "darkslategray3"
## [112] "darkslategray4" "darkslategrey" "darkturquoise"
## [115] "darkviolet" "deeppink" "deeppink1"
## [118] "deeppink2" "deeppink3" "deeppink4"
## [121] "deepskyblue" "deepskyblue1" "deepskyblue2"
## [124] "deepskyblue3" "deepskyblue4" "dimgray"
## [127] "dimgrey" "dodgerblue" "dodgerblue1"
## [130] "dodgerblue2" "dodgerblue3" "dodgerblue4"
## [133] "firebrick" "firebrick1" "firebrick2"
## [136] "firebrick3" "firebrick4" "floralwhite"
## [139] "forestgreen" "gainsboro" "ghostwhite"
## [142] "gold" "gold1" "gold2"
## [145] "gold3" "gold4" "goldenrod"
## [148] "goldenrod1" "goldenrod2" "goldenrod3"
## [151] "goldenrod4" "gray" "gray0"
## [154] "gray1" "gray2" "gray3"
## [157] "gray4" "gray5" "gray6"
## [160] "gray7" "gray8" "gray9"
## [163] "gray10" "gray11" "gray12"
## [166] "gray13" "gray14" "gray15"
## [169] "gray16" "gray17" "gray18"
## [172] "gray19" "gray20" "gray21"
## [175] "gray22" "gray23" "gray24"
## [178] "gray25" "gray26" "gray27"
## [181] "gray28" "gray29" "gray30"
## [184] "gray31" "gray32" "gray33"
## [187] "gray34" "gray35" "gray36"
## [190] "gray37" "gray38" "gray39"
## [193] "gray40" "gray41" "gray42"
## [196] "gray43" "gray44" "gray45"
## [199] "gray46" "gray47" "gray48"
## [202] "gray49" "gray50" "gray51"
## [205] "gray52" "gray53" "gray54"
## [208] "gray55" "gray56" "gray57"
## [211] "gray58" "gray59" "gray60"
## [214] "gray61" "gray62" "gray63"
## [217] "gray64" "gray65" "gray66"
## [220] "gray67" "gray68" "gray69"
## [223] "gray70" "gray71" "gray72"
## [226] "gray73" "gray74" "gray75"
## [229] "gray76" "gray77" "gray78"
## [232] "gray79" "gray80" "gray81"
## [235] "gray82" "gray83" "gray84"
## [238] "gray85" "gray86" "gray87"
## [241] "gray88" "gray89" "gray90"
## [244] "gray91" "gray92" "gray93"
## [247] "gray94" "gray95" "gray96"
## [250] "gray97" "gray98" "gray99"
## [253] "gray100" "green" "green1"
## [256] "green2" "green3" "green4"
## [259] "greenyellow" "grey" "grey0"
## [262] "grey1" "grey2" "grey3"
## [265] "grey4" "grey5" "grey6"
## [268] "grey7" "grey8" "grey9"
## [271] "grey10" "grey11" "grey12"
## [274] "grey13" "grey14" "grey15"
## [277] "grey16" "grey17" "grey18"
## [280] "grey19" "grey20" "grey21"
## [283] "grey22" "grey23" "grey24"
## [286] "grey25" "grey26" "grey27"
## [289] "grey28" "grey29" "grey30"
## [292] "grey31" "grey32" "grey33"
## [295] "grey34" "grey35" "grey36"
## [298] "grey37" "grey38" "grey39"
## [301] "grey40" "grey41" "grey42"
## [304] "grey43" "grey44" "grey45"
## [307] "grey46" "grey47" "grey48"
## [310] "grey49" "grey50" "grey51"
## [313] "grey52" "grey53" "grey54"
## [316] "grey55" "grey56" "grey57"
## [319] "grey58" "grey59" "grey60"
## [322] "grey61" "grey62" "grey63"
## [325] "grey64" "grey65" "grey66"
## [328] "grey67" "grey68" "grey69"
## [331] "grey70" "grey71" "grey72"
## [334] "grey73" "grey74" "grey75"
## [337] "grey76" "grey77" "grey78"
## [340] "grey79" "grey80" "grey81"
## [343] "grey82" "grey83" "grey84"
## [346] "grey85" "grey86" "grey87"
## [349] "grey88" "grey89" "grey90"
## [352] "grey91" "grey92" "grey93"
## [355] "grey94" "grey95" "grey96"
## [358] "grey97" "grey98" "grey99"
## [361] "grey100" "honeydew" "honeydew1"
## [364] "honeydew2" "honeydew3" "honeydew4"
## [367] "hotpink" "hotpink1" "hotpink2"
## [370] "hotpink3" "hotpink4" "indianred"
## [373] "indianred1" "indianred2" "indianred3"
## [376] "indianred4" "ivory" "ivory1"
## [379] "ivory2" "ivory3" "ivory4"
## [382] "khaki" "khaki1" "khaki2"
## [385] "khaki3" "khaki4" "lavender"
## [388] "lavenderblush" "lavenderblush1" "lavenderblush2"
## [391] "lavenderblush3" "lavenderblush4" "lawngreen"
## [394] "lemonchiffon" "lemonchiffon1" "lemonchiffon2"
## [397] "lemonchiffon3" "lemonchiffon4" "lightblue"
## [400] "lightblue1" "lightblue2" "lightblue3"
## [403] "lightblue4" "lightcoral" "lightcyan"
## [406] "lightcyan1" "lightcyan2" "lightcyan3"
## [409] "lightcyan4" "lightgoldenrod" "lightgoldenrod1"
## [412] "lightgoldenrod2" "lightgoldenrod3" "lightgoldenrod4"
## [415] "lightgoldenrodyellow" "lightgray" "lightgreen"
## [418] "lightgrey" "lightpink" "lightpink1"
## [421] "lightpink2" "lightpink3" "lightpink4"
## [424] "lightsalmon" "lightsalmon1" "lightsalmon2"
## [427] "lightsalmon3" "lightsalmon4" "lightseagreen"
## [430] "lightskyblue" "lightskyblue1" "lightskyblue2"
## [433] "lightskyblue3" "lightskyblue4" "lightslateblue"
## [436] "lightslategray" "lightslategrey" "lightsteelblue"
## [439] "lightsteelblue1" "lightsteelblue2" "lightsteelblue3"
## [442] "lightsteelblue4" "lightyellow" "lightyellow1"
## [445] "lightyellow2" "lightyellow3" "lightyellow4"
## [448] "limegreen" "linen" "magenta"
## [451] "magenta1" "magenta2" "magenta3"
## [454] "magenta4" "maroon" "maroon1"
## [457] "maroon2" "maroon3" "maroon4"
## [460] "mediumaquamarine" "mediumblue" "mediumorchid"
## [463] "mediumorchid1" "mediumorchid2" "mediumorchid3"
## [466] "mediumorchid4" "mediumpurple" "mediumpurple1"
## [469] "mediumpurple2" "mediumpurple3" "mediumpurple4"
## [472] "mediumseagreen" "mediumslateblue" "mediumspringgreen"
## [475] "mediumturquoise" "mediumvioletred" "midnightblue"
## [478] "mintcream" "mistyrose" "mistyrose1"
## [481] "mistyrose2" "mistyrose3" "mistyrose4"
## [484] "moccasin" "navajowhite" "navajowhite1"
## [487] "navajowhite2" "navajowhite3" "navajowhite4"
## [490] "navy" "navyblue" "oldlace"
## [493] "olivedrab" "olivedrab1" "olivedrab2"
## [496] "olivedrab3" "olivedrab4" "orange"
## [499] "orange1" "orange2" "orange3"
## [502] "orange4" "orangered" "orangered1"
## [505] "orangered2" "orangered3" "orangered4"
## [508] "orchid" "orchid1" "orchid2"
## [511] "orchid3" "orchid4" "palegoldenrod"
## [514] "palegreen" "palegreen1" "palegreen2"
## [517] "palegreen3" "palegreen4" "paleturquoise"
## [520] "paleturquoise1" "paleturquoise2" "paleturquoise3"
## [523] "paleturquoise4" "palevioletred" "palevioletred1"
## [526] "palevioletred2" "palevioletred3" "palevioletred4"
## [529] "papayawhip" "peachpuff" "peachpuff1"
## [532] "peachpuff2" "peachpuff3" "peachpuff4"
## [535] "peru" "pink" "pink1"
## [538] "pink2" "pink3" "pink4"
## [541] "plum" "plum1" "plum2"
## [544] "plum3" "plum4" "powderblue"
## [547] "purple" "purple1" "purple2"
## [550] "purple3" "purple4" "red"
## [553] "red1" "red2" "red3"
## [556] "red4" "rosybrown" "rosybrown1"
## [559] "rosybrown2" "rosybrown3" "rosybrown4"
## [562] "royalblue" "royalblue1" "royalblue2"
## [565] "royalblue3" "royalblue4" "saddlebrown"
## [568] "salmon" "salmon1" "salmon2"
## [571] "salmon3" "salmon4" "sandybrown"
## [574] "seagreen" "seagreen1" "seagreen2"
## [577] "seagreen3" "seagreen4" "seashell"
## [580] "seashell1" "seashell2" "seashell3"
## [583] "seashell4" "sienna" "sienna1"
## [586] "sienna2" "sienna3" "sienna4"
## [589] "skyblue" "skyblue1" "skyblue2"
## [592] "skyblue3" "skyblue4" "slateblue"
## [595] "slateblue1" "slateblue2" "slateblue3"
## [598] "slateblue4" "slategray" "slategray1"
## [601] "slategray2" "slategray3" "slategray4"
## [604] "slategrey" "snow" "snow1"
## [607] "snow2" "snow3" "snow4"
## [610] "springgreen" "springgreen1" "springgreen2"
## [613] "springgreen3" "springgreen4" "steelblue"
## [616] "steelblue1" "steelblue2" "steelblue3"
## [619] "steelblue4" "tan" "tan1"
## [622] "tan2" "tan3" "tan4"
## [625] "thistle" "thistle1" "thistle2"
## [628] "thistle3" "thistle4" "tomato"
## [631] "tomato1" "tomato2" "tomato3"
## [634] "tomato4" "turquoise" "turquoise1"
## [637] "turquoise2" "turquoise3" "turquoise4"
## [640] "violet" "violetred" "violetred1"
## [643] "violetred2" "violetred3" "violetred4"
## [646] "wheat" "wheat1" "wheat2"
## [649] "wheat3" "wheat4" "whitesmoke"
## [652] "yellow" "yellow1" "yellow2"
## [655] "yellow3" "yellow4" "yellowgreen"
## Keep only the colour names that match "green".
colors()[grepl("green", colors())]
## [1] "darkgreen" "darkolivegreen" "darkolivegreen1" "darkolivegreen2"
## [5] "darkolivegreen3" "darkolivegreen4" "darkseagreen" "darkseagreen1"
## [9] "darkseagreen2" "darkseagreen3" "darkseagreen4" "forestgreen"
## [13] "green" "green1" "green2" "green3"
## [17] "green4" "greenyellow" "lawngreen" "lightgreen"
## [21] "lightseagreen" "limegreen" "mediumseagreen" "mediumspringgreen"
## [25] "palegreen" "palegreen1" "palegreen2" "palegreen3"
## [29] "palegreen4" "seagreen" "seagreen1" "seagreen2"
## [33] "seagreen3" "seagreen4" "springgreen" "springgreen1"
## [37] "springgreen2" "springgreen3" "springgreen4" "yellowgreen"
## Pie chart.
pie(x, col=c("seagreen", "yellowgreen"),
    main="Tipo de câmbio")
## Now for the car colours.
x <- xtabs(~cor, data=dus)
levels(dus$cor)
## [1] "Azul" "Branco" "Cinza" "Indefinida" "Prata" "Preto"
## [7] "Verde" "Vermelho" "Vinho"
## Widen the left margin so the horizontal colour names fit. par()
## returns the previous settings, which are put back once this figure
## is finished so the wide margin does not leak into later plots.
op <- par(mar=c(4.1,7.1,2.1,2.1))
barplot(x, horiz=TRUE, las=1,
        col=c("blue", "white", "gray50", "Yellow", "gray90", "black",
              "green4", "red", "red4"))
mtext(side=2, text="Cor", line=5)
mtext(side=1, text="Frequência absoluta", line=2)
box(bty="L")
par(op)
##-----------------------------------------------------------------------------
## Stacked and side-by-side bar charts.
x <- xtabs(~ano+cambio, data=dus)
x
## cambio
## ano AUTOMÁTICO MANUAL
## 2011 45 61
## 2012 31 145
## 2013 90 162
## 2014 4 29
## Stacked bars.
barplot(x, xlab="Câmbio", ylab="Frequência absoluta")
colcamb <- c("seagreen", "yellowgreen")
## The same transposed table is drawn twice: first stacked
## (beside=FALSE), then side by side (beside=TRUE), each with its own
## legend.
for (lado in c(FALSE, TRUE)) {
    barplot(t(x), beside=lado,
            xlab="Ano",
            ylab="Frequência absoluta",
            col=colcamb)
    legend("topleft", legend=levels(dus$cambio),
           fill=colcamb, bty="n")
}
##-----------------------------------------------------------------------------
## Annotations on the bars.
x <- xtabs(~cambio+poten, data=dus)
x
## poten
## cambio 1.6 2
## AUTOMÁTICO 0 170
## MANUAL 290 107
## Fill colours for the bars.
cols <- c("seagreen", "yellowgreen")
## Side-by-side bars. The value returned by barplot() is kept because
## it is reused below as the x position of each label.
bp <- barplot(height=t(x), beside=TRUE, col=cols,
              xlab="Tipo de câmbio", ylab="Frequência absoluta")
bp
## [,1] [,2]
## [1,] 1.5 4.5
## [2,] 2.5 5.5
## Height of a piece of text expressed on the y scale of the plot.
sh <- strheight("um texto qualquer")
sh
## [1] 10.75165
lim <- par("usr")[4] + 3*sh
## Redraw the plot leaving room for the text.
barplot(height=t(x), beside=TRUE, col=cols, ylim=c(0, lim),
        xlab="Tipo de câmbio", ylab="Frequência absoluta")
legend("topleft", title="Potência",
       legend=c("1.6","2.0"), fill=cols, bty="n")
text(x=as.vector(bp), y=t(x), labels=t(x), pos=3)
box()
##-----------------------------------------------------------------------------
## Mosaic plots.
x <- xtabs(~ano+cambio, data=dus)
x
## cambio
## ano AUTOMÁTICO MANUAL
## 2011 45 61
## 2012 31 145
## 2013 90 162
## 2014 4 29
mosaicplot(x, xlab="Ano", ylab="Tipo de câmbio")
mosaicplot(t(x), ylab="Ano", xlab="Tipo de câmbio")
x <- xtabs(~novo+poten, data=dus)
x
## poten
## novo 1.6 2
## novo 104 99
## usado 186 178
## No dependence between the two variables.
mosaicplot(x, col=c("#009054","#900039"),
           xlab="Condição", ylab="Potência")
## Built-in three-way table, then collapsed over its third dimension.
mosaicplot(HairEyeColor)
a <- apply(HairEyeColor, MARGIN=c(1,2), FUN=sum)
mosaicplot(a)
mosaicplot(t(a))
## Pode-se especificar cores com a trinca RGB (red, green, blue),
## pode-se usar o padrão hexadecimal html para cores.
## Visite estes sites para pegar cores.
## browseURL("http://www.w3schools.com/html/html_colors.asp")
## browseURL("http://html-color-codes.info/")
##-----------------------------------------------------------------------------
## Histogram.
hist(dus$valor)
hist(dus$valor, xlab="Preço de venda (R$)",
     ylab="Frequência absoluta", col="orange")
rug(dus$valor)
## When `breaks` is a scalar it is taken as a *suggestion* for the
## number of classes.
hist(dus$valor,
     breaks=15,
     xlab="Preço de venda (R$)",
     ylab="Frequência absoluta",
     col="orange")
rug(dus$valor)
## When `breaks` is a vector it is taken as the class limits used to
## bin the values.
hist(dus$valor, breaks=seq(35000, 75000, 2500),
     xlab="Preço de venda (R$)",
     ylab="Frequência absoluta", col="#7700B7",
     sub="Amplitude de classe de R$ 2500", main=NULL)
## Plot where the bar height is the density rather than the frequency;
## the y label says so (it previously still read "Frequência absoluta").
hist(dus$valor, prob=TRUE, breaks=seq(35000, 75000, 2500),
     xlab="Preço de venda (R$)",
     ylab="Densidade", col="#7700B7",
     sub="Amplitude de classe de R$ 2500", main=NULL)
## In this kind of plot the rectangle areas add up to 1, because class
## width times density is the relative frequency and the relative
## frequencies sum to 1.
## The class width here is 2000, so the subtitle states 2000.
hist(dus$valor, prob=TRUE, breaks=seq(35000, 75000, 2000),
     xlab="Preço de venda (R$)",
     ylab="Densidade", col="#6E0039",
     sub="Amplitude de classe de R$ 2000", main=NULL)
rug(dus$valor) ## Draws small ticks on the x axis.
##-----------------------------------------------------------------------------
## Annotations on a histogram.
## With a good command of R one can produce spectacular plots, such as
## this one with varying shades.
## The first call draws densities (prob=TRUE) with classes 2000 wide,
## so the y label and the subtitle state exactly that.
ht <- hist(dus$valor, prob=TRUE, breaks=seq(35000, 75000, 2000),
           xlab="Preço de venda (R$)",
           ylab="Densidade", col="#6E0039",
           sub="Amplitude de classe de R$ 2000")
rug(dus$valor) ## Draws small ticks on the x axis.
## Highlight the bar of the modal class with a different colour.
wm <- which.max(ht$counts)
cols <- rep("yellow", length(ht$counts))
cols[wm] <- "red"
cols
## [1] "yellow" "yellow" "yellow" "yellow" "yellow" "yellow" "yellow" "red" "yellow"
## [10] "yellow" "yellow" "yellow" "yellow" "yellow" "yellow" "yellow" "yellow" "yellow"
## [19] "yellow" "yellow"
plot(ht, col=cols)
## Draw the segments that locate the interpolated value of the mode.
## First segment: top of the modal bar to the top of the next bar.
ycoor <- with(ht, counts[wm+0:1])
xcoor <- with(ht, breaks[wm+0:1])
segments(xcoor[1], ycoor[1], xcoor[2], ycoor[2])
## Second segment: top of the previous bar to the top of the modal bar.
## `wm-1:0` is c(wm-1, wm) because `:` binds tighter than `-`.
ycoor <- with(ht, counts[wm-1:0])
xcoor <- with(ht, breaks[wm+0:1])
segments(xcoor[1], ycoor[1], xcoor[2], ycoor[2])
## By similar triangles the resulting mode is:
ac <- with(ht, diff(breaks[1:2]))
d <- with(ht, abs(diff(counts[wm+(-1:1)])))
xmoda <- with(ht, breaks[wm]+(ac*d[1])/sum(d)); xmoda
## [1] 49666.67
abline(v=mean(dus$valor))
abline(v=xmoda, col="yellow")
## How to polish a histogram.
## An invisible first pass sets up the coordinates so the horizontal
## guide lines end up underneath the bars added by the second pass.
plot(ht, col=NULL, lty=0, ann=FALSE, axes=FALSE)
abline(h=seq(0, 100, by=10), lty=2)
plot(ht, col=cols, ann=FALSE, axes=FALSE, add=TRUE)
rug(dus$valor)
axis(side=1, at=seq(35000, 75000, 5000))
axis(side=2, at=seq(0, 100, by=10))
box(bty="L")
title(main="Histograma do valor (R$)",
      sub="Dados retirados do webmotors.com",
      xlab="Valor (R$)",
      ylab="Frequência absoluta")
mtext(side=3, line=0,
      text=paste("Amostra de tamanho", length(dus$valor)))
mtext(side=4, line=-1, col="gray70", outer=TRUE, adj=0,
      text="Feito por Walmes Zeviani")
legend("topright", fill="red", legend="Classe modal", bty="n")
## Another variation of a histogram.
ht <- hist(dus$valor, breaks=seq(35000, 75000, 2000), plot=FALSE)
nc <- length(ht$mids)              ## Number of classes.
ac <- diff(ht$breaks[1:2])         ## Class width.
ma <- mean(dus$valor)              ## Sample mean.
md <- median(dus$valor)            ## Sample median.
qts <- fivenum(dus$valor)[c(2,4)]  ## Lower and upper hinges of the sample.
modal <- which.max(ht$counts)      ## Modal class.
modal <- list(x=ht$mids[modal], y=ht$counts[modal])
## One colour per class, with the green channel increasing along the
## classes.
colseq <- rgb(red=0.25, blue=0.7,
              green=seq(0.1, 0.9, length.out=nc))
plot(ht, col=colseq, ylim=c(0, modal$y+strheight("1")),
     xlab="Preço de venda (R$)",
     ylab="Frequência absoluta",
     sub=paste("Amplitude de classe de R$", ac),
     main=NULL, border="gray50")
text(x=modal$x, y=modal$y, labels=modal$y, pos=3)
rug(dus$valor)
arrows(ma, 0, ma, modal$y/3, code=1, length=0.15)
text(ma, modal$y/3, labels=paste("Média:", round(ma,2)), pos=3)
arrows(md, 0, md, modal$y/6, code=1, length=0.15)
## The median label is anchored at the median arrow (x=md); it was
## previously anchored at the mean (x=ma). It goes on the side of the
## arrow that faces away from the mean.
text(md, modal$y/6, labels=paste("Mediana:", round(md,1)),
     pos=if (md<ma) 2 else 4)
box()
## Responda: o que de informação foi acrescentado com as barras mudando
## de cor? Alguns não gostam de distração ou poluição visual, outros
## acreditam que isso atrai o leitor.
##-----------------------------------------------------------------------------
## Density plots.
## Two kernel density estimates of the price. The second assignment
## replaces the first, so `den` ends up holding the rectangular-kernel
## estimate with bandwidth 3000.
den <- density(x=dus$valor, kernel="triangular")
den <- density(x=dus$valor, bw=3000, kernel="rectangular")
str(den)
## List of 7
## $ x : num [1:512] 29000 29103 29205 29308 29411 ...
## $ y : num [1:512] 0 0 0 0 0 ...
## $ bw : num 3000
## $ n : int 567
## $ call : language density.default(x = dus$valor, bw = 3000, kernel = "rectangular")
## $ data.name: chr "dus$valor"
## $ has.na : logi FALSE
## - attr(*, "class")= chr "density"
plot(den)
str(den)
## List of 7
## $ x : num [1:512] 29000 29103 29205 29308 29411 ...
## $ y : num [1:512] 0 0 0 0 0 ...
## $ bw : num 3000
## $ n : int 567
## $ call : language density.default(x = dus$valor, bw = 3000, kernel = "rectangular")
## $ data.name: chr "dus$valor"
## $ has.na : logi FALSE
## - attr(*, "class")= chr "density"
## Point of the estimated curve with the highest density.
modal <- which.max(den$y)
modal <- list(x=den$x[modal], y=den$y[modal])
## Empty frame first (type="n"); the curve is then drawn as a filled
## polygon and annotated.
plot(den, type="n", xlab="Preço de venda (R$)", ylab="Densidade",
     ylim=c(0, modal$y+strheight("1")), main="",
     sub=paste("Bandwidth:", round(den$bw,3)))
with(den, polygon(x, y, col="gray90"))
with(modal, segments(x, 0, x, y, col=2))
with(modal, text(x, y, labels=round(x, 2), pos=3))
## `ma` and `md` are the sample mean and median computed earlier in the
## script.
arrows(ma, 0, ma, modal$y/3, code=1, length=0.15)
text(ma, modal$y/3, labels=paste("Média:", round(ma,2)), pos=3)
arrows(md, 0, md, modal$y/6, code=1, length=0.15)
## The median label is anchored at the median arrow (x=md); it was
## previously anchored at the mean (x=ma).
text(md, modal$y/6, labels=paste("Mediana:", round(md,1)),
     pos=if (md<ma) 2 else 4)
rug(dus$valor)
## Empirical cumulative frequency.
y <- ecdf(dus$valor)
plot(y)
plot(y, xlab="Preço de venda (R$)",
     ylab="Frequência relativa acumulada",
     cex=NA, verticals=TRUE, main=NULL)
## Highlight the share of vehicles priced from 50 to 60 thousand.
lim <- c(50000,60000)
## Relative frequency of the three price bands delimited by `lim`.
ptbl <- prop.table(table(cut(dus$valor,
                             breaks=c(-Inf,lim,Inf))))
## Cumulative relative frequency at each of the two limits.
cs <- cumsum(ptbl)[seq_along(lim)]
plot(y, xlab="Preço de venda (R$)",
     ylab="Frequência relativa acumulada",
     cex=NA, verticals=TRUE, main=NULL)
segments(lim, 0, lim, cs, lty=2)
## Horizontal guides run back to the left edge of the plot region,
## which is par("usr")[1] (the x minimum). Index 3, used previously, is
## the y minimum and only looked right because the line was clipped.
segments(lim, cs, par("usr")[1], cs, lty=2)
arrows(lim[1], cs[1], lim[1], cs[2], code=3, length=0.15)
text(lim[1], median(cs), labels=round(ptbl[2], 4),
     srt=90, adj=c(0.5,-0.5))
rug(dus$valor)
##-----------------------------------------------------------------------------
## Scatter plot.
plot(valor~km, data=dus)
plot(valor~km, data=dus,
     xlab="Distância percorrida (km)",
     ylab="Preço de venda (R$)")
## Add a smooth trend line.
plot(valor~km, data=dus,
     xlab="Distância percorrida (km)",
     ylab="Preço de venda (R$)")
with(dus, lines(lowess(x=km, y=valor), lwd=2))
## Use different colours to identify the transmission type, with trend
## lines and a grid. Indexing c(2,4) by `cambio` picks a colour per
## level, so `cambio` must be a factor here.
plot(valor~km, data=dus, col=c(2,4)[dus$cambio],
     xlab="Distância percorrida (km)",
     ylab="Preço de venda (R$)")
## One lowess curve per transmission type (the stray trailing commas
## that passed an empty argument to lines() are gone).
with(subset(dus, cambio=="AUTOMÁTICO"),
     lines(lowess(x=km, y=valor), col=2, lwd=1.5))
with(subset(dus, cambio=="MANUAL"),
     lines(lowess(x=km, y=valor), col=4, lwd=1.5))
legend("topright", lty=1, col=c(2,4), lwd=1.5,
       legend=levels(dus$cambio), bty="n")
grid()
##-----------------------------------------------------------------------------
## Box plots of the price by `ano` and by `cat`.
boxplot(formula=valor~ano, data=dus)
boxplot(formula=valor~cat, data=dus)
levels(dus$cat)
## [1] " " " DYNAMIQUE " " EXPRESSION " " TECH ROAD " " TECH ROAD II "
## Replace the raw level names with tidier labels, in the same order.
levels(dus$cat) <- c("?", "Dynamique", "Expression",
                     "Tech Road I", "Tech Road II")
boxplot(formula=valor~cat, data=dus,
        ylab="Preço de venda (R$)", xlab="Modelo")
## Widths proportional to the square root of the count in each group.
pal <- c("#583882","#35165F","#43256C","#705199","#8A71AA")
boxplot(formula=valor~cat, data=dus, col=pal,
        varwidth=TRUE, pars=list(boxwex=1.25),
        ylab="Preço de venda (R$)", xlab="Modelo")
table(dus$cat)
##
## ? Dynamique Expression Tech Road I Tech Road II
## 27 355 38 145 2
## Mark the value of the mean on each box.
mds <- tapply(dus$valor, dus$cat, mean)
boxplot(formula=valor~cat, data=dus,
        ylab="Preço de venda (R$)", xlab="Modelo")
points(x=seq_len(nlevels(dus$cat)), y=mds, pch=15, cex=1.5)
##-----------------------------------------------------------------------------
## Plots showing the mean, with error bars for the standard deviation.
## aggregate() stores both statistics in a two-column matrix named
## `valor`, with columns "m" and "s".
res <- aggregate(valor~cat, data=dus,
                 FUN=function(v) c(m=mean(v), s=sd(v)))
## Build the lower and upper limits (mean minus / plus one sd) and a
## numeric position for each category.
res <- transform(res,
                 lwr=valor[,"m"]-valor[,"s"],
                 upr=valor[,"m"]+valor[,"s"],
                 catf=as.integer(cat))
res
## cat valor.m valor.s lwr upr catf
## 1 ? 47241.814 3635.043 43606.77 50876.86 1
## 2 Dynamique 53107.021 5065.612 48041.41 58172.63 2
## 3 Expression 47610.658 4800.317 42810.34 52410.98 3
## 4 Tech Road I 59625.931 3896.752 55729.18 63522.68 4
## 5 Tech Road II 69245.000 4603.265 64641.73 73848.27 5
## Empty frame sized to hold every error bar; the x axis is drawn by
## hand further down so it can carry the category names.
with(res,
     matplot(x=catf, y=cbind(lwr, upr),
             type="n", ann=FALSE, xaxt="n",
             xlim=extendrange(x=catf, f=0.1),
             ylim=extendrange(x=c(lwr, upr), f=0.1)))
grid()
with(res, {
    points(x=catf, y=valor[,"m"], pch=19)
    arrows(catf, lwr, catf, upr, code=3,
           angle=90, length=0.1)
    axis(side=1, at=catf, labels=as.character(cat))
})
title(xlab="Categoria", ylab="Valor (R$)")
mtext(side=3, line=0,
      text=expression("Barras de erro representam "*bar(x) %+-% 1*s))
## With the box plot alongside.
## Same error-bar plot, but the y range also covers the raw prices so
## the box plots added at the end fit inside the frame.
with(res,
     matplot(x=catf, y=cbind(lwr, upr),
             type="n", ann=FALSE, xaxt="n",
             xlim=extendrange(x=catf, f=0.1),
             ylim=extendrange(x=c(lwr, upr, dus$valor), f=0.1)))
grid()
with(res, {
    points(x=catf, y=valor[,"m"], pch=19)
    arrows(catf, lwr, catf, upr, code=3,
           angle=90, length=0.1)
    axis(side=1, at=catf, labels=as.character(cat))
})
title(xlab="Categoria", ylab="Valor (R$)")
mtext(side=3, line=0,
      text=expression("Barras de erro representam "*bar(x) %+-% 1*s))
## Narrow box plots, shifted 0.2 to the right of each category
## position and added on top of the existing plot.
boxplot(formula=valor~cat, data=dus,
        at=1:nlevels(dus$cat)+0.2, col="gray45",
        add=TRUE, ann=FALSE, axes=FALSE,
        pars=list(boxwex=0.1))
## With the box plot and the scattered points on either side.
xlim <- extendrange(x=1:nlevels(dus$cat), f=0.1)
ylim <- extendrange(x=c(res$lwr, res$upr, dus$valor), f=0.1)
## Keep the previous graphical settings so they can be restored once
## the two-panel figure is complete.
op <- par(mar=c(5.1,4.1,4.1,0))
## Two panels side by side: the main plot (85% of the width) and a
## narrow marginal panel (15%).
layout(matrix(c(1,2), ncol=2), widths=c(0.85,0.15))
## Raw points, jittered and shifted 0.2 to the left of each category.
with(dus,
     plot.default(
         x=jitter(as.integer(cat), factor=0.25)-0.2,
         y=valor, xaxt="n", ann=FALSE, col="gray50",
         xlim=xlim, ylim=ylim))
grid()
with(res, points(x=catf, y=valor[,"m"], pch=19))
with(res, arrows(catf, lwr, catf, upr, code=3,
                 angle=90, length=0.05))
with(res, axis(side=1, at=catf, labels=as.character(cat), cex.axis=0.95))
title(xlab="Categoria", ylab="Valor (R$)")
mtext(side=3, line=0,
      text=expression("Barras de erro representam "*bar(x) %+-% 1*s))
## Narrow box plots shifted 0.2 to the right of each category.
boxplot(valor~cat,
        at=1:nlevels(dus$cat)+0.2, col="gray45",
        data=dus, add=TRUE, ann=FALSE, axes=FALSE,
        pars=list(boxwex=0.1))
## Marginal panel: almost no left margin so it sits against the main
## plot.
par(mar=c(5.1,0.1,4.1,1))
yhist <- hist(dus$valor, plot=FALSE, breaks=20)
str(yhist)
## List of 6
## $ breaks : num [1:19] 38000 40000 42000 44000 46000 48000 50000 52000 54000 56000 ...
## $ counts : int [1:18] 2 4 15 15 62 91 53 54 68 50 ...
## $ density : num [1:18] 1.76e-06 3.53e-06 1.32e-05 1.32e-05 5.47e-05 ...
## $ mids : num [1:18] 39000 41000 43000 45000 47000 49000 51000 53000 55000 57000 ...
## $ xname : chr "dus$valor"
## $ equidist: logi TRUE
## - attr(*, "class")= chr "histogram"
## Empty frame that shares the y range of the main panel; x runs over
## the histogram densities.
with(yhist,
     plot(x=NULL, y=NULL, ann=FALSE, axes=FALSE,
          ylim=ylim, xlim=c(0, max(density))))
rug(side=2, dus$valor)
## Draw the histogram sideways: one rectangle per class, from 0 to the
## class density, spanning the class limits on the y axis.
snc <- seq_along(yhist$mids)
with(yhist,
     rect(0, breaks[snc],
          density[snc], breaks[snc+1],
          col="gray70"))
## Kernel density estimate with its axes swapped to match.
den <- density(dus$valor)
with(den, lines(x=y, y=x, col="red", lwd=2))
## Back to a single-panel device and the previous margins.
layout(1)
par(op)
## Print the session information for this run, with the locale omitted.
print(sessionInfo(), locale=FALSE)
## R version 3.1.1 (2014-07-10)
## Platform: i686-pc-linux-gnu (32-bit)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets base
##
## other attached packages:
## [1] rmarkdown_0.3.3 knitr_1.7
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.4 evaluate_0.5.5 formatR_1.0 htmltools_0.2.6 stringr_0.6.2
## [6] tools_3.1.1 yaml_2.1.13
## Print the current date and time.
Sys.time()
## [1] "2014-11-12 22:22:12 BRST"