Manipulação e Visualização de Dados

leg.ufpr.br/~walmes/cursoR/data-vis

1 Imóveis à venda em Curitiba

1.1 Imóveis

1.2 Qual a distribuição do preço das coberturas?

# Histogram of `price` with a rug marking the individual observations.
# `bins = 30` spells out the bin count ggplot2 would otherwise fall back
# to on its own (while printing a "pick better value" message).
ggplot(data = imo,
       mapping = aes(x = price)) +
    geom_histogram(bins = 30, color = "black", fill = mycol[1]) +
    geom_rug()

# Same histogram with the price axis on the log10 scale.
ggplot(data = imo,
       mapping = aes(x = price)) +
    geom_histogram(bins = 30, color = "black", fill = mycol[1]) +
    geom_rug() +
    scale_x_log10()

1.3 Só gráficos por enquanto

# Price divided by 1000, shown as a kernel density estimate with a rug
# of the observations.
ggplot(imo, aes(x = price/1000)) +
    geom_density(color = "black") +
    geom_rug()

# Empirical cumulative distribution of the same variable, drawn as a
# step function.
ggplot(imo, aes(x = price/1000)) +
    geom_step(stat = "ecdf") +
    geom_rug()

#-----------------------------------------------------------------------
# Area.

# Kernel density estimate of `area`, with a rug of the observations.
ggplot(data = imo,
       mapping = aes(x = area)) +
    geom_density(color = "black") +
    geom_rug()

# Empirical cumulative distribution of `area`.
ggplot(data = imo,
       mapping = aes(x = area)) +
    geom_step(stat = "ecdf") +
    geom_rug()

# Single boxplot of `area` on the y axis. The x aesthetic is the
# constant 1, so the rug is limited to the left side: a bottom rug would
# only stack every tick at x = 1.
ggplot(data = imo,
       mapping = aes(x = 1, y = area)) +
    geom_boxplot(color = mycol[2]) +
    geom_rug(sides = "l")

#-----------------------------------------------------------------------

# Bar charts of row counts for each value of four columns of `imo`,
# each filled with a different colour from `mycol`.
ggplot(imo, aes(x = garages)) +
    geom_bar(fill = mycol[3])

ggplot(imo, aes(x = bathroom)) +
    geom_bar(fill = mycol[4])

ggplot(imo, aes(x = suites)) +
    geom_bar(fill = mycol[5])

ggplot(imo, aes(x = rooms)) +
    geom_bar(fill = mycol[1])

#-----------------------------------------------------------------------

# Scatter plot of price (divided by 1000) against area.
ggplot(imo, aes(x = area, y = price/1000)) +
    geom_point(color = mycol[1])

# Same scatter plot with both axes on the log10 scale.
ggplot(imo, aes(x = area, y = price/1000)) +
    geom_point(color = mycol[1]) +
    scale_x_log10() +
    scale_y_log10()

# Triathlon results, read directly from the remote tab-separated file.
url <- "http://leg.ufpr.br/~walmes/data/triathlon.txt"
# browseURL(url)

# The first row holds the column names, quoting is disabled and text
# after "#" is treated as a comment.
tri <- read_tsv(url, col_names = TRUE, quote = "", comment = "#")
str(tri, give.attr = FALSE)
## Classes 'tbl_df', 'tbl' and 'data.frame':    9545 obs. of  9 variables:
##  $ nome    : chr  "RIEDERER SVEN" "GOMEZ NOYA JAVIER" "BROWNLEE ALISTAIR" "MOULAI TONY" ...
##  $ categ   : chr  "S2M" "S2M" "U23" "S3M" ...
##  $ sexo    : chr  "M" "M" "M" "M" ...
##  $ edicao  : chr  "course-108" "course-108" "course-108" "course-108" ...
##  $ ano     : int  2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 ...
##  $ tnadar  : num  10.6 24.8 14.4 50.8 10.3 ...
##  $ tpedalar: num  18.7 67.2 12.4 36.1 18.8 ...
##  $ tcorrer : num  37.7 72.7 46.5 156.9 36.3 ...
##  $ ttotal  : num  32.8 83 13.4 43 26.7 ...
# Keep only the rows whose `sexo` is "M" or "F".
tri <- filter(tri, sexo %in% c("M", "F"))

# Number of rows per year.
ggplot(tri, aes(x = ano)) +
    geom_bar()

# Number of rows per category; axes flipped so the bars run
# horizontally.
ggplot(tri, aes(x = categ)) +
    geom_bar() +
    coord_flip()

# Number of rows per sex.
ggplot(tri, aes(x = sexo)) +
    geom_bar()

#-----------------------------------------------------------------------

# Rows per year split by sex: stacked bars first, then side by side.
ggplot(tri, aes(x = ano, fill = sexo)) +
    geom_bar(position = "stack", color = "black")

ggplot(tri, aes(x = ano, fill = sexo)) +
    geom_bar(position = "dodge", color = "black")

#-----------------------------------------------------------------------

# Density of each of the three time columns, with outline and
# semi-transparent fill coloured by sex, plus a rug of the observations.
ggplot(tri, aes(x = tnadar, color = sexo, fill = sexo)) +
    geom_density(alpha = 0.15) +
    geom_rug()

ggplot(tri, aes(x = tcorrer, color = sexo, fill = sexo)) +
    geom_density(alpha = 0.15) +
    geom_rug()

ggplot(tri, aes(x = tpedalar, color = sexo, fill = sexo)) +
    geom_density(alpha = 0.15) +
    geom_rug()

#-----------------------------------------------------------------------

# Empirical cumulative distribution of `tpedalar`, one step curve per
# sex.
ggplot(tri, aes(x = tpedalar, color = sexo, fill = sexo)) +
    geom_step(stat = "ecdf") +
    geom_rug()

#-----------------------------------------------------------------------

# Long format: the three time columns are stacked into `tempo`, with
# the name of the source column stored in `ativ`.
tril <- gather(tri,
               key = "ativ",
               value = "tempo",
               tnadar, tcorrer, tpedalar)
str(tril)
## Classes 'tbl_df', 'tbl' and 'data.frame':    28065 obs. of  8 variables:
##  $ nome  : chr  "RIEDERER SVEN" "GOMEZ NOYA JAVIER" "BROWNLEE ALISTAIR" "MOULAI TONY" ...
##  $ categ : chr  "S2M" "S2M" "U23" "S3M" ...
##  $ sexo  : chr  "M" "M" "M" "M" ...
##  $ edicao: chr  "course-108" "course-108" "course-108" "course-108" ...
##  $ ano   : int  2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 ...
##  $ ttotal: num  32.8 83 13.4 43 26.7 ...
##  $ ativ  : chr  "tnadar" "tnadar" "tnadar" "tnadar" ...
##  $ tempo : num  10.6 24.8 14.4 50.8 10.3 ...
# One density curve per activity, time axis on the log10 scale.
ggplot(tril, aes(x = tempo, color = ativ)) +
    geom_density(alpha = 0.15) +
    scale_x_log10() +
    geom_rug()

# Time fractions: `ttotal` is recomputed as the sum of the three parts,
# then each part is divided by that new total.
tri <- mutate(tri,
              ttotal = tnadar + tpedalar + tcorrer,
              pnadar = tnadar/ttotal,
              ppedalar = tpedalar/ttotal,
              pcorrer = tcorrer/ttotal)
str(tri)
## Classes 'tbl_df', 'tbl' and 'data.frame':    9355 obs. of  12 variables:
##  $ nome    : chr  "RIEDERER SVEN" "GOMEZ NOYA JAVIER" "BROWNLEE ALISTAIR" "MOULAI TONY" ...
##  $ categ   : chr  "S2M" "S2M" "U23" "S3M" ...
##  $ sexo    : chr  "M" "M" "M" "M" ...
##  $ edicao  : chr  "course-108" "course-108" "course-108" "course-108" ...
##  $ ano     : int  2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 ...
##  $ tnadar  : num  10.6 24.8 14.4 50.8 10.3 ...
##  $ tpedalar: num  18.7 67.2 12.4 36.1 18.8 ...
##  $ tcorrer : num  37.7 72.7 46.5 156.9 36.3 ...
##  $ ttotal  : num  67 164.7 73.3 243.8 65.4 ...
##  $ pnadar  : num  0.158 0.15 0.196 0.208 0.158 ...
##  $ ppedalar: num  0.279 0.408 0.169 0.148 0.288 ...
##  $ pcorrer : num  0.563 0.441 0.635 0.644 0.554 ...
#-----------------------------------------------------------------------

library(plotrix)

# triax.plot(x = with(tri, na.omit(cbind(pnadar, ppedalar, pcorrer))))

# Two side-by-side triangle plots of the three time fractions, one per
# sex. Rows with a missing fraction are dropped before plotting.
par(mfrow = c(1, 2))
triax.plot(
    x = na.omit(with(subset(tri, sexo == "F"),
                     cbind(pnadar, ppedalar, pcorrer))),
    main = "F",
    col.symbols = mycol[2]
)
triax.plot(
    x = na.omit(with(subset(tri, sexo == "M"),
                     cbind(pnadar, ppedalar, pcorrer))),
    main = "M",
    col.symbols = mycol[1]
)

# Back to a single-panel layout.
layout(1)
# Football players data, read directly from the remote tab-separated
# file.
url <- "http://leg.ufpr.br/~walmes/data/euro_football_players.txt"
# browseURL(url)

# The first row holds the column names, quoting is disabled and text
# after "#" is treated as a comment.
pla <- read_tsv(url, col_names = TRUE, quote = "", comment = "#")
str(pla, give.attr = FALSE)
## Classes 'tbl_df', 'tbl' and 'data.frame':    1528 obs. of  17 variables:
##  $ country: chr  "Austria" "Austria" "Austria" "Austria" ...
##  $ team   : chr  "Salzburg" "Salzburg" "Salzburg" "Salzburg" ...
##  $ name   : chr  "Sadio Mané" "Kevin Kampl" "Alan" "André Ramalho" ...
##  $ pos    : chr  "M(L)" "M(R)" "FW" "D(C)" ...
##  $ age    : int  21 23 24 22 23 28 28 24 21 28 ...
##  $ cm     : int  175 180 182 182 180 172 180 186 184 175 ...
##  $ kg     : int  69 63 73 77 71 69 71 74 78 69 ...
##  $ apps   : chr  "9" "9" "8(1)" "9" ...
##  $ goal   : int  4 2 4 1 NA NA 8 NA NA NA ...
##  $ ass    : int  3 4 2 NA NA 1 1 NA NA NA ...
##  $ yel    : int  1 2 NA 4 NA 3 1 1 3 NA ...
##  $ red    : int  1 NA 1 NA NA NA NA NA NA NA ...
##  $ spg    : num  2 2 4.2 0.9 0.5 1.6 4.1 0.8 1.1 0.9 ...
##  $ ps     : num  77 83.9 60.8 72.3 86.3 79.4 72.8 74.7 69.1 64.5 ...
##  $ aw     : num  1.2 0.3 3.8 3.2 3 0.5 0.3 4 3.4 2.8 ...
##  $ mom    : int  3 1 2 1 NA NA NA NA NA NA ...
##  $ rt     : num  7.98 7.93 7.91 7.67 7.59 7.55 7.54 7.46 7.38 7.27 ...
# Number of rows per country, horizontal bars.
ggplot(pla, aes(x = country)) +
    geom_bar() +
    coord_flip()

# Same chart with the countries ordered by their number of rows.
ggplot(pla, aes(x = reorder(country, country, length))) +
    geom_bar() +
    coord_flip()

# Number of rows per team, ordered the same way.
ggplot(pla, aes(x = reorder(team, team, length))) +
    geom_bar() +
    coord_flip()

# Number of rows per age.
ggplot(pla, aes(x = age)) +
    geom_bar()

# Density of `cm`.
ggplot(pla, aes(x = cm)) +
    geom_density()

# Density of `kg`.
ggplot(pla, aes(x = kg)) +
    geom_density()

# NOTE(review): this repeats the `kg` density just above; possibly a
# different variable was intended -- confirm.
ggplot(pla, aes(x = kg)) +
    geom_density()

# Boxplots of `cm` for each country.
ggplot(pla, aes(x = country, y = cm)) +
    geom_boxplot()

# Same boxplots, countries ordered by median `cm` (missing values
# ignored), drawn horizontally.
ggplot(pla, aes(x = reorder(country, cm, median, na.rm = TRUE),
                y = cm)) +
    geom_boxplot() +
    coord_flip()

# `posit` keeps only the leading run of capital letters of `pos`
# (e.g. "M(L)" becomes "M").
pla <- mutate(pla,
              posit = gsub("^([A-Z]+).*$", "\\1", pos))
table(pla$posit)
## 
##  AM   D  DM  FW  GK   M 
## 183 507  97 236 173 332
# Boxplots of `cm` per position, positions ordered by median `cm`.
ggplot(pla, aes(x = reorder(posit, cm, median, na.rm = TRUE),
                y = cm)) +
    geom_boxplot()

# `kg` against `cm`.
ggplot(pla, aes(x = cm, y = kg)) +
    geom_point()

# Same scatter plot with the points coloured by position.
ggplot(pla, aes(x = cm, y = kg, color = posit)) +
    geom_point()

# Same scatter plot with one panel per position.
ggplot(pla, aes(x = cm, y = kg)) +
    geom_point() +
    facet_wrap(facets = ~posit)

# Car ratings, read directly from the remote tab-separated file.
url <- "http://leg.ufpr.br/~walmes/data/aval_carros_nota.txt"
# browseURL(url)

not <- read_tsv(url)
str(not, give.attr = FALSE)
## Classes 'tbl_df', 'tbl' and 'data.frame':    124980 obs. of  4 variables:
##  $ carro: chr  "gol" "gol" "gol" "gol" ...
##  $ dono : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ item : chr  "Estilo" "Acabamento" "Posição de dirigir" "Instrumentos" ...
##  $ nota : int  8 7 9 8 7 7 9 9 10 7 ...
# Mean `nota` for every combination of `carro` and `item`.
notm <- summarize(group_by(not, carro, item),
                  nota = mean(nota))
notm
## # A tibble: 510 x 3
## # Groups:   carro [34]
##    carro item             nota
##    <chr> <chr>           <dbl>
##  1 agile Acabamento       6.36
##  2 agile Câmbio           6.56
##  3 agile Consumo          6.42
##  4 agile Custo-Benefício  6.34
##  5 agile Desempenho       6.70
##  6 agile Estabilidade     7.25
##  7 agile Estilo           8.42
##  8 agile Freios           6.59
##  9 agile Instrumentos     8.33
## 10 agile Interior         8.39
## # … with 500 more rows
# Mean score per car, one coloured point per item.
ggplot(data = notm,
       aes(x = carro, y = nota, color = item)) +
    geom_point()

# Same data as one line per item across the cars.
ggplot(data = notm,
       aes(x = carro, y = nota, color = item, group = item)) +
    geom_line()

# One panel per item, a single line running across the cars.
ggplot(data = notm,
       aes(x = carro, y = nota)) +
    geom_line(aes(group = 1)) +
    facet_wrap(facets = ~item)

# One panel per car, a single line running across the items.
ggplot(data = notm,
       aes(x = item, y = nota)) +
    geom_line(aes(group = 1)) +
    facet_wrap(facets = ~carro)

#-----------------------------------------------------------------------

# Mean scores of two cars only.
dts <- notm %>%
    filter(carro %in% c("crossfox", "ecosport"))
dts
## # A tibble: 30 x 3
## # Groups:   carro [2]
##    carro    item             nota
##    <chr>    <chr>           <dbl>
##  1 crossfox Acabamento       6.80
##  2 crossfox Câmbio           8.70
##  3 crossfox Consumo          6.44
##  4 crossfox Custo-Benefício  7.02
##  5 crossfox Desempenho       8.52
##  6 crossfox Estabilidade     7.79
##  7 crossfox Estilo           8.96
##  8 crossfox Freios           7.92
##  9 crossfox Instrumentos     7.77
## 10 crossfox Interior         8.05
## # … with 20 more rows
# One line per car across the items.
ggplot(data = dts,
       aes(x = item, y = nota)) +
    geom_line(aes(color = carro, group = carro))

# Wide format: one row per car, one column per item.
aux <- spread(dts, key = "item", value = "nota")
aux
## # A tibble: 2 x 16
## # Groups:   carro [2]
##   carro Acabamento Câmbio Consumo `Custo-Benefíci… Desempenho Estabilidade Estilo Freios
##   <chr>      <dbl>  <dbl>   <dbl>            <dbl>      <dbl>        <dbl>  <dbl>  <dbl>
## 1 cros…       6.80   8.70    6.44             7.02       8.52         7.79   8.96   7.92
## 2 ecos…       6.12   8.30    7.42             7.31       7.85         8.63   9.48   8.69
## # … with 7 more variables: Instrumentos <dbl>, Interior <dbl>, Motor <dbl>,
## #   `Porta-malas` <dbl>, `Posição de dirigir` <dbl>, Recomendação <dbl>, Suspensão <dbl>
# Score columns only (the first column, `carro`, is dropped).
m <- as.data.frame(aux[, -1])
# Prepend a row of 10s and a row of 0s, named like the score columns;
# with its default `maxmin = TRUE`, radarchart() reads the first two
# rows as the maximum and minimum of each axis.
m <- rbind(
    matrix(c(10, 0),
           nrow = 2,
           ncol = ncol(m),
           dimnames = list(NULL, names(m))),
    m
)

library(fmsb)

radarchart(m)
# Legend with one entry per car, using colours and line types 1 and 2.
legend("topright", legend = aux$carro, col = 1:2, lty = 1:2, bty = "n")

#-----------------------------------------------------------------------

# Slope-style chart: for each item, a line joins the mean scores of the
# two cars. Item names are written in black next to the "ecosport" end,
# which is discrete position 2 on the x axis.
ggplot(dts,
       mapping = aes(x = carro,
                     y = nota,
                     group = item,
                     color = item)) +
    geom_point() +
    geom_line() +
    geom_text(data = filter(dts,
                            carro == "ecosport"),
              mapping = aes(x = 2,
                            y = nota,
                            label = item),
              color = "black",
              hjust = 0,
              nudge_x = 0.02,
              size = 3.5) +
    # NOTE(review): these limits zoom in between discrete positions 1
    # and 2, so the points and labels sitting at those positions appear
    # to fall outside the panel -- confirm this is intended.
    coord_cartesian(xlim = c(1.25, 1.75)) +
    # "none" hides the colour legend; passing FALSE here is deprecated
    # in current ggplot2 and raises a warning.
    guides(colour = "none")

# Items sorted by the score difference crossfox - ecosport; that order
# becomes the level order of `dts$item`.
aux <- dts %>%
    spread(key = "carro", value = "nota") %>%
    mutate(diff = crossfox - ecosport) %>%
    arrange(diff)
dts$item <- factor(dts$item, levels = aux$item)
dts <- dts %>%
    arrange(item)

# Dodged horizontal bars per item, items ordered by their mean score.
# geom_col() is the direct form of geom_bar(stat = "identity"), and a
# constant colour needs no I() wrapper.
ggplot(dts,
       mapping = aes(x = reorder(item,
                                 nota,
                                 mean),
                     y = nota,
                     fill = carro)) +
    geom_col(position = "dodge",
             color = "black") +
    coord_flip()

# Same bars with the items in the factor order set above, i.e. by the
# difference between the two cars.
ggplot(dts,
       mapping = aes(x = item,
                     y = nota,
                     fill = carro)) +
    geom_col(position = "dodge",
             color = "black") +
    coord_flip()

25px

Licença Creative Commons 4.0

Este conteúdo está disponível por meio da Licença Creative Commons 4.0