Configurando sessão R

library(knitr)
# Global knitr chunk defaults for every chunk in this report.
opts_chunk$set(
  # Evaluation behaviour.
  cache = FALSE,
  tidy = FALSE,
  eval.after = "fig.cap",
  # Figure size, placement and font family handed to the graphics device.
  fig.align = "center",
  fig.width = 9,
  fig.height = 7,
  dev.args = list(family = "Helvetica")
  # Device overrides kept for reference (currently disabled):
  # dpi = 96,
  # dev = "png",
)

# Pacotes.
library(ggplot2)
library(gridExtra)
library(reshape2)
library(gdata)

Lendo os dados

# Earlier approach, kept for reference: read the CSV export instead.
# u <- "2017 11 05 - Dados pré-Tx para R.csv"
# da <- read.csv(file = u,
#                na.strings = c("?", "9999"))
# str(da)

# Read from the xlsx spreadsheet, where the variables are not coded.
# Empty cells, "?" and "9999" are all treated as missing values.
u <- "2017 11 05 - Descrição I Var. Vanessa H..xlsx"
da <- read.xls(xls = u,
               sheet = 1,
               verbose = TRUE,
               # Full argument name: `na.string` only worked through
               # partial argument matching.
               na.strings = c("", "?", "9999"),
               # NOTE(review): presumably skips one line above the
               # column names -- confirm against the spreadsheet.
               skip = 1)
## Using perl at /usr/bin/perl 
## Using perl at /usr/bin/perl 
## 
## Converting xls file
##     "2017 11 05 - Descrição I Var. Vanessa H..xlsx" 
## to csv  file 
##     "/tmp/RtmpHQBTAn/file3366259e73ac.csv" 
## ... 
## 
## Executing ' '/usr/bin/perl' '/usr/lib/R/site-library/gdata/perl/xls2csv.pl'  '2017 11 05 - Descrição I Var. Vanessa H..xlsx' '/tmp/RtmpHQBTAn/file3366259e73ac.csv' '1' '... 
## 
## 0 
## 
## Done.
## 
## Reading csv file  "/tmp/RtmpHQBTAn/file3366259e73ac.csv" ...
## Done.
# Compact overview of the imported data frame (column types and first values).
str(object = da)
## 'data.frame':    178 obs. of  68 variables:
##  $ ID             : Factor w/ 178 levels "NTXC01","NTXC02",..: 112 113 114 115 116 117 118 119 120 121 ...
##  $ Pop            : Factor w/ 3 levels "NTXC","NTXS",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Pop_Art1       : Factor w/ 2 levels "Not selected",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Pop_Art3       : Factor w/ 2 levels "Not selected",..: 2 2 2 2 1 1 2 1 1 1 ...
##  $ Pop_Ana        : Factor w/ 1 level "Selected": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Model7         : int  2 3 4 1 3 4 2 5 2 1 ...
##  $ Age            : int  55 34 24 20 47 48 53 25 69 54 ...
##  $ Gender         : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 1 2 1 ...
##  $ Weight         : int  53 88 65 62 79 74 84 42 78 66 ...
##  $ BMI            : Factor w/ 4 levels "Normal weight",..: 1 3 1 1 3 1 3 1 1 3 ...
##  $ Height         : int  164 174 170 164 163 175 168 150 193 150 ...
##  $ N_pre_Tx       : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ Re_Tx          : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 2 1 1 ...
##  $ N_pre_blood_tx : int  1 0 0 0 2 0 3 16 0 2 ...
##  $ Multip         : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 2 ...
##  $ N_pregn        : int  0 0 0 0 0 0 0 0 0 3 ...
##  $ Abortions      : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ N_abortions    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PRA_30         : Factor w/ 2 levels "high PRA","low PRA": 2 1 1 2 2 1 2 1 1 2 ...
##  $ DSAtotal       : Factor w/ 2 levels "no total DSA",..: 1 1 2 1 1 2 1 2 1 1 ...
##  $ DSA_classI     : Factor w/ 2 levels "No DSA classI",..: 1 2 2 1 2 1 1 1 1 2 ...
##  $ DSA_classII    : Factor w/ 2 levels "No DSA classII",..: 1 1 1 1 1 1 1 2 1 1 ...
##  $ MHC_I          : Factor w/ 2 levels "no Ab anti-MHC-I",..: 2 2 2 1 2 2 2 2 2 2 ...
##  $ MHC_II         : Factor w/ 2 levels "no Ab anti-MHC-II",..: 1 2 1 2 1 2 1 2 1 1 ...
##  $ Hd_CAPD        : Factor w/ 4 levels "CAPD","Hd","Hd plus CAPD",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Time_dialysis  : int  51 13 15 0 83 54 48 12 96 96 ...
##  $ Ischemia       : Factor w/ 40 levels "00:00:00.00",..: 2 17 2 2 33 28 2 2 37 20 ...
##  $ HLA_MM         : Factor w/ 7 levels "1 Mismatch","2 Mismatches",..: 3 5 2 6 6 3 4 3 4 4 ...
##  $ D_typeI        : Factor w/ 2 levels "Alive donor",..: 1 2 1 1 2 2 1 1 2 2 ...
##  $ D_typeII       : Factor w/ 5 levels "Alive related donor 1 (brothers and parents) DVR1",..: 1 5 1 2 5 5 4 4 5 5 ...
##  $ D_gender       : Factor w/ 2 levels "F","M": 1 1 1 1 2 2 1 2 2 1 ...
##  $ D_age          : int  44 58 33 49 60 31 48 25 27 49 ...
##  $ Diff_age       : int  11 24 9 29 13 17 5 0 42 5 ...
##  $ Diff_weight    : int  22 8 10 15 4 36 21 21 8 4 ...
##  $ Diff_height    : int  6 9 15 3 12 15 12 21 3 15 ...
##  $ Ind_ATG        : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 2 1 1 ...
##  $ B_Hypert       : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ B_DM           : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ B_DLP          : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ B_GNC          : Factor w/ 2 levels "no","yes": 2 2 2 1 2 2 1 1 1 2 ...
##  $ B_PN           : Factor w/ 2 levels "no","yes": 2 1 1 1 2 1 1 2 1 1 ...
##  $ B_PKD          : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 2 1 ...
##  $ B_Hypert_nephro: Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 2 1 1 1 ...
##  $ B_CMV          : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ B_Chagas       : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ B_Toxoplasma   : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ B_EBV          : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ B_HBV          : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ B_HCV          : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ B_Others       : Factor w/ 2 levels "no","yes": 1 1 1 1 1 2 1 2 2 2 ...
##  $ B_G_other      : Factor w/ 2 levels "no","yes": 1 1 1 1 1 2 1 2 2 2 ...
##  $ B_G_virus      : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ B_G_parasitic  : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ DRC            : Factor w/ 3 levels "DRC-3A ou -3B",..: 3 3 3 2 3 3 3 2 2 3 ...
##  $ Clin_rej       : Factor w/ 2 levels "Developed clinical rejection",..: 2 1 1 1 2 1 1 2 1 1 ...
##  $ Diff_Lost      : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ Thrombosis     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ RA             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ RAC1           : int  0 1 2 0 0 0 0 0 5 3 ...
##  $ RAC2           : int  0 7 0 0 0 0 0 0 0 0 ...
##  $ CAN            : int  0 0 0 0 0 0 44 0 0 0 ...
##  $ Nephro_DM      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Nephrotoxicity : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ PNA1           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PNA2           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ GNC            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Graft_lost     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Death          : int  0 0 0 0 0 0 0 0 0 0 ...

Análise exploratória

Variáveis antropométricas

# Empirical cumulative distribution of age, one step curve per gender.
ggplot(da, aes(x = Age, colour = Gender)) +
  stat_ecdf(geom = "step") +
  xlab("Idade (anos)") +
  ylab("Frequência relativa acumulada")

# Kernel density of body weight by gender; the rug marks each observation.
ggplot(da, aes(x = Weight, colour = Gender)) +
  geom_density() +
  geom_rug() +
  xlab("Massa (kg)") +
  ylab("Densidade")

# Weight against height by gender, with one loess smoother per group.
ggplot(da, aes(x = Height, y = Weight, colour = Gender)) +
  geom_point() +
  geom_smooth(method = "loess") +
  xlab("Altura (cm)") +
  ylab("Massa (kg)")

# Reorder the BMI classes from lightest to heaviest so the bars follow the
# natural ordering instead of the level order produced at import.
# Helpers used to obtain the ordering:
#   xtabs(~BMI + Gender, data = da)
#   dput(levels(da$BMI)[c(4, 1, 3, 2)])
da$BMI <- factor(
  da[["BMI"]],
  levels = c("Underweight", "Normal weight", "Overweight", "Obesity")
)

# Number of patients in each BMI class, side-by-side bars per gender.
ggplot(da, aes(x = BMI, fill = Gender)) +
  geom_bar(position = "dodge", colour = "gray50") +
  xlab("Classe para índice de massa corporal") +
  ylab("Frequência absoluta")

Pacientes transplantados (receptores)

#-----------------------------------------------------------------------

# Fix the display order of the patient groups: NTXS, NTXC, then TXR.
# Helper used to obtain the ordering: dput(levels(da$Pop)[c(2, 1, 3)])
da$Pop <- factor(da[["Pop"]], levels = c("NTXS", "NTXC", "TXR"))

# Number of patients in each group.
ggplot(da, aes(x = Pop)) +
  geom_bar(position = "dodge", colour = "gray50") +
  xlab("Tipo de paciente segundo transplante e doença renal") +
  ylab("Frequência absoluta")

#-----------------------------------------------------------------------
# Apenas nos pacientes transplantados (Pop == TXR)

# How many previous transplants each transplanted patient (Pop == "TXR") had.
ggplot(subset(da, Pop == "TXR"), aes(x = N_pre_Tx)) +
  geom_bar(position = "dodge", colour = "gray50") +
  xlab("Número de transplantes prévios") +
  ylab("Frequência absoluta")

# Distribution of time on dialysis before transplant, transplanted patients
# only. Missing values are dropped silently in both layers.
ggplot(data = subset(da, Pop == "TXR"),
       mapping = aes(x = Time_dialysis)) +
    geom_density(na.rm = TRUE) +
    geom_rug(na.rm = TRUE) +
    # The y axis of a density plot is a density, not an absolute count;
    # the x label also had a typo ("prá" -> "pré").
    labs(x = "Tempo em diálise pré-transplante (meses)",
         y = "Densidade")

# Convert the ischemia time from "HH:MM:SS.ss" text to decimal hours.
# Each component is first expressed in minutes (hours * 60, minutes * 1,
# seconds / 60); the sum is then divided by 60. Missing entries stay NA.
# vapply() guarantees a numeric vector even for empty input, which
# sapply() does not.
da$IschemiaH <-
    vapply(strsplit(x = as.character(da$Ischemia), split = ":"),
           FUN = function(x) {
               sum(as.numeric(x) * c(60, 1, 1/60))/60
           },
           FUN.VALUE = numeric(1))

# Distribution of cold ischemia time (in hours) for transplanted patients.
# Missing values are dropped silently in both layers.
ggplot(data = subset(da, Pop == "TXR"),
       mapping = aes(x = IschemiaH)) +
    geom_density(na.rm = TRUE) +
    geom_rug(na.rm = TRUE) +
    # The y axis of a density plot is a density, not an absolute count.
    labs(x = "Tempo de isquemia fria do órgão transplantado (horas)",
         y = "Densidade")

Doadores

#-----------------------------------------------------------------------
# Sobre os doadores.

# Donor condition (coarse classification) among transplanted patients.
ggplot(subset(da, Pop == "TXR"), aes(x = D_typeI)) +
  geom_bar() +
  xlab("Condição do doador") +
  ylab("Frequência absoluta")

# Donor condition (detailed classification); the axes are flipped so the
# long category names can be read horizontally.
ggplot(subset(da, Pop == "TXR"), aes(x = D_typeII)) +
  geom_bar() +
  xlab("Condição do doador") +
  ylab("Frequência absoluta") +
  coord_flip()

# Donor age distribution, one density curve per donor gender.
ggplot(subset(da, Pop == "TXR"), aes(x = D_age, colour = D_gender)) +
  geom_density() +
  geom_rug() +
  xlab("Idade do doador (anos)") +
  ylab("Densidade")

# Three stacked panels comparing recipient and donor among transplanted
# patients, each split by recipient gender.
grid.arrange(
    # Signed age difference (recipient minus donor); the vertical line at
    # zero separates recipients younger and older than their donor.
    ggplot(data = subset(da, Pop == "TXR"),
           mapping = aes(x = Age - D_age, color = Gender)) +
    geom_density() +
    geom_rug() +
    geom_vline(xintercept = 0) +
    labs(x = "Diferença de idade entre transplantado e doador (anos)",
         y = "Densidade"),
    # Weight difference. The label previously read "entre transplantados";
    # it now names both parties, like the age panel.
    # NOTE(review): Diff_weight is taken to be recipient vs. donor -- confirm.
    ggplot(data = subset(da, Pop == "TXR"),
           mapping = aes(x = Diff_weight, color = Gender)) +
    geom_density() +
    geom_rug() +
    labs(x = "Diferença absoluta de peso entre transplantado e doador (kg)",
         y = "Densidade"),
    # Height difference. The label was missing the second party.
    # NOTE(review): Diff_height is taken to be recipient vs. donor -- confirm.
    ggplot(data = subset(da, Pop == "TXR"),
           mapping = aes(x = Diff_height, color = Gender)) +
    geom_density() +
    geom_rug() +
    labs(x = "Diferença de altura entre transplantado e doador (cm)",
         y = "Densidade")
)

Doenças de base

#-----------------------------------------------------------------------
# Sobre as doenças de base.

# Stack the base-disease indicator columns (prefix "B_") into long format:
# one row per patient and disease, with the answer in `valor` and the
# source column name in `doenca`.
# The grouped summary columns ("B_G_other", "B_G_virus", "B_G_parasitic")
# are left out. The previous pattern "^B_[^G]" also dropped "B_GNC", which
# is an individual disease; the negative lookahead excludes only "B_G_".
dbas <- melt(data = da,
             id.vars = c("Gender", "Age", "Pop"),
             measure.vars = grep("^B_(?!G_)", names(da),
                                 value = TRUE, perl = TRUE),
             value.name = "valor",
             variable.name = "doenca")

str(dbas)
## 'data.frame':    2314 obs. of  5 variables:
##  $ Gender: Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 1 2 1 ...
##  $ Age   : int  55 34 24 20 47 48 53 25 69 54 ...
##  $ Pop   : Factor w/ 3 levels "NTXS","NTXC",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ doenca: Factor w/ 13 levels "B_Hypert","B_DM",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ valor : chr  "yes" "yes" "yes" "yes" ...
# Flat contingency table: answers per disease (rows) by patient group (columns).
ftable(
  xtabs(formula = ~ doenca + valor + Pop, data = dbas)
)
##                       Pop NTXS NTXC TXR
## doenca          valor                  
## B_Hypert        no          67    5   5
##                 yes         12   26  62
## B_DM            no          77   20  55
##                 yes          2   11  12
## B_DLP           no          77   21  61
##                 yes          2   10   6
## B_PN            no          79   29  62
##                 yes          0    2   5
## B_PKD           no          79   30  60
##                 yes          0    2   7
## B_Hypert_nephro no          79   26  57
##                 yes          0    5  10
## B_CMV           no           0   29  65
##                 yes          0    2   2
## B_Chagas        no          79   16  66
##                 yes          0    0   1
## B_Toxoplasma    no          78   10  65
##                 yes          1    8   2
## B_EBV           no          69    7  66
##                 yes         10   11   1
## B_HBV           no          79   16  66
##                 yes          0   10   1
## B_HCV           no          79   24  66
##                 yes          0    1   1
## B_Others        no          71   11  40
##                 yes          8   20  27
# Bar chart of the answers for every disease / patient-group combination,
# three panels per row. Combinations with no data are kept (drop = FALSE).
ggplot(dbas, aes(x = valor)) +
  geom_bar(position = "dodge") +
  xlab("Resposta") +
  ylab("Frequência absoluta") +
  facet_wrap(
    facets = doenca ~ Pop,
    ncol = 3,
    # labeller = "label_both",
    drop = FALSE
  )

Desfechos

#-----------------------------------------------------------------------
# Desfechos: perda do enxerto e morte.

# Outcomes among transplanted patients: graft loss and death.
# The plotted indicator is TRUE when the event happened. The previous
# `== 0` comparison was inverted relative to the axis titles, so TRUE
# marked the patients WITHOUT the event.
# NOTE(review): assumes 0 codes "no event" in both columns -- confirm.
ggplot(data = subset(da, Pop == "TXR"),
       mapping = aes(x = Graft_lost != 0)) +
    geom_bar() +
    labs(x = "Perda do enxerto",
         y = "Frequência absoluta")

ggplot(data = subset(da, Pop == "TXR"),
       mapping = aes(x = Death != 0)) +
    geom_bar() +
    labs(x = "Morte do transplantado",
         y = "Frequência absoluta")

Modelagem