Configurando sessão R

library(knitr)
opts_chunk$set(cache = FALSE,
               tidy = FALSE,
               fig.width = 9,
               fig.height = 7,
               fig.align = "center",
               eval.after= "fig.cap",
               # dpi = 96,
               # dev = "png",
               dev.args = list(family = "Helvetica"))

# Pacotes.
library(ggplot2)
library(gridExtra)
library(reshape2)
library(gdata)
library(pROC)

Lendo os dados

# Ler da planilha xls onde as variáveis não estão codificadas.
u <- "2017 12 16 - Variáveis e dados - Pop TXR.xlsx"
da <- read.xls(xls = u,
               sheet = 1,
               verbose = TRUE,
               na.string = c("", "?", "9999"),
               skip = 0)
## Using perl at /usr/bin/perl 
## Using perl at /usr/bin/perl 
## 
## Converting xls file
##     "2017 12 16 - Variáveis e dados - Pop TXR.xlsx" 
## to csv  file 
##     "/tmp/RtmpPEnqnc/file23be6fe4b60b.csv" 
## ... 
## 
## Executing ' '/usr/bin/perl' '/usr/lib/R/site-library/gdata/perl/xls2csv.pl'  '2017 12 16 - Variáveis e dados - Pop TXR.xlsx' '/tmp/RtmpPEnqnc/file23be6fe4b60b.csv' '1' '... 
## 
## 0 
## 
## Done.
## 
## Reading csv file  "/tmp/RtmpPEnqnc/file23be6fe4b60b.csv" ...
## Done.
str(da)
## 'data.frame':    67 obs. of  38 variables:
##  $ ID             : Factor w/ 67 levels "TXR1R01","TXR1R02",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Age            : int  55 34 24 20 47 48 53 25 69 54 ...
##  $ Gender         : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 1 2 1 ...
##  $ Weight         : int  53 88 65 62 79 74 84 42 78 66 ...
##  $ BMI            : Factor w/ 4 levels "Normal weight",..: 1 3 1 1 3 1 3 1 1 3 ...
##  $ Height         : int  164 174 170 164 163 175 168 150 193 150 ...
##  $ N_pre_Tx       : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ Re_Tx          : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 2 1 1 ...
##  $ N_pre_blood_tx : int  1 0 0 0 2 0 3 16 0 2 ...
##  $ Multip         : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 2 ...
##  $ N_pregn        : int  0 0 0 0 0 0 0 0 0 3 ...
##  $ Abortions      : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ N_abortions    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PRA_30         : Factor w/ 2 levels "high PRA","low PRA": 2 1 1 2 2 1 2 1 1 2 ...
##  $ DSAtotal       : Factor w/ 2 levels "no total DSA",..: 1 1 2 1 1 2 1 2 1 1 ...
##  $ Hd_CAPD        : Factor w/ 4 levels "CAPD","Hd","Hd plus CAPD",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Time_dialysis  : int  51 13 15 0 83 54 48 12 96 96 ...
##  $ Ischemia       : Factor w/ 40 levels "00:00:00.00",..: 2 17 2 2 33 28 2 2 37 20 ...
##  $ HLA_MM         : Factor w/ 7 levels "1 Mismatch","2 Mismatches",..: 3 5 2 6 6 3 4 3 4 4 ...
##  $ D_typeI        : Factor w/ 2 levels "Alive donor",..: 1 2 1 1 2 2 1 1 2 2 ...
##  $ D_gender       : Factor w/ 2 levels "F","M": 1 1 1 1 2 2 1 2 2 1 ...
##  $ D_age          : int  44 58 33 49 60 31 48 25 27 49 ...
##  $ Diff_age       : int  11 24 9 29 13 17 5 0 42 5 ...
##  $ Diff_weight    : int  22 8 10 15 4 36 21 21 8 4 ...
##  $ Diff_height    : int  6 9 15 3 12 15 12 21 3 15 ...
##  $ B_Hypert       : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ B_DM           : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ B_DLP          : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ B_GNC          : Factor w/ 2 levels "no","yes": 2 2 2 1 2 2 1 1 1 2 ...
##  $ B_PN           : Factor w/ 2 levels "no","yes": 2 1 1 1 2 1 1 2 1 1 ...
##  $ B_PKD          : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 2 1 ...
##  $ B_Hypert_nephro: Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 2 1 1 1 ...
##  $ B_G_other      : Factor w/ 2 levels "no","yes": 1 1 1 1 1 2 1 2 2 2 ...
##  $ Imm_rej_type   : Factor w/ 6 levels "GNC","NCE","No immunological rejection",..: 3 4 4 4 3 4 2 3 4 4 ...
##  $ Imm_rej        : Factor w/ 2 levels "No immunological rejection",..: 1 2 2 2 1 2 2 1 2 2 ...
##  $ Graft_lost     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Death          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Sec_comp_rej   : Factor w/ 21 levels "Bronchopneumonia",..: 15 15 7 15 15 13 15 15 15 15 ...

Análise exploratória

Variáveis antropométricas

ggplot(data = da,
       mapping = aes(color = Gender, x = Age)) +
    stat_ecdf(geom = "step") +
    labs(x = "Idade (anos)",
         y = "Frequência relativa acumulada")

ggplot(data = da,
       mapping = aes(color = Gender, x = Weight)) +
    geom_density() +
    geom_rug() +
    labs(x = "Massa (kg)",
         y = "Densidade")

ggplot(data = da,
       mapping = aes(color = Gender, y = Weight, x = Height)) +
    geom_point() +
    geom_smooth(method = "loess") +
    labs(x = "Altura (cm)",
         y = "Massa (kg)")

# xtabs(~BMI + Gender, data = da)
# dput(levels(da$BMI)[c(4, 1, 3, 2)])
da$BMI <- factor(da$BMI,
                 levels = c("Underweight",
                            "Normal weight",
                            "Overweight",
                            "Obesity"))

ggplot(data = da,
       mapping = aes(fill = Gender, x = BMI)) +
    geom_bar(position = "dodge", col = "gray50") +
    labs(x = "Classe para índice de massa corporal",
         y = "Frequência absoluta")

Pacientes transplantados (receptores)

#-----------------------------------------------------------------------
# Apenas nos pacientes transplantados.

# Quantos transplantes anteriores.
ggplot(data = da,
       mapping = aes(x = N_pre_Tx)) +
    geom_bar(position = "dodge", col = "gray50") +
    labs(x = "Número de transplantes prévios",
         y = "Frequência absoluta")

ggplot(data = da,
       mapping = aes(x = Time_dialysis)) +
    geom_density(na.rm = TRUE) +
    geom_rug() +
    labs(x = "Tempo em diálise prá-transplante (meses)",
         y = "Frequência absoluta")

# Convertendo para horas.
da$IschemiaH <-
    sapply(strsplit(x = as.character(da$Ischemia), split = ":"),
           FUN = function(x) {
               sum(as.numeric(x) * c(60, 1, 1/60))/60
           })

ggplot(data = da,
       mapping = aes(x = IschemiaH)) +
    geom_density(na.rm = TRUE) +
    geom_rug() +
    labs(x = "Tempo de isquemia fria do órgão transplantado (horas)",
         y = "Frequência absoluta")

Doadores

#-----------------------------------------------------------------------
# Sobre os doadores.

ggplot(data = da,
       mapping = aes(x = D_typeI)) +
    geom_bar() +
    labs(x = "Condição do doador",
         y = "Frequência absoluta")

ggplot(data = da,
       mapping = aes(x = D_age, color = D_gender)) +
    geom_density() +
    geom_rug() +
    labs(x = "Idade do doador (anos)",
         y = "Densidade")

grid.arrange(
    ggplot(data = da,
           mapping = aes(x = Age - D_age, color = Gender)) +
    geom_density() +
    geom_rug() +
    geom_vline(xintercept = 0) +
    labs(x = "Diferença de idade entre transplantado e doador (anos)",
         y = "Densidade"),
    #
    ggplot(data = da,
           mapping = aes(x = Diff_weight, color = Gender)) +
    geom_density() +
    geom_rug() +
    labs(x = "Diferença absoluta de peso entre transplantados (kg)",
         y = "Densidade"),
    #
    ggplot(data = da,
           mapping = aes(x = Diff_height, color = Gender)) +
    geom_density() +
    geom_rug() +
    labs(x = "Diferença de altura entre transplantado (cm)",
         y = "Densidade")
)

Desfechos

#-----------------------------------------------------------------------
# Desfechos: perda do enxerto e morte.

ggplot(data = da,
       mapping = aes(x = Graft_lost == 0)) +
    geom_bar() +
    labs(x = "Perda do enxerto",
         y = "Frequência absoluta")

ggplot(data = da,
       mapping = aes(x = Death == 0)) +
    geom_bar() +
    labs(x = "Morte do transplantado",
         y = "Frequência absoluta")

ggplot(data = da,
       mapping = aes(x = Imm_rej)) +
    geom_bar() +
    labs(x = "Rejeição imunológica",
         y = "Frequência absoluta")

Modelagem

Regressão logística

# Cria a variável binária para o descrecho.
levels(da$Imm_rej)
## [1] "No immunological rejection"      "Yes for immunological rejection"
da$y <- as.integer(da$Imm_rej) - 1

# Passa variável para escala numérica.
if (is.factor(da$HLA_MM)) {
    z <- gsub("\\D", "", da$HLA_MM)
    z[z == ""] <- "0"
    z <- as.integer(z)
    da$HLA_MM <- z
}

table(da$y)
## 
##  0  1 
## 37 30
# Encontra missings.
i <- complete.cases(da)
which(!i)
## [1] 16 36 67
da[!i, ]
##         ID Age Gender Weight           BMI Height N_pre_Tx Re_Tx
## 16 TXR1R16  52      F     76    Overweight    162        0    no
## 36 TXR1R36  41      F     65 Normal weight    164        0    no
## 67 TXR1R67  35      M     57 Normal weight    165        0    no
##    N_pre_blood_tx Multip N_pregn Abortions N_abortions   PRA_30
## 16              0    yes       4       yes           2 high PRA
## 36              0    yes       2        no           0 high PRA
## 67              2     no       0        no           0  low PRA
##         DSAtotal Hd_CAPD Time_dialysis    Ischemia HLA_MM        D_typeI
## 16 yes total DSA      Hd            83 19:07:00.00      5 Deceased donor
## 36  no total DSA      Hd            15 04:14:00.00     NA Deceased donor
## 67  no total DSA      Hd            NA 19:48:00.00     NA Deceased donor
##    D_gender D_age Diff_age Diff_weight Diff_height B_Hypert B_DM B_DLP
## 16        F    59        7          26           7      yes   no    no
## 36        M    66       25           0           4      yes   no    no
## 67        F    46       11          11           0      yes   no    no
##    B_GNC B_PN B_PKD B_Hypert_nephro B_G_other               Imm_rej_type
## 16   yes   no    no              no       yes                       <NA>
## 36   yes   no    no              no       yes                        RCA
## 67    no   no    no              no       yes No immunological rejection
##                            Imm_rej Graft_lost Death Sec_comp_rej IschemiaH
## 16 Yes for immunological rejection          1     0         <NA> 19.116667
## 36 Yes for immunological rejection          0     0          PNA  4.233333
## 67      No immunological rejection          0     0       Dengue 19.800000
##    y
## 16 1
## 36 1
## 67 0
# Elimina missings.
db <- da[-c(36, 67), ]

#-----------------------------------------------------------------------
# Avalia a inclusão de uma variável por vez.

# Modelo nulo (apenas intercepto).
m0 <- glm(y ~ 1,
          data = db,
          family = binomial)
summary(m0)
## 
## Call:
## glm(formula = y ~ 1, family = binomial, data = db)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.087  -1.087  -1.087   1.270   1.270  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.2162     0.2495  -0.867    0.386
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 89.354  on 64  degrees of freedom
## Residual deviance: 89.354  on 64  degrees of freedom
## AIC: 91.354
## 
## Number of Fisher Scoring iterations: 3
# Variáveis a serem excluídas do escopo.
v <- c(1, 18, 34:38, 40)

# Formula.
scope <- as.formula(sprintf(". ~ %s",
                            paste(names(db)[-v],
                                  collapse = " + ")))

# Modelo testando cada uma das variáveis entrando.
m0a <- add1(m0, scope = scope, test = "LRT")
m0a
## Single term additions
## 
## Model:
## y ~ 1
##                 Df Deviance    AIC    LRT Pr(>Chi)   
## <none>               89.354 91.354                   
## Age              1   88.942 92.942 0.4119 0.520998   
## Gender           1   82.631 86.631 6.7228 0.009519 **
## Weight           1   86.003 90.003 3.3513 0.067153 . 
## BMI              3   88.553 96.553 0.8013 0.849151   
## Height           1   86.954 90.954 2.3998 0.121348   
## N_pre_Tx         1   88.926 92.926 0.4281 0.512905   
## Re_Tx            1   88.984 92.984 0.3699 0.543061   
## N_pre_blood_tx   1   89.070 93.070 0.2838 0.594200   
## Multip           1   89.103 93.103 0.2513 0.616180   
## N_pregn          1   88.169 92.169 1.1847 0.276406   
## Abortions        1   89.277 93.277 0.0772 0.781187   
## N_abortions      1   88.601 92.601 0.7530 0.385533   
## PRA_30           1   89.070 93.070 0.2839 0.594181   
## DSAtotal         1   85.880 89.880 3.4739 0.062345 . 
## Hd_CAPD          3   88.568 96.568 0.7861 0.852787   
## Time_dialysis    1   84.931 88.931 4.4224 0.035470 * 
## HLA_MM           1   82.361 86.361 6.9933 0.008181 **
## D_typeI          1   87.109 91.109 2.2448 0.134064   
## D_gender         1   89.259 93.259 0.0949 0.758090   
## D_age            1   82.722 86.722 6.6313 0.010020 * 
## Diff_age         1   88.597 92.597 0.7569 0.384309   
## Diff_weight      1   89.321 93.321 0.0327 0.856399   
## Diff_height      1   88.868 92.868 0.4858 0.485817   
## B_Hypert         1   87.915 91.915 1.4390 0.230301   
## B_DM             1   89.302 93.302 0.0520 0.819646   
## B_DLP            1   89.005 93.005 0.3485 0.554950   
## B_GNC            1   89.121 93.121 0.2331 0.629249   
## B_PN             1   83.111 87.111 6.2428 0.012470 * 
## B_PKD            1   89.344 93.344 0.0098 0.920988   
## B_Hypert_nephro  1   88.227 92.227 1.1268 0.288465   
## B_G_other        1   89.288 93.288 0.0654 0.798132   
## IschemiaH        1   89.082 93.082 0.2714 0.602365   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Variáveis com significância inferior a 0.25.
subset(m0a, `Pr(>Chi)` <= 0.25)
##               Df Deviance    AIC    LRT Pr(>Chi)   
## Gender         1   82.631 86.631 6.7228 0.009519 **
## Weight         1   86.003 90.003 3.3513 0.067153 . 
## Height         1   86.954 90.954 2.3998 0.121348   
## DSAtotal       1   85.880 89.880 3.4739 0.062345 . 
## Time_dialysis  1   84.931 88.931 4.4224 0.035470 * 
## HLA_MM         1   82.361 86.361 6.9933 0.008181 **
## D_typeI        1   87.109 91.109 2.2448 0.134064   
## D_age          1   82.722 86.722 6.6313 0.010020 * 
## B_Hypert       1   87.915 91.915 1.4390 0.230301   
## B_PN           1   83.111 87.111 6.2428 0.012470 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Modelo "feito a mão".

# Variáveis selecionadas (p-valor <= 0.25) com adição de mais algumas.
hand <- y ~ Gender + Weight + Height + DSAtotal + Time_dialysis +
    HLA_MM + D_typeI + D_age + B_Hypert + B_PN + N_pre_Tx +
    N_pre_blood_tx + N_pregn + N_abortions

m1 <- glm(hand,
          data = db,
          family = binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(m1)
## 
## Call:
## glm(formula = hand, family = binomial, data = db)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8288  -0.5106   0.0000   0.3638   1.7600  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)  
## (Intercept)             -4.35436   10.87329  -0.400   0.6888  
## GenderM                  2.01062    1.61317   1.246   0.2126  
## Weight                  -0.04203    0.05002  -0.840   0.4007  
## Height                  -0.03581    0.06528  -0.549   0.5833  
## DSAtotalyes total DSA   21.92967 2401.05888   0.009   0.9927  
## Time_dialysis            0.01615    0.01219   1.325   0.1852  
## HLA_MM                   1.69127    0.78186   2.163   0.0305 *
## D_typeIDeceased donor   -2.69949    1.70826  -1.580   0.1140  
## D_age                    0.10908    0.04696   2.323   0.0202 *
## B_Hypertyes              2.66281    2.26348   1.176   0.2394  
## B_PNyes                -42.03483 3872.54389  -0.011   0.9913  
## N_pre_Tx                -2.13853    1.52599  -1.401   0.1611  
## N_pre_blood_tx           0.37746    0.26152   1.443   0.1489  
## N_pregn                 -0.53132    0.46041  -1.154   0.2485  
## N_abortions              4.81725    3.10153   1.553   0.1204  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 89.354  on 64  degrees of freedom
## Residual deviance: 42.837  on 50  degrees of freedom
## AIC: 72.837
## 
## Number of Fisher Scoring iterations: 18
# Teste marginal para o abandono de cada termo do modelo por LRT.
drop1(m1, test = "Chisq")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Single term deletions
## 
## Model:
## y ~ Gender + Weight + Height + DSAtotal + Time_dialysis + HLA_MM + 
##     D_typeI + D_age + B_Hypert + B_PN + N_pre_Tx + N_pre_blood_tx + 
##     N_pregn + N_abortions
##                Df Deviance    AIC     LRT  Pr(>Chi)    
## <none>              42.837 72.837                      
## Gender          1   44.536 72.536  1.6991 0.1924059    
## Weight          1   43.583 71.583  0.7461 0.3877000    
## Height          1   43.150 71.150  0.3133 0.5756917    
## DSAtotal        1   56.085 84.085 13.2480 0.0002729 ***
## Time_dialysis   1   44.714 72.714  1.8771 0.1706698    
## HLA_MM          1   52.345 80.345  9.5087 0.0020451 ** 
## D_typeI         1   45.840 73.840  3.0038 0.0830708 .  
## D_age           1   50.330 78.330  7.4935 0.0061921 ** 
## B_Hypert        1   44.507 72.507  1.6708 0.1961508    
## B_PN            1   58.991 86.991 16.1547 5.837e-05 ***
## N_pre_Tx        1   44.999 72.999  2.1623 0.1414319    
## N_pre_blood_tx  1   45.175 73.175  2.3380 0.1262475    
## N_pregn         1   44.419 72.419  1.5823 0.2084338    
## N_abortions     1   46.612 74.612  3.7754 0.0520118 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hand <- y ~ Gender + DSAtotal + Time_dialysis + HLA_MM + D_typeI +
    D_age + B_PN + N_abortions
m2 <- update(m1, hand)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
drop1(m2, test = "Chisq")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Single term deletions
## 
## Model:
## y ~ Gender + DSAtotal + Time_dialysis + HLA_MM + D_typeI + D_age + 
##     B_PN + N_abortions
##               Df Deviance    AIC     LRT  Pr(>Chi)    
## <none>             49.826 67.826                      
## Gender         1   51.643 67.643  1.8165 0.1777331    
## DSAtotal       1   62.273 78.273 12.4471 0.0004186 ***
## Time_dialysis  1   51.111 67.111  1.2852 0.2569275    
## HLA_MM         1   57.291 73.291  7.4651 0.0062906 ** 
## D_typeI        1   50.604 66.604  0.7783 0.3776724    
## D_age          1   56.296 72.296  6.4699 0.0109717 *  
## B_PN           1   63.082 79.082 13.2562 0.0002717 ***
## N_abortions    1   50.887 66.887  1.0606 0.3030800    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m2)
## 
## Call:
## glm(formula = y ~ Gender + DSAtotal + Time_dialysis + HLA_MM + 
##     D_typeI + D_age + B_PN + N_abortions, family = binomial, 
##     data = db)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.61548  -0.61233  -0.00016   0.58152   1.99117  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -7.721e+00  2.338e+00  -3.302  0.00096 ***
## GenderM                1.116e+00  8.443e-01   1.322  0.18626    
## DSAtotalyes total DSA  2.065e+01  2.648e+03   0.008  0.99378    
## Time_dialysis          1.102e-02  9.934e-03   1.109  0.26726    
## HLA_MM                 9.461e-01  4.379e-01   2.161  0.03072 *  
## D_typeIDeceased donor -8.650e-01  9.918e-01  -0.872  0.38310    
## D_age                  7.984e-02  3.433e-02   2.325  0.02005 *  
## B_PNyes               -3.606e+01  4.323e+03  -0.008  0.99334    
## N_abortions            1.105e+00  1.166e+00   0.948  0.34323    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 89.354  on 64  degrees of freedom
## Residual deviance: 49.826  on 56  degrees of freedom
## AIC: 67.826
## 
## Number of Fisher Scoring iterations: 18
#-----------------------------------------------------------------------
# Modelo resultado do forward.

form <- as.formula(sprintf("y ~ %s",
                           paste(names(db)[-v],
                                 collapse = " + ")))

m00 <- glm(form,
           data = db,
           family = binomial)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Seleção forward.
m0s <- step(m0, direction = "forward", scope = scope)
## Start:  AIC=91.35
## y ~ 1
## 
##                   Df Deviance    AIC
## + HLA_MM           1   82.361 86.361
## + Gender           1   82.631 86.631
## + D_age            1   82.722 86.722
## + B_PN             1   83.111 87.111
## + Time_dialysis    1   84.931 88.931
## + DSAtotal         1   85.880 89.880
## + Weight           1   86.003 90.003
## + Height           1   86.954 90.954
## + D_typeI          1   87.109 91.109
## <none>                 89.354 91.354
## + B_Hypert         1   87.915 91.915
## + N_pregn          1   88.169 92.169
## + B_Hypert_nephro  1   88.227 92.227
## + Diff_age         1   88.597 92.597
## + N_abortions      1   88.601 92.601
## + Diff_height      1   88.868 92.868
## + N_pre_Tx         1   88.926 92.926
## + Age              1   88.942 92.942
## + Re_Tx            1   88.984 92.984
## + B_DLP            1   89.005 93.005
## + PRA_30           1   89.070 93.070
## + N_pre_blood_tx   1   89.070 93.070
## + IschemiaH        1   89.082 93.082
## + Multip           1   89.103 93.103
## + B_GNC            1   89.121 93.121
## + D_gender         1   89.259 93.259
## + Abortions        1   89.277 93.277
## + B_G_other        1   89.288 93.288
## + B_DM             1   89.302 93.302
## + Diff_weight      1   89.321 93.321
## + B_PKD            1   89.344 93.344
## + BMI              3   88.553 96.553
## + Hd_CAPD          3   88.568 96.568
## 
## Step:  AIC=86.36
## y ~ HLA_MM
## 
##                   Df Deviance    AIC
## + B_PN             1   74.304 80.304
## + DSAtotal         1   77.871 83.871
## + Gender           1   78.377 84.377
## + Time_dialysis    1   78.561 84.561
## + D_age            1   78.671 84.671
## <none>                 82.361 86.361
## + Weight           1   80.405 86.405
## + B_Hypert         1   80.808 86.808
## + N_pregn          1   80.994 86.994
## + Height           1   81.046 87.046
## + B_GNC            1   81.347 87.347
## + D_typeI          1   81.560 87.560
## + N_abortions      1   81.670 87.670
## + PRA_30           1   81.681 87.681
## + B_Hypert_nephro  1   81.838 87.838
## + Diff_height      1   81.910 87.910
## + Diff_age         1   82.055 88.055
## + B_DLP            1   82.125 88.125
## + Multip           1   82.162 88.162
## + Diff_weight      1   82.165 88.165
## + N_pre_blood_tx   1   82.212 88.212
## + B_G_other        1   82.237 88.237
## + Abortions        1   82.238 88.238
## + Re_Tx            1   82.249 88.249
## + N_pre_Tx         1   82.255 88.255
## + Age              1   82.261 88.261
## + B_PKD            1   82.329 88.329
## + D_gender         1   82.357 88.357
## + IschemiaH        1   82.357 88.357
## + B_DM             1   82.358 88.358
## + Hd_CAPD          3   78.925 88.925
## + BMI              3   80.214 90.214
## 
## Step:  AIC=80.3
## y ~ HLA_MM + B_PN
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                   Df Deviance    AIC
## + DSAtotal         1   62.053 70.053
## + Time_dialysis    1   69.851 77.851
## + B_GNC            1   70.877 78.877
## + Gender           1   70.945 78.945
## + D_age            1   71.517 79.517
## + N_pregn          1   71.855 79.855
## <none>                 74.304 80.304
## + B_Hypert         1   73.085 81.085
## + PRA_30           1   73.284 81.284
## + N_pre_blood_tx   1   73.422 81.422
## + Multip           1   73.621 81.621
## + Weight           1   73.693 81.693
## + B_DLP            1   73.811 81.811
## + B_G_other        1   73.881 81.881
## + N_abortions      1   73.927 81.927
## + N_pre_Tx         1   73.934 81.934
## + D_typeI          1   74.049 82.049
## + Re_Tx            1   74.077 82.077
## + Diff_height      1   74.144 82.144
## + Diff_weight      1   74.160 82.160
## + B_Hypert_nephro  1   74.176 82.176
## + D_gender         1   74.215 82.215
## + Height           1   74.238 82.238
## + B_DM             1   74.240 82.240
## + Age              1   74.241 82.241
## + Abortions        1   74.288 82.288
## + Diff_age         1   74.297 82.297
## + B_PKD            1   74.299 82.299
## + IschemiaH        1   74.304 82.304
## + Hd_CAPD          3   70.929 82.929
## + BMI              3   72.244 84.244
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=70.05
## y ~ HLA_MM + B_PN + DSAtotal
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                   Df Deviance    AIC
## + D_age            1   54.034 64.034
## + Gender           1   59.234 69.234
## <none>                 62.053 70.053
## + Time_dialysis    1   60.152 70.152
## + N_pregn          1   60.302 70.302
## + N_pre_blood_tx   1   61.117 71.117
## + B_GNC            1   61.255 71.255
## + B_Hypert         1   61.297 71.297
## + Weight           1   61.515 71.515
## + N_abortions      1   61.568 71.568
## + Diff_height      1   61.634 71.634
## + B_Hypert_nephro  1   61.705 71.705
## + Diff_weight      1   61.745 71.745
## + B_G_other        1   61.758 71.758
## + Multip           1   61.760 71.760
## + B_DM             1   61.826 71.826
## + D_typeI          1   61.838 71.838
## + B_DLP            1   61.957 71.957
## + Abortions        1   61.984 71.984
## + B_PKD            1   61.988 71.988
## + Re_Tx            1   62.008 72.008
## + Height           1   62.017 72.017
## + IschemiaH        1   62.024 72.024
## + Diff_age         1   62.025 72.025
## + N_pre_Tx         1   62.029 72.029
## + Age              1   62.040 72.040
## + PRA_30           1   62.040 72.040
## + D_gender         1   62.041 72.041
## + Hd_CAPD          3   58.742 72.742
## + BMI              3   60.233 74.233
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=64.03
## y ~ HLA_MM + B_PN + DSAtotal + D_age
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                   Df Deviance    AIC
## <none>                 54.034 64.034
## + Gender           1   52.309 64.309
## + Time_dialysis    1   52.448 64.448
## + N_pregn          1   52.614 64.614
## + B_Hypert         1   52.805 64.805
## + B_PKD            1   53.202 65.202
## + N_pre_blood_tx   1   53.371 65.371
## + Re_Tx            1   53.380 65.380
## + B_GNC            1   53.443 65.443
## + D_gender         1   53.567 65.567
## + Diff_age         1   53.620 65.620
## + Diff_height      1   53.820 65.820
## + PRA_30           1   53.835 65.835
## + Multip           1   53.836 65.836
## + B_Hypert_nephro  1   53.850 65.850
## + Diff_weight      1   53.876 65.876
## + Weight           1   53.912 65.912
## + N_abortions      1   53.922 65.922
## + D_typeI          1   53.980 65.980
## + B_DLP            1   53.985 65.985
## + IschemiaH        1   54.000 66.000
## + Height           1   54.003 66.003
## + Hd_CAPD          3   50.005 66.005
## + B_DM             1   54.011 66.011
## + B_G_other        1   54.018 66.018
## + N_pre_Tx         1   54.020 66.020
## + Abortions        1   54.032 66.032
## + Age              1   54.033 66.033
## + BMI              3   51.264 67.264
# Teste marginal.
drop1(m0s, test = "Chisq")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Single term deletions
## 
## Model:
## y ~ HLA_MM + B_PN + DSAtotal + D_age
##          Df Deviance    AIC     LRT  Pr(>Chi)    
## <none>        54.034 64.034                      
## HLA_MM    1   64.449 72.449 10.4151  0.001250 ** 
## B_PN      1   70.340 78.340 16.3060 5.389e-05 ***
## DSAtotal  1   71.517 79.517 17.4836 2.898e-05 ***
## D_age     1   62.053 70.053  8.0196  0.004627 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m0s)
## 
## Call:
## glm(formula = y ~ HLA_MM + B_PN + DSAtotal + D_age, family = binomial, 
##     data = db)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.38909  -0.81549  -0.00015   0.77211   1.66600  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)   
## (Intercept)             -7.64246    2.44291  -3.128  0.00176 **
## HLA_MM                   1.07573    0.43544   2.470  0.01349 * 
## B_PNyes                -37.44502 4397.55819  -0.009  0.99321   
## DSAtotalyes total DSA   21.53193 2633.45558   0.008  0.99348   
## D_age                    0.08292    0.03259   2.544  0.01096 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 89.354  on 64  degrees of freedom
## Residual deviance: 54.034  on 60  degrees of freedom
## AIC: 64.034
## 
## Number of Fisher Scoring iterations: 18
# par(mfrow = c(2, 2))
# plot(m0s)
# layout(1)

# Predição.
pred <- as.integer(fitted(m0s) > 0.5)
tb <- table(pred, m0s$model$y)

# Taxa de acerto.
100 * sum(diag(tb))/sum(tb)
## [1] 76.92308
#-----------------------------------------------------------------------
# Curvas ROC dos ajustes.

# help(package = "pROC", h = "html")

perf_m0s <- roc(response = m0s$model$y, predictor = fitted(m0s))
perf_m1 <- roc(response = m1$model$y, predictor = fitted(m1))
perf_m2 <- roc(response = m2$model$y, predictor = fitted(m2))

# Métodos e classes.
# class(perf_m0s)
# methods(class = class(perf_m0s))

# Gráfico das ROC.
plot(perf_m0s,
     col = 2,
     main = "ROC curve",
     xlab = "Specificity",
     ylab = "Sensitivity")
plot(perf_m1, col = 4, add = TRUE)
plot(perf_m2, col = 3, add = TRUE)
legend("bottomright",
       legend = c("Forward", "Hand made 1", "Hand made 2"),
       col = c(2, 4, 3),
       lty = 1,
       bty = "n")

# Para extrair as medidas da curva com área, IC e erro-padrão.
get_ROCmeasures <- function(rocobj) {
    val <- c(c(ci(rocobj)), sqrt(var(rocobj)))
    val <- val[c(2, 1, 3, 4)]
    names(val) <- c("AUC", "lower ci", "upper ci", "std error")
    val
}

# Obtém as medidas para cada um dos modelos.
sapply(list(m0s = perf_m0s,
            m1 = perf_m1,
            m2 = perf_m2), FUN = get_ROCmeasures)
##                 m0s         m1        m2
## AUC       0.8754789 0.94348659 0.9090038
## lower ci  0.7935085 0.89228837 0.8386388
## upper ci  0.9574493 0.99468481 0.9793689
## std error 0.0418224 0.02612202 0.0359012
# Testa se as curvas feitas a mão diferem entre si.
roc.test(roc1 = perf_m1,
         roc2 = perf_m2)
## 
##  DeLong's test for two correlated ROC curves
## 
## data:  perf_m1 and perf_m2
## Z = 1.3809, p-value = 0.1673
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2 
##   0.9434866   0.9090038
#-----------------------------------------------------------------------

Árvore de classificação

library(rpart)

levels(da$Gender)
## [1] "F" "M"
levels(da$DSAtotal)
## [1] "no total DSA"  "yes total DSA"
fit <- rpart(hand,
             data = db,
             method = "class",
             control = rpart.control(minsplit = 15, cp = 0.001))

printcp(fit)
## 
## Classification tree:
## rpart(formula = hand, data = db, method = "class", control = rpart.control(minsplit = 15, 
##     cp = 0.001))
## 
## Variables actually used in tree construction:
## [1] D_age         Gender        Time_dialysis
## 
## Root node error: 29/65 = 0.44615
## 
## n= 65 
## 
##         CP nsplit rel error  xerror    xstd
## 1 0.310345      0   1.00000 1.00000 0.13820
## 2 0.103448      1   0.68966 1.00000 0.13820
## 3 0.068966      2   0.58621 1.06897 0.13886
## 4 0.017241      3   0.51724 0.89655 0.13620
## 5 0.001000      5   0.48276 0.86207 0.13525
plotcp(fit)

summary(fit)
## Call:
## rpart(formula = hand, data = db, method = "class", control = rpart.control(minsplit = 15, 
##     cp = 0.001))
##   n= 65 
## 
##           CP nsplit rel error    xerror      xstd
## 1 0.31034483      0 1.0000000 1.0000000 0.1381960
## 2 0.10344828      1 0.6896552 1.0000000 0.1381960
## 3 0.06896552      2 0.5862069 1.0689655 0.1388563
## 4 0.01724138      3 0.5172414 0.8965517 0.1361960
## 5 0.00100000      5 0.4827586 0.8620690 0.1352525
## 
## Variable importance
## Time_dialysis         D_age        Gender        HLA_MM   N_abortions 
##            42            32            13            10             2 
##      DSAtotal 
##             1 
## 
## Node number 1: 65 observations,    complexity param=0.3103448
##   predicted class=0  expected loss=0.4461538  P(node) =1
##     class counts:    36    29
##    probabilities: 0.554 0.446 
##   left son=2 (38 obs) right son=3 (27 obs)
##   Primary splits:
##       D_age         < 47   to the left,  improve=4.491498, (0 missing)
##       Gender        splits as  LR,       improve=3.226391, (0 missing)
##       HLA_MM        < 3.5  to the left,  improve=3.021628, (0 missing)
##       Time_dialysis < 45.5 to the left,  improve=2.549718, (0 missing)
##       B_PN          splits as  RL,       improve=2.156410, (0 missing)
##   Surrogate splits:
##       HLA_MM        < 4.5  to the left,  agree=0.692, adj=0.259, (0 split)
##       Time_dialysis < 45.5 to the left,  agree=0.615, adj=0.074, (0 split)
##       N_abortions   < 1.5  to the left,  agree=0.615, adj=0.074, (0 split)
## 
## Node number 2: 38 observations,    complexity param=0.1034483
##   predicted class=0  expected loss=0.2894737  P(node) =0.5846154
##     class counts:    27    11
##    probabilities: 0.711 0.289 
##   left son=4 (33 obs) right son=5 (5 obs)
##   Primary splits:
##       Time_dialysis < 93   to the left,  improve=3.0012760, (0 missing)
##       Gender        splits as  LR,       improve=2.5789470, (0 missing)
##       DSAtotal      splits as  LR,       improve=2.0274120, (0 missing)
##       HLA_MM        < 0.5  to the left,  improve=0.9649123, (0 missing)
##       D_age         < 25.5 to the left,  improve=0.9649123, (0 missing)
## 
## Node number 3: 27 observations,    complexity param=0.06896552
##   predicted class=1  expected loss=0.3333333  P(node) =0.4153846
##     class counts:     9    18
##    probabilities: 0.333 0.667 
##   left son=6 (6 obs) right son=7 (21 obs)
##   Primary splits:
##       Time_dialysis < 17.5 to the left,  improve=1.71428600, (0 missing)
##       HLA_MM        < 3.5  to the left,  improve=1.61538500, (0 missing)
##       D_age         < 48.5 to the right, improve=0.68571430, (0 missing)
##       Gender        splits as  LR,       improve=0.03947368, (0 missing)
## 
## Node number 4: 33 observations,    complexity param=0.01724138
##   predicted class=0  expected loss=0.2121212  P(node) =0.5076923
##     class counts:    26     7
##    probabilities: 0.788 0.212 
##   left son=8 (18 obs) right son=9 (15 obs)
##   Primary splits:
##       Gender        splits as  LR,       improve=1.9414140, (0 missing)
##       DSAtotal      splits as  LR,       improve=1.7731600, (0 missing)
##       D_age         < 30.5 to the left,  improve=0.9503030, (0 missing)
##       HLA_MM        < 1    to the left,  improve=0.5303030, (0 missing)
##       Time_dialysis < 4    to the right, improve=0.4160173, (0 missing)
##   Surrogate splits:
##       Time_dialysis < 14.5 to the left,  agree=0.697, adj=0.333, (0 split)
##       HLA_MM        < 2.5  to the right, agree=0.606, adj=0.133, (0 split)
##       DSAtotal      splits as  LR,       agree=0.576, adj=0.067, (0 split)
##       D_age         < 23.5 to the right, agree=0.576, adj=0.067, (0 split)
## 
## Node number 5: 5 observations
##   predicted class=1  expected loss=0.2  P(node) =0.07692308
##     class counts:     1     4
##    probabilities: 0.200 0.800 
## 
## Node number 6: 6 observations
##   predicted class=0  expected loss=0.3333333  P(node) =0.09230769
##     class counts:     4     2
##    probabilities: 0.667 0.333 
## 
## Node number 7: 21 observations
##   predicted class=1  expected loss=0.2380952  P(node) =0.3230769
##     class counts:     5    16
##    probabilities: 0.238 0.762 
## 
## Node number 8: 18 observations
##   predicted class=0  expected loss=0.05555556  P(node) =0.2769231
##     class counts:    17     1
##    probabilities: 0.944 0.056 
## 
## Node number 9: 15 observations,    complexity param=0.01724138
##   predicted class=0  expected loss=0.4  P(node) =0.2307692
##     class counts:     9     6
##    probabilities: 0.600 0.400 
##   left son=18 (10 obs) right son=19 (5 obs)
##   Primary splits:
##       Time_dialysis < 21.5 to the right, improve=0.60000000, (0 missing)
##       D_age         < 32   to the left,  improve=0.60000000, (0 missing)
##       HLA_MM        < 2.5  to the left,  improve=0.08888889, (0 missing)
##       D_typeI       splits as  LR,       improve=0.02142857, (0 missing)
##   Surrogate splits:
##       D_age < 37   to the left,  agree=0.733, adj=0.2, (0 split)
## 
## Node number 18: 10 observations
##   predicted class=0  expected loss=0.3  P(node) =0.1538462
##     class counts:     7     3
##    probabilities: 0.700 0.300 
## 
## Node number 19: 5 observations
##   predicted class=1  expected loss=0.4  P(node) =0.07692308
##     class counts:     2     3
##    probabilities: 0.400 0.600
plot(fit, uniform = TRUE)
text(fit, use.n = TRUE, all = TRUE, cex = 0.8)

pred <- apply(predict(fit), MARGIN = 1, FUN = which.max) - 1
tb <- table(pred, db$y)

# Taxa de acerto.
100 * sum(diag(tb))/sum(tb)
## [1] 78.46154