1 Loading extra packages

library(knitr)
library(betareg)
library(TMB)
library(tmbstan)
library(ggplot2)
library(tidyverse)

2 StressAnxiety data set

data1 <- get(data("StressAnxiety"))
tail(data1, n = 10)
##     stress anxiety
## 157   0.45    0.13
## 158   0.21    0.01
## 159   0.41    0.05
## 160   0.21    0.01
## 161   0.05    0.01
## 162   0.37    0.29
## 163   0.53    0.25
## 164   0.65    0.49
## 165   0.17    0.01
## 166   0.09    0.01

3 Descriptive analysis

summary(data1)
##      stress          anxiety      
##  Min.   :0.0100   Min.   :0.0100  
##  1st Qu.:0.1300   1st Qu.:0.0100  
##  Median :0.2500   Median :0.0300  
##  Mean   :0.2642   Mean   :0.0912  
##  3rd Qu.:0.3700   3rd Qu.:0.1300  
##  Max.   :0.8500   Max.   :0.6900
p1 <- data1 %>% 
        ggplot(aes(x = anxiety, y = stress)) + 
        geom_point(size = 1.5, alpha = 0.3, color = "gray40") +
        xlab("Anxiety") + 
        ylab("Stress") +
        ylim(-0.01, 1.01)

p1

4 Univariate beta regression model

4.1 Beta distribution

Let \(Y_1, \ldots, Y_n\) be independent random variables with probability density function given by \[ f(y;\mu,\phi) = \dfrac{\Gamma(\phi)}{\Gamma(\mu \phi) \Gamma[(1 - \mu)\phi]} y^{\mu \phi - 1} (1 - y)^{(1 - \mu) \phi - 1}, \] where \(y, \mu \in (0,1)\) and \(\phi > 0\) denote the precision parameter. According to Ferrari and Cribari-Neto (2004) the expectation and variance of the beta distribution are given, respectively, by

\[\begin{equation} \label{betamoments} \mathrm{E}(Y) = \mu \quad~\mathrm{and}~\quad \mathrm{Var}(Y) = \dfrac{\mu(1-\mu)}{1 + \phi}. \end{equation}\]

4.2 Beta regression model

Let \(y_1, \ldots, y_n\) random variables independent and identically distributed, where \(Y_i \sim \mathcal{B}(\mu_i, \phi)\). Thus, the beta regression model is specified by \[ g(\mu_i) = \mathbf{x}_i^{\top} \boldsymbol{\beta}, \] where \(g(\cdot)\) is a known link function, \(\mathbf{x}_i\) and \(\boldsymbol{\beta}\) are \(p \times 1\) vectors of known covariates and unknown regression parameters, respectively.

4.3 Log-likelihood

Let \(\boldsymbol{\theta} = (\boldsymbol{\beta}, \phi)^{\top}\) denote the vector of parameters in the beta regression model. The log-likelihood function is written as \[ \mathcal{l(\boldsymbol{\theta})} = \log \Gamma(\phi) - \log \Gamma(\mu_i \phi) - \log\Gamma[ (1 - \mu_i) \phi] + (\mu_i \phi - 1) \log y_i + [(1 - \mu_i)\phi - 1] \log(1 - y_i). \]

4.4 Linear predictor

\[ \mathrm{logit}(\mu_i) = \beta_0 + \beta_1 \mathrm{anxiety}_i,~\mathrm{for}~i = 1, \ldots, 166. \]

form1 <- stress ~ anxiety

5 betareg package

For details about betareg package see Cribari-Neto and Zeileis (2010).

fit_betareg <- betareg::betareg(form1, data = data1)
summary(fit_betareg)  
## 
## Call:
## betareg::betareg(formula = form1, data = data1)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -2.2914 -0.4501  0.2401  0.6669  2.1202 
## 
## Coefficients (mean model with logit link):
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.57115    0.08295  -18.94   <2e-16 ***
## anxiety      4.93872    0.46704   10.57   <2e-16 ***
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   6.9012     0.7382   9.348   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 109.1 on 3 Df
## Pseudo R-squared: 0.3372
## Number of iterations: 15 (BFGS) + 2 (Fisher scoring)

6 Template Model Builder (TMB) package

For details about TMB package see Kristensen et al. (2016).

6.1 Code written in C++ template

// Beta Univariate regression model
#include <TMB.hpp>
template<class Type>
Type objective_function<Type>::operator() ()
{
  DATA_VECTOR(Y);           // Observations
  DATA_MATRIX(X);           // Fixed effect design matrix  
  PARAMETER_VECTOR(beta);   // Fixed effects vector
  PARAMETER(logphi);        // Precision parameter (Beta)
  
  // Preparing
  Type phi = exp(logphi);
  
  // Linear predictor 
  vector<Type> mu = exp(X*beta)/(1 + exp(X*beta));
  
  // Log-likelihood
  Type nll = 0;
  for(int i = 0; i < Y.size(); i++)
    nll -= dbeta(Y(i), mu(i)*phi, (1 - mu(i))*phi, true);
  
  // Delta method
  ADREPORT(phi);

  return nll;
}

6.2 Compiling and loading

TMB::compile("Beta_Univariate.cpp")
dyn.load(TMB::dynlib("Beta_Univariate"))

6.3 Data set

X <- model.matrix(~ anxiety, data = data1)
Y <- data1$stress
data <- list(Y = Y, X = X)

6.4 Initial values

betas <- as.numeric(coef(fit_betareg)[-3])
parameters <- list(beta = betas, logphi = 1)

6.5 Log-likelihood function

Beta_TMB <- TMB::MakeADFun(data, parameters, DLL = "Beta_Univariate", 
                           hessian = TRUE, silent = TRUE)

6.6 Optimizing via nlminb()

fit_TMB <- nlminb(start = Beta_TMB$par, objective = Beta_TMB$fn, gradient = Beta_TMB$gr)
fit_TMB
## $par
##      beta      beta    logphi 
## -1.571147  4.938719  1.931693 
## 
## $objective
## [1] -109.0745
## 
## $convergence
## [1] 0
## 
## $iterations
## [1] 15
## 
## $evaluations
## function gradient 
##       19       16 
## 
## $message
## [1] "relative convergence (4)"

6.7 Getting parameter estimates

report <- TMB::sdreport(Beta_TMB)

6.8 Summary for the fixed effects

summary(report, "fixed", p.value = TRUE)
##         Estimate Std. Error   z value   Pr(>|z^2|)
## beta   -1.571147 0.08132138 -19.32022 3.631180e-83
## beta    4.938719 0.47092822  10.48720 9.891575e-26
## logphi  1.931693 0.10669340  18.10508 2.905803e-73

6.9 Delta method for \(\phi\)

summary(report, "report", p.value = TRUE)
##     Estimate Std. Error  z value   Pr(>|z^2|)
## phi 6.901182  0.7363106 9.372651 7.073317e-21

7 Writing the log-likelihood function

// Beta Univariate regression model
#include <TMB.hpp>
template<class Type>
Type objective_function<Type>::operator() ()
{
  DATA_VECTOR(Y);           // Observations
  DATA_MATRIX(X);           // Fixed effect design matrix  
  PARAMETER_VECTOR(beta);   // Fixed effects vector
  PARAMETER(logphi);        // Precision parameter (Beta)
  
  // Preparing
  Type phi = exp(logphi);
  
  // Linear predictor for mean
  vector<Type> mu = exp(X*beta)/(1 + exp(X*beta));
  
  // Log-likelihood
  Type nll = 0;
  for(int i=0; i < Y.size(); i++)
    nll -= lgamma(phi) - lgamma(mu(i) * phi) - lgamma( (1 - mu(i)) * phi) + 
           (mu(i) * phi - 1) * log(Y(i)) + ((1 - mu(i)) * phi - 1) * 
           log(1 - Y(i));
  
  // Delta method
  ADREPORT(phi);

  return nll;
}
# Compilation
TMB::compile("Beta_Univariate2.cpp")
dyn.load(TMB::dynlib("Beta_Univariate2"))
# Log-likelihood function
Beta_TMB2 <- TMB::MakeADFun(data, parameters, DLL = "Beta_Univariate2", 
                            hessian = TRUE, silent = TRUE)
# Fitting via nlminb() 
fit_TMB2 <- nlminb(start = Beta_TMB2$par, objective = Beta_TMB2$fn, 
                   gradient = Beta_TMB2$gr)
fit_TMB2
## $par
##      beta      beta    logphi 
## -1.571147  4.938719  1.931693 
## 
## $objective
## [1] -109.0745
## 
## $convergence
## [1] 0
## 
## $iterations
## [1] 15
## 
## $evaluations
## function gradient 
##       19       16 
## 
## $message
## [1] "relative convergence (4)"
# Report
report2 <- TMB::sdreport(Beta_TMB2)
# Summary for fixed effects
summary(report2, "fixed", p.value = TRUE)
##         Estimate Std. Error   z value   Pr(>|z^2|)
## beta   -1.571147 0.08132138 -19.32022 3.631180e-83
## beta    4.938719 0.47092822  10.48720 9.891575e-26
## logphi  1.931693 0.10669340  18.10508 2.905803e-73

8 Comparison

8.1 Coefficients betareg versus TMB

tab <- rbind("betareg" = coef(fit_betareg), 
             "TMB dbeta" = c(fit_TMB$par[1:2], exp(fit_TMB$par[3])),
             "TMB log-lik" = c(fit_TMB2$par[1:2], exp(fit_TMB2$par[3])))
tab
##             (Intercept)  anxiety    (phi)
## betareg       -1.571147 4.938718 6.901184
## TMB dbeta     -1.571147 4.938719 6.901182
## TMB log-lik   -1.571147 4.938719 6.901182

8.2 Log-likelihood value

tab2 <- rbind("betareg" = logLik(fit_betareg), "TMB dbeta" = abs(fit_TMB$objective), 
              "TMB log-lik" = abs(fit_TMB2$objective))
colnames(tab2) <- "Log-likelihood"
tab2
##             Log-likelihood
## betareg           109.0745
## TMB dbeta         109.0745
## TMB log-lik       109.0745

9 Profile

# Preparing
par(mfrow = c(1,3), mar=c(2.8, 2.6, 1.2, 0.5), mgp = c(1.6, 0.6, 0))
# beta 0
beta0 <- TMB::tmbprofile(Beta_TMB, 1, trace = F)
plot(beta0)

# beta 1
beta1 <- TMB::tmbprofile(Beta_TMB, 2, trace = F)
plot(beta1)

# logphi
logphi <- TMB::tmbprofile(Beta_TMB, 3, trace = F)
plot(logphi)  

10 tmbstan package

options(mc.cores = parallel::detectCores())
rstan_options(auto_write = T)
fit_TMB_Stan <- tmbstan::tmbstan(Beta_TMB, silent = F, laplace = F, chains = 3)

# Plots
plot(fit_TMB_Stan, pars = names(Beta_TMB$par))

# Traceplot
trace <- traceplot(fit_TMB_Stan, pars = names(Beta_TMB$par))
trace + scale_color_discrete() + theme(legend.position = "top")

References

Cribari-Neto, F., and A. Zeileis. 2010. “Beta Regression in R.” Journal of Statistical Software 34 (2): 1–24.

Ferrari, Silvia, and Francisco Cribari-Neto. 2004. “Beta Regression for Modelling Rates and Proportions.” Journal of Applied Statistics 31 (7): 799–815.

Kristensen, Kasper, Anders Nielsen, Casper W Berg, Hans Skaug, and Bradley M Bell. 2016. “TMB: Automatic Differentiation and Laplace Approximation.” Journal of Statistical Software 70 (5).