Example 1: Univariate beta regression model
library(knitr)
library(betareg)
library(TMB)
library(tmbstan)
library(ggplot2)
library(tidyverse)
data1 <- get(data("StressAnxiety"))
tail(data1, n = 10)
## stress anxiety
## 157 0.45 0.13
## 158 0.21 0.01
## 159 0.41 0.05
## 160 0.21 0.01
## 161 0.05 0.01
## 162 0.37 0.29
## 163 0.53 0.25
## 164 0.65 0.49
## 165 0.17 0.01
## 166 0.09 0.01
summary(data1)
## stress anxiety
## Min. :0.0100 Min. :0.0100
## 1st Qu.:0.1300 1st Qu.:0.0100
## Median :0.2500 Median :0.0300
## Mean :0.2642 Mean :0.0912
## 3rd Qu.:0.3700 3rd Qu.:0.1300
## Max. :0.8500 Max. :0.6900
p1 <- data1 %>%
ggplot(aes(x = anxiety, y = stress)) +
geom_point(size = 1.5, alpha = 0.3, color = "gray40") +
xlab("Anxiety") +
ylab("Stress") +
ylim(-0.01, 1.01)
p1
Let \(Y_1, \ldots, Y_n\) be independent random variables with probability density function given by \[ f(y;\mu,\phi) = \dfrac{\Gamma(\phi)}{\Gamma(\mu \phi) \Gamma[(1 - \mu)\phi]} y^{\mu \phi - 1} (1 - y)^{(1 - \mu) \phi - 1}, \] where \(y, \mu \in (0,1)\) and \(\phi > 0\) denote the precision parameter. According to Ferrari and Cribari-Neto (2004) the expectation and variance of the beta distribution are given, respectively, by
\[\begin{equation} \label{betamoments} \mathrm{E}(Y) = \mu \quad~\mathrm{and}~\quad \mathrm{Var}(Y) = \dfrac{\mu(1-\mu)}{1 + \phi}. \end{equation}\]
Let \(y_1, \ldots, y_n\) random variables independent and identically distributed, where \(Y_i \sim \mathcal{B}(\mu_i, \phi)\). Thus, the beta regression model is specified by \[ g(\mu_i) = \mathbf{x}_i^{\top} \boldsymbol{\beta}, \] where \(g(\cdot)\) is a known link function, \(\mathbf{x}_i\) and \(\boldsymbol{\beta}\) are \(p \times 1\) vectors of known covariates and unknown regression parameters, respectively.
Let \(\boldsymbol{\theta} = (\boldsymbol{\beta}, \phi)^{\top}\) denote the vector of parameters in the beta regression model. The log-likelihood function is written as \[ \mathcal{l(\boldsymbol{\theta})} = \log \Gamma(\phi) - \log \Gamma(\mu_i \phi) - \log\Gamma[ (1 - \mu_i) \phi] + (\mu_i \phi - 1) \log y_i + [(1 - \mu_i)\phi - 1] \log(1 - y_i). \]
\[ \mathrm{logit}(\mu_i) = \beta_0 + \beta_1 \mathrm{anxiety}_i,~\mathrm{for}~i = 1, \ldots, 166. \]
form1 <- stress ~ anxiety
betareg packageFor details about betareg package see Cribari-Neto and Zeileis (2010).
fit_betareg <- betareg::betareg(form1, data = data1)
summary(fit_betareg)
##
## Call:
## betareg::betareg(formula = form1, data = data1)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -2.2914 -0.4501 0.2401 0.6669 2.1202
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.57115 0.08295 -18.94 <2e-16 ***
## anxiety 4.93872 0.46704 10.57 <2e-16 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 6.9012 0.7382 9.348 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 109.1 on 3 Df
## Pseudo R-squared: 0.3372
## Number of iterations: 15 (BFGS) + 2 (Fisher scoring)
For details about TMB package see Kristensen et al. (2016).
C++ template// Beta Univariate regression model
#include <TMB.hpp>
template<class Type>
Type objective_function<Type>::operator() ()
{
DATA_VECTOR(Y); // Observations
DATA_MATRIX(X); // Fixed effect design matrix
PARAMETER_VECTOR(beta); // Fixed effects vector
PARAMETER(logphi); // Precision parameter (Beta)
// Preparing
Type phi = exp(logphi);
// Linear predictor
vector<Type> mu = exp(X*beta)/(1 + exp(X*beta));
// Log-likelihood
Type nll = 0;
for(int i = 0; i < Y.size(); i++)
nll -= dbeta(Y(i), mu(i)*phi, (1 - mu(i))*phi, true);
// Delta method
ADREPORT(phi);
return nll;
}
TMB::compile("Beta_Univariate.cpp")
dyn.load(TMB::dynlib("Beta_Univariate"))
X <- model.matrix(~ anxiety, data = data1)
Y <- data1$stress
data <- list(Y = Y, X = X)
betas <- as.numeric(coef(fit_betareg)[-3])
parameters <- list(beta = betas, logphi = 1)
Beta_TMB <- TMB::MakeADFun(data, parameters, DLL = "Beta_Univariate",
hessian = TRUE, silent = TRUE)
nlminb()fit_TMB <- nlminb(start = Beta_TMB$par, objective = Beta_TMB$fn, gradient = Beta_TMB$gr)
fit_TMB
## $par
## beta beta logphi
## -1.571147 4.938719 1.931693
##
## $objective
## [1] -109.0745
##
## $convergence
## [1] 0
##
## $iterations
## [1] 15
##
## $evaluations
## function gradient
## 19 16
##
## $message
## [1] "relative convergence (4)"
report <- TMB::sdreport(Beta_TMB)
summary(report, "fixed", p.value = TRUE)
## Estimate Std. Error z value Pr(>|z^2|)
## beta -1.571147 0.08132138 -19.32022 3.631180e-83
## beta 4.938719 0.47092822 10.48720 9.891575e-26
## logphi 1.931693 0.10669340 18.10508 2.905803e-73
summary(report, "report", p.value = TRUE)
## Estimate Std. Error z value Pr(>|z^2|)
## phi 6.901182 0.7363106 9.372651 7.073317e-21
// Beta Univariate regression model
#include <TMB.hpp>
template<class Type>
Type objective_function<Type>::operator() ()
{
DATA_VECTOR(Y); // Observations
DATA_MATRIX(X); // Fixed effect design matrix
PARAMETER_VECTOR(beta); // Fixed effects vector
PARAMETER(logphi); // Precision parameter (Beta)
// Preparing
Type phi = exp(logphi);
// Linear predictor for mean
vector<Type> mu = exp(X*beta)/(1 + exp(X*beta));
// Log-likelihood
Type nll = 0;
for(int i=0; i < Y.size(); i++)
nll -= lgamma(phi) - lgamma(mu(i) * phi) - lgamma( (1 - mu(i)) * phi) +
(mu(i) * phi - 1) * log(Y(i)) + ((1 - mu(i)) * phi - 1) *
log(1 - Y(i));
// Delta method
ADREPORT(phi);
return nll;
}
# Compilation
TMB::compile("Beta_Univariate2.cpp")
dyn.load(TMB::dynlib("Beta_Univariate2"))
# Log-likelihood function
Beta_TMB2 <- TMB::MakeADFun(data, parameters, DLL = "Beta_Univariate2",
hessian = TRUE, silent = TRUE)
# Fitting via nlminb()
fit_TMB2 <- nlminb(start = Beta_TMB2$par, objective = Beta_TMB2$fn,
gradient = Beta_TMB2$gr)
fit_TMB2
## $par
## beta beta logphi
## -1.571147 4.938719 1.931693
##
## $objective
## [1] -109.0745
##
## $convergence
## [1] 0
##
## $iterations
## [1] 15
##
## $evaluations
## function gradient
## 19 16
##
## $message
## [1] "relative convergence (4)"
# Report
report2 <- TMB::sdreport(Beta_TMB2)
# Summary for fixed effects
summary(report2, "fixed", p.value = TRUE)
## Estimate Std. Error z value Pr(>|z^2|)
## beta -1.571147 0.08132138 -19.32022 3.631180e-83
## beta 4.938719 0.47092822 10.48720 9.891575e-26
## logphi 1.931693 0.10669340 18.10508 2.905803e-73
betareg versus TMBtab <- rbind("betareg" = coef(fit_betareg),
"TMB dbeta" = c(fit_TMB$par[1:2], exp(fit_TMB$par[3])),
"TMB log-lik" = c(fit_TMB2$par[1:2], exp(fit_TMB2$par[3])))
tab
## (Intercept) anxiety (phi)
## betareg -1.571147 4.938718 6.901184
## TMB dbeta -1.571147 4.938719 6.901182
## TMB log-lik -1.571147 4.938719 6.901182
tab2 <- rbind("betareg" = logLik(fit_betareg), "TMB dbeta" = abs(fit_TMB$objective),
"TMB log-lik" = abs(fit_TMB2$objective))
colnames(tab2) <- "Log-likelihood"
tab2
## Log-likelihood
## betareg 109.0745
## TMB dbeta 109.0745
## TMB log-lik 109.0745
# Preparing
par(mfrow = c(1,3), mar=c(2.8, 2.6, 1.2, 0.5), mgp = c(1.6, 0.6, 0))
# beta 0
beta0 <- TMB::tmbprofile(Beta_TMB, 1, trace = F)
plot(beta0)
# beta 1
beta1 <- TMB::tmbprofile(Beta_TMB, 2, trace = F)
plot(beta1)
# logphi
logphi <- TMB::tmbprofile(Beta_TMB, 3, trace = F)
plot(logphi)
tmbstan packageoptions(mc.cores = parallel::detectCores())
rstan_options(auto_write = T)
fit_TMB_Stan <- tmbstan::tmbstan(Beta_TMB, silent = F, laplace = F, chains = 3)
# Plots
plot(fit_TMB_Stan, pars = names(Beta_TMB$par))
# Traceplot
trace <- traceplot(fit_TMB_Stan, pars = names(Beta_TMB$par))
trace + scale_color_discrete() + theme(legend.position = "top")
Cribari-Neto, F., and A. Zeileis. 2010. “Beta Regression in R.” Journal of Statistical Software 34 (2): 1–24.
Ferrari, Silvia, and Francisco Cribari-Neto. 2004. “Beta Regression for Modelling Rates and Proportions.” Journal of Applied Statistics 31 (7): 799–815.
Kristensen, Kasper, Anders Nielsen, Casper W Berg, Hans Skaug, and Bradley M Bell. 2016. “TMB: Automatic Differentiation and Laplace Approximation.” Journal of Statistical Software 70 (5).