########################################################################################## ## Data analysis 1 - Multivariate count regression models ################################ ## Author: Wagner Hugo Bonat LEG/IMADA ################################################### ## E-mail: wbonat@ufpr.br ################################################################ ## Date: 31/07/2014 ###################################################################### ## Latest updated: 26/03/2015 ############################################################ ########################################################################################## ########################################################################################## #A. Colin Cameron and Pravin K. Trivedi (1998), Regression Analysis of Count Data, #Econometric Society Monograph No.30, Cambridge University Press, 1998. #ISBN: 0 521 63567 5 paperback 432 pages. ########################################################################################## rm(list=ls()) ########################################################################################## ## Loading extra packages ################################################################ ########################################################################################## require(Matrix) ########################################################################################## ## Loading preliminars functions ######################################################### ########################################################################################## source("mcglm.R") ########################################################################################## ## Loading data set ###################################################################### ########################################################################################## data = read.table("DataSet1.txt") data$sex <- as.factor(data$sex) data$levyplus <- as.factor(data$levyplus) data$freepoor <- as.factor(data$freepoor) data$freerepa <- as.factor(data$freerepa) data$chcond1 <- as.factor(data$chcond1) data$chcond2 <- as.factor(data$chcond2) ## Covariates: sex, age, income, levyplus, freepoor, freerepa, hscore, chcond1, chcond2. ## Response: Ndoc, Nndoc, Nadm, Nhosp, Nmed ########################################################################################## ## Exploratory data analysis ############################################################# ########################################################################################## par(mfrow=c(1,5), mar=c(3,2.7,1,0.6),mgp=c(1.8,0.8,0)) plot(table(data$Ndoc), type="h", lwd=5, ylab="Freq", xlab = "Consults doctor", main = "Ndoc") plot(table(data$Nndoc), type="h", lwd=5, ylab="Freq", xlab = "Consults non-doctor", main = "Nndoc") plot(table(data$Nmed), type="h", lwd=5, ylab="Freq", xlab = "Prescribed and nonprescribed medications", main = "Nmed") plot(table(data$Nhosp), type="h", lwd=5, ylab="Freq", xlab = "Nights in a hospital", main = "Nhosp") plot(table(data$Nadm), type="h", lwd=5, ylab="Freq", xlab = "Admission to a hospital", main = "Nadm") ## Naive correlation matrix cor(data[,c(12,13,16,14,15)]) ########################################################################################## ## Independent models #################################################################### ########################################################################################## ## Ndoc mod1 <- glm(Ndoc ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2, data = data, family = poisson(link="log")) ## Nndoc mod2 <- glm(Nndoc ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2, data = data, family = poisson(link="log")) ## Nmed mod3 <- glm(Nmed ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2, data = data, family = poisson(link="log")) ## Nhost mod4 <- glm(Nhosp ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2, data = data, family = poisson(link="log")) ## Nadm mod5 <- glm(Nadm ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2, data = data, family = poisson(link="log")) ## Correlation between residuals cor(cbind(residuals(mod1),residuals(mod2),residuals(mod3),residuals(mod4),residuals(mod5))) ########################################################################################## ## Multivariate Poisson-Tweedie models using uncorrected Pearson estimator ############### ########################################################################################## ## List of initial values list.initial <- list() list.initial$Regression <- list("mod1" = coef(mod1), "mod2" = coef(mod2), "mod3" = coef(mod3), "mod4" = coef(mod4), "mod5" = coef(mod5)) list.initial$power.vector <- c(1,1,1,1,1) list.initial$tau.list <- list("mod1" = c(1.9), "mod2" = c(3.6), "mod3" = c(1.36), "mod4" = c(20), "mod5" = c(1.32)) list.initial$rho <- rep(0,10) ## Matrix linear predictor R0 <- Diagonal(n = dim(data)[1]) Omega.args1 <- list(models = c("user"), data = data, R.mat = R0) Omega.args2 <- list(models = c("user"), data = data, R.mat = R0) Omega.args3 <- list(models = c("user"), data = data, R.mat = R0) Omega.args4 <- list(models = c("user"), data = data, R.mat = R0) Omega.args5 <- list(models = c("user"), data = data, R.mat = R0) ## Fitting the model fit1 <- mcglm(list.predictor = list("mod1" = Ndoc ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2, "mod2" = Nndoc ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2, "mod3" = Nmed ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2, "mod4" = Nhosp ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2, "mod5" = Nadm ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2), list.Omega = list(Omega.args1, Omega.args1, Omega.args1, Omega.args1,Omega.args1), link = c("log","log","log","log","log"), variance = c("poisson.tweedie","poisson.tweedie","poisson.tweedie","poisson.tweedie","poisson.tweedie"), matrix.link = "identity", correct = FALSE, data = data, extra.list = list(NULL,NULL,NULL,NULL, NULL), power.fixed = FALSE, list.initial = list.initial) fit1 ## Comparing estimates and standard errors from McGLM and Poisson independent models ##### cbind(fit1$Regression[[1]]$Estimate,coef(mod1),fit1$Regression[[1]]$Estimate/coef(mod1) ) cbind(fit1$Regression[[1]]$Std.Error,sqrt(diag(vcov(mod1))), fit1$Regression[[1]]$Std.Error/sqrt(diag(vcov(mod1)))) cbind(fit1$Regression[[2]]$Estimate,coef(mod2),fit1$Regression[[2]]$Estimate/coef(mod2)) cbind(fit1$Regression[[2]]$Std.Error,sqrt(diag(vcov(mod2))), fit1$Regression[[2]]$Std.Error/sqrt(diag(vcov(mod2)))) cbind(fit1$Regression[[3]]$Estimate,coef(mod3),fit1$Regression[[3]]$Estimate/coef(mod3)) cbind(fit1$Regression[[3]]$Std.Error,sqrt(diag(vcov(mod3))), fit1$Regression[[3]]$Std.Error/sqrt(diag(vcov(mod3)))) cbind(fit1$Regression[[4]]$Estimate,coef(mod4),fit1$Regression[[4]]$Estimate/coef(mod4)) cbind(fit1$Regression[[4]]$Std.Error,sqrt(diag(vcov(mod4))), fit1$Regression[[4]]$Std.Error/sqrt(diag(vcov(mod4)))) cbind(fit1$Regression[[5]]$Estimate,coef(mod5),fit1$Regression[[5]]$Estimate/coef(mod5)) cbind(fit1$Regression[[5]]$Std.Error,sqrt(diag(vcov(mod5))), fit1$Regression[[5]]$Std.Error/sqrt(diag(vcov(mod5)))) ########################################################################################## ## Fitting the model using the Pearson corrected estimator ############################### ########################################################################################## fit1.correct <- mcglm(list.predictor = list("mod1"= Ndoc ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2, "mod2" = Nndoc ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2, "mod3" = Nmed ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2, "mod4" = Nhosp ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2, "mod5" = Nadm ~ sex + age + income + levyplus + freepoor + freerepa + hscore + chcond1 + chcond2), list.Omega = list(Omega.args1, Omega.args1, Omega.args1, Omega.args1,Omega.args1), link = c("log","log","log","log","log"), variance = c("poisson.tweedie","poisson.tweedie","poisson.tweedie","poisson.tweedie","poisson.tweedie"), matrix.link = "identity", correct = TRUE, data = data, extra.list = list(NULL,NULL,NULL,NULL, NULL), power.fixed = FALSE, list.initial = list.initial) fit1.correct ## Comparing estimatimates and standar errors from corrected and uncorrected McGLM cbind(fit1$Regression[[1]]$Estimate, fit1.correct$Regression[[1]]$Estimate,coef(mod1)) cbind(fit1$Regression[[1]]$Std.Error,fit1.correct$Regression[[1]]$Std.Error, sqrt(diag(vcov(mod1)))) cbind(fit1$Regression[[2]]$Estimate, fit1.correct$Regression[[2]]$Estimate,coef(mod2)) cbind(fit1$Regression[[2]]$Std.Error,sqrt(diag(vcov(mod2)))) cbind(fit1$Regression[[3]]$Estimate, fit1.correct$Regression[[3]]$Estimate,coef(mod3)) cbind(fit1$Regression[[3]]$Std.Error,fit1.correct$Regression[[3]]$Std.Error,sqrt(diag(vcov(mod3)))) cbind(fit1$Regression[[4]]$Estimate, fit1.correct$Regression[[4]]$Estimate, coef(mod4)) cbind(fit1$Regression[[4]]$Std.Error,fit1.correct$Regression[[4]]$Std.Error,sqrt(diag(vcov(mod4)))) cbind(fit1$Regression[[5]]$Estimate,fit1.correct$Regression[[5]]$Estimate,coef(mod5)) cbind(fit1$Regression[[5]]$Std.Error, fit1.correct$Regression[[5]]$Std.Error,sqrt(diag(vcov(mod5)))) ## Computing the average rate between standard errors from McGLM and Poisson models. mean(cbind(fit1.correct$Regression[[1]]$Std.Error,sqrt(diag(vcov(mod1))), fit1.correct$Regression[[1]]$Std.Error/sqrt(diag(vcov(mod1))))[,3]) mean(cbind(fit1$Regression[[2]]$Std.Error,sqrt(diag(vcov(mod2))), fit1$Regression[[2]]$Std.Error/sqrt(diag(vcov(mod2))))[,3]) mean(cbind(fit1$Regression[[3]]$Std.Error,sqrt(diag(vcov(mod3))), fit1$Regression[[3]]$Std.Error/sqrt(diag(vcov(mod3))))[,3]) mean(cbind(fit1$Regression[[4]]$Std.Error,sqrt(diag(vcov(mod4))), fit1$Regression[[4]]$Std.Error/sqrt(diag(vcov(mod4))))[,3]) mean(cbind(fit1$Regression[[5]]$Std.Error,sqrt(diag(vcov(mod5))), fit1$Regression[[5]]$Std.Error/sqrt(diag(vcov(mod5))))[,3]) ## Saving the results save.image("DataSet1.RData")