# Geometric and Tweedie regression models ---------------------------- # Date: 14/10/2019 --------------------------------------------------- rm(list=ls()) # Loading extra packages require(mcglm) require(tweedie) require(numDeriv) require(Matrix) require(splines) require(bbmle) require(statmod) ## Loadind data set data3 <- read.csv("wire_dataset3.csv", header = TRUE) head(data3) str(data3) data <- data3[,c("Receipt.Quarter", "Calendar.Year", "Amount.in.USD", "Sector.Theme")] names(data) <- c("Quarter", "Year", "Y", "Sector") data$Quarter <- relevel(data$Quarter, ref = "JAN-MAR") boxplot(log(data$Y) ~ data$Year) boxplot(log(data$Y) ~ data$Quarter) boxplot(log(data$Y) ~ data$Sector) data$y <- data$Y/1000000 # Linear predictor form <- y ~ Quarter + Sector ## Tweedie regression model source("tweedieregfunctions.r") fit1 <- glm.tw_mle(form, initial = c(2, log(5.25)), data = data, method = "profile", method.optim = "Nelder-Mead") ## Geometric Tweedie regression model source("gtw.r") pts <- gauss.quad(50, kind = "laguerre") ini <- as.numeric(fit1$Estimates[1:9]) fit2 <- glm_geom_tweedie(form, points = pts, integration = "GL", initial = c(ini, 2, 1.80), fit = "dispersion", data = data) fit2$Summary fit2.1 <- glm_geom_tweedie(form, points = pts, integration = "GL", initial = c(ini, 2.3941463, -0.4674562), fit = "full", data = data) fit2.1 fit2.1$Summary$Estimates fit2.2 <- glm_geom_tweedie(form, points = pts, integration = "GL", initial = fit2.1$Summary$Estimates, fit = "dispersion", data = data) fit2.2$logLik fit2.1$logLik ## QMLE - Tweedie fit3 <- mcglm(c(form), list(mc_id(data)), link = "log", variance = "tweedie", power_fixed = FALSE, control_algorithm = list(tuning = 0.25, correct = F, max_iter = 100), data = data) summary(fit3) ## QMLE - Tweedie fit4 <- mcglm(c(form), list(mc_id(data)), link = "log", variance = "geom_tweedie", power_fixed = FALSE, control_algorithm = list(tuning = 0.25, correct = F, max_iter = 100), data = data) cbind(round(fit1$Estimates, 2), round(sqrt(diag(fit1$VCOV)), 2)) round(exp(fit1$Estimates[10]), 2) round(sqrt(exp(fit1$Estimates[10])*diag(fit1$VCOV)[10]), 2) cbind(round(fit2.1$Summary$Estimates, 2), round(fit2.1$Summary$Std_error, 2)) round(exp(fit2.1$Summary$Estimates[11]), 2) round(sqrt(exp(fit2.1$Summary$Estimates[11])*diag(fit2$VCOV)[2]), 2) round(coef(fit3, std.error = TRUE)[,1:2], 2) round(coef(fit4, std.error = TRUE)[,1:2], 2) fit1$logLik fit2.2$logLik gof(fit3) gof(fit4)