# Fit logistic regression model using VB approximation
#
# Bayesian fit of the logistic regression model: p coefficients, n observations.
# Computes the posterior distribution of the regression coefficients using the
# variational method of Jaakkola & Jordan (1996).
#
# Arguments:
# y       binary vector of responses, length n
# X       n x p matrix of covariates, including a column of 1s for the intercept
# eps     convergence criterion: stop when the increase in the lower bound on the
#         log-evidence is no more than this
# m0      p-vector of prior means
# S0      p x p prior covariance matrix
# S0i     p x p prior precision matrix (inverse of S0)
# xi0     n-vector of initial variational (augmented) parameters, one per observation
# verb    verbose output, logical
# maxiter upper limit for iterations

library(Matrix)

vblogit <- function(y, X, eps=1e-2, m0, S0, S0i, xi0, verb=FALSE, maxiter=1000) {
  ### Logistic regression using the Jaakkola-Jordan (JJ) bound on p(y, beta | xi).
  ##
  ## Model:
  ##   y_i  ~ Bernoulli( 1/(1 + exp(-x_i'beta)) )
  ##   beta ~ N(m0, S0)
  ##
  varnames <- colnames(data.frame(as.matrix(X[1:2, ])))
  ## Write X in sparse Matrix form:
  X <- Matrix(X)
  X <- drop0(X)
  N <- length(y)
  K <- ncol(X)
  ## Priors and initial estimates:
  if(missing(S0))  S0  <- Diagonal(n=K, x=1e5)
  if(missing(S0i)) S0i <- solve(S0)
  if(missing(m0))  m0  <- rep(0, K)
  ## Constants of the lower bound:
  LE_CONST <- as.numeric( -0.5*t(m0)%*%S0i%*%m0 - 0.5*determinant(S0)$modulus )
  Sm0 <- S0i%*%m0
  ## Starting values for xi:
  if(missing(xi0)) xi0 <- rep(4, N)
  if(length(xi0)!=N) xi0 <- rep(xi0, N)[1:N]
  est <- list(m=m0, S=S0, Si=S0i, xi=xi0)
  ## Helper functions:
  ## Afunc(x) = -lambda(x), where lambda(x) = tanh(x/2)/(4x) is the JJ bound coefficient.
  ## Cfunc(x) = log sigmoid(x) - x/2 + x^2*lambda(x), the per-observation constant of the bound.
  Afunc <- function(x) -tanh(x/2)/(4*x)
  Cfunc <- function(x) x/2 - log(1+exp(x)) + x*tanh(x/2)/4
  ## Initial values:
  le <- -Inf
  le_hist <- le
  loop <- TRUE
  iter <- 0
  la <- Afunc(xi0)
  Si <- S0i - 2*t(X*la)%*%X
  S  <- solve(Si)
  m  <- S%*%( t(X)%*%(y-0.5) + Sm0 )
  elboplot <- numeric(maxiter)
  ctr <- 1
  ## Main loop:
  while(loop) {
    old <- le
    ## Update variational parameters: xi_i^2 = x_i' (S + m m') x_i
    M <- S + m%*%t(m)
    M <- (M+t(M))/2   # force symmetric in case of tiny numerical errors
    L <- t(chol(M))
    V <- X%*%L
    dR <- rowSums(V^2)
    xi2 <- dR
    xi  <- sqrt(xi2)
    la  <- Afunc(xi)
    ## Update posterior covariance:
    Si <- S0i - 2*t(X*la)%*%X
    S  <- solve(Si)
    ## Update posterior mean:
    m  <- S%*%( t(X)%*%(y-0.5) + Sm0 )
    ## Compute the lower bound on the log evidence:
    le <- as.numeric( 0.5*determinant(S)$modulus + sum(Cfunc(xi)) +
                      0.5*t(m)%*%Si%*%m + LE_CONST )
    elboplot[ctr] <- le
    ctr <- ctr + 1
    ## Check convergence:
    d <- le - old
    if(d < 0) warning("Log-evidence decreasing, try different starting values for xi.")
    iter <- iter + 1
    loop <- abs(d) > eps && iter < maxiter
    le_hist <- c(le_hist, le)
    if(verb) cat("diff:", d, " \r")
  }
  elboplot <- elboplot[1:iter]
  if(iter == maxiter) warning("Maximum iteration limit reached.")
  if(verb) cat("\n")
  ## Done. Compile:
  est <- list(m=m, S=S, Si=Si, xi=xi, lambda_xi=la)
  ## Lower bound on the log marginal evidence:
  est$logLik <- le
  est$elboplot <- elboplot
  ## Some additional parts, like in glm output:
  est$coefficients <- est$m[,1]
  names(est$coefficients) <- varnames
  est$converged <- !(maxiter==iter)
  ## Additional stuff:
  est$logp_hist <- le_hist
  est$parameters <- list(eps=eps, maxiter=maxiter)
  est$priors <- list(m=m0, S=S0)
  est$iterations <- iter
  ## Return:
  est
}

## Example ---------------------------------------------------------------

set.seed(123)

## Generate data:
n <- 500
p <- 50
X <- matrix(rnorm(n*p), ncol=p)
theta <- rnorm(p)
prob <- 1/(1+exp(-X%*%theta))
y <- rbinom(n, 1, prob)

## Fit with VB and with glm:
fit_vb  <- vblogit(y, X)
fit_glm <- glm(y ~ -1+X, family=binomial, offset=NULL)

coefs <- cbind(vb=fit_vb$coefficients, glm=coef(fit_glm))
summary(fit_vb)

## Compare vb and glm estimates:
plot(coefs, main="Estimates")
abline(0, 1)

## Compare to the true coefficients:
plot(coefs[,1]-theta)
points(coefs[,2]-theta, col=3, pch=4)
abline(h=0)
legend("topright", c("vblogit","glm"), col=c(1,3), pch=c(1,4))
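
## -----------------------------------------------------------------------
## Optional check (a minimal sketch, not part of the original script).
## It assumes fit_vb, fit_glm and theta from the example above exist.
## Besides point estimates, the VB fit returns the posterior covariance S
## and the history of the lower bound, so we can inspect convergence and
## compare approximate 95% credible intervals against glm's Wald intervals.

## Trace of the lower bound over iterations:
plot(fit_vb$elboplot, type="b", xlab="iteration", ylab="lower bound on log-evidence")

## Approximate 95% intervals from both fits:
vb_sd  <- sqrt(diag(as.matrix(fit_vb$S)))                 # VB posterior std. deviations
glm_sd <- summary(fit_glm)$coefficients[, "Std. Error"]   # glm standard errors
ci_vb  <- cbind(fit_vb$coefficients - 1.96*vb_sd,  fit_vb$coefficients + 1.96*vb_sd)
ci_glm <- cbind(coef(fit_glm)       - 1.96*glm_sd, coef(fit_glm)       + 1.96*glm_sd)

## Empirical coverage of the true coefficients theta by each interval type:
mean(theta >= ci_vb[,1]  & theta <= ci_vb[,2])
mean(theta >= ci_glm[,1] & theta <= ci_glm[,2])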