# Fit logistic regression model using VB approximation
#
# Bayesian fit of the logistic regression model: p coefficients, n observations.
# Computes the posterior distribution of the regression coefficients using the
# variational method of Jaakkola & Jordan (1996).
#
# Arguments:
# y       binary vector of responses, length n
# X       n x p matrix of covariates, including a column of 1s for the intercept
# eps     convergence criterion: stop when the increase in the lower bound on the
#         log-evidence is no more than this
# m0      p-vector of prior means
# S0      p x p prior covariance matrix
# S0i     p x p prior precision matrix (inverse of S0)
# xi0     n-vector of initial variational (augmented) parameters, one per observation
# verb    verbose output, logical
# maxiter upper limit for iterations

library(Matrix)

vblogit <- function(y, X, eps=1e-2, m0, S0, S0i, xi0, verb=FALSE, maxiter=1000) {
  ### Logistic regression using the Jaakkola-Jordan (JJ) bound on p(y, beta | xi).
  ##
  ## Model:
  ##   y_i  ~ Bernoulli( 1/(1 + exp(-x_i'beta)) )
  ##   beta ~ N(m0, S0)
  ##
  varnames <- colnames(data.frame(as.matrix(X[1:2, ])))
  ## Write X in sparse Matrix form:
  X <- Matrix(X)
  X <- drop0(X)
  N <- length(y)
  K <- ncol(X)
  ## Priors and initial estimates:
  if(missing(S0))  S0  <- Diagonal(n=K, x=1e5)
  if(missing(S0i)) S0i <- solve(S0)
  if(missing(m0))  m0  <- rep(0, K)
  ## Constants of the lower bound:
  LE_CONST <- as.numeric( -0.5*t(m0)%*%S0i%*%m0 - 0.5*determinant(S0)$modulus )
  Sm0 <- S0i%*%m0
  ## Starting values for xi:
  if(missing(xi0)) xi0 <- rep(4, N)
  if(length(xi0)!=N) xi0 <- rep(xi0, N)[1:N]
  est <- list(m=m0, S=S0, Si=S0i, xi=xi0)
  ## Helper functions:
  ## Afunc(x) = -lambda(x), where lambda(x) = tanh(x/2)/(4x) is the JJ bound coefficient.
  ## Cfunc(x) = log sigmoid(x) - x/2 + x^2*lambda(x), the per-observation constant of the bound.
  Afunc <- function(x) -tanh(x/2)/(4*x)
  Cfunc <- function(x) x/2 - log(1+exp(x)) + x*tanh(x/2)/4
  ## Initial values:
  le <- -Inf
  le_hist <- le
  loop <- TRUE
  iter <- 0
  la <- Afunc(xi0)
  Si <- S0i - 2*t(X*la)%*%X
  S  <- solve(Si)
  m  <- S%*%( t(X)%*%(y-0.5) + Sm0 )
  elboplot <- numeric(maxiter)
  ctr <- 1
  ## Main loop:
  while(loop) {
    old <- le
    ## Update variational parameters: xi_i^2 = x_i' (S + m m') x_i
    M <- S + m%*%t(m)
    M <- (M+t(M))/2   # force symmetric in case of tiny numerical errors
    L <- t(chol(M))
    V <- X%*%L
    dR <- rowSums(V^2)
    xi2 <- dR
    xi  <- sqrt(xi2)
    la  <- Afunc(xi)
    ## Update posterior covariance:
    Si <- S0i - 2*t(X*la)%*%X
    S  <- solve(Si)
    ## Update posterior mean:
    m  <- S%*%( t(X)%*%(y-0.5) + Sm0 )
    ## Compute the lower bound on the log evidence:
    le <- as.numeric( 0.5*determinant(S)$modulus + sum(Cfunc(xi)) +
                      0.5*t(m)%*%Si%*%m + LE_CONST )
    elboplot[ctr] <- le
    ctr <- ctr + 1
    ## Check convergence:
    d <- le - old
    if(d < 0) warning("Log-evidence decreasing, try different starting values for xi.")
    iter <- iter + 1
    loop <- abs(d) > eps && iter < maxiter
    le_hist <- c(le_hist, le)
    if(verb) cat("diff:", d, " \r")
  }
  elboplot <- elboplot[1:iter]
  if(iter == maxiter) warning("Maximum iteration limit reached.")
  if(verb) cat("\n")
  ## Done. Compile:
  est <- list(m=m, S=S, Si=Si, xi=xi, lambda_xi=la)
  ## Lower bound on the log marginal evidence:
  est$logLik <- le
  est$elboplot <- elboplot
  ## Some additional parts, like in glm output:
  est$coefficients <- est$m[,1]
  names(est$coefficients) <- varnames
  est$converged <- !(maxiter==iter)
  ## Additional stuff:
  est$logp_hist <- le_hist
  est$parameters <- list(eps=eps, maxiter=maxiter)
  est$priors <- list(m=m0, S=S0)
  est$iterations <- iter
  ## Return:
  est
}

## Example ---------------------------------------------------------------

set.seed(123)

## Generate data:
n <- 500
p <- 50
X <- matrix(rnorm(n*p), ncol=p)
theta <- rnorm(p)
prob <- 1/(1+exp(-X%*%theta))
y <- rbinom(n, 1, prob)

## Fit with VB and with glm:
fit_vb  <- vblogit(y, X)
fit_glm <- glm(y ~ -1+X, family=binomial, offset=NULL)

coefs <- cbind(vb=fit_vb$coefficients, glm=coef(fit_glm))
summary(fit_vb)

## Compare vb and glm estimates:
plot(coefs, main="Estimates")
abline(0, 1)

## Compare to the true coefficients:
plot(coefs[,1]-theta)
points(coefs[,2]-theta, col=3, pch=4)
abline(h=0)
legend("topright", c("vblogit","glm"), col=c(1,3), pch=c(1,4))
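
## -----------------------------------------------------------------------
## Optional check (a minimal sketch, not part of the original script).
## It assumes fit_vb, fit_glm and theta from the example above exist.
## Besides point estimates, the VB fit returns the posterior covariance S
## and the history of the lower bound, so we can inspect convergence and
## compare approximate 95% credible intervals against glm's Wald intervals.

## Trace of the lower bound over iterations:
plot(fit_vb$elboplot, type="b", xlab="iteration", ylab="lower bound on log-evidence")

## Approximate 95% intervals from both fits:
vb_sd  <- sqrt(diag(as.matrix(fit_vb$S)))                 # VB posterior std. deviations
glm_sd <- summary(fit_glm)$coefficients[, "Std. Error"]   # glm standard errors
ci_vb  <- cbind(fit_vb$coefficients - 1.96*vb_sd,  fit_vb$coefficients + 1.96*vb_sd)
ci_glm <- cbind(coef(fit_glm)       - 1.96*glm_sd, coef(fit_glm)       + 1.96*glm_sd)

## Empirical coverage of the true coefficients theta by each interval type:
mean(theta >= ci_vb[,1]  & theta <= ci_vb[,2])
mean(theta >= ci_glm[,1] & theta <= ci_glm[,2])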