library("rstan") #rstan_options(auto_write = TRUE) #options(mc.cores = parallel::detectCores()) ## Check https://mc-stan.org/docs/2_19/stan-users-guide/ for a lot of ## examples ## Eight Schools Example eightschools <- " data { int J; // number of schools real y[J]; // estimated treatment effects real sigma[J]; // s.e. of effect estimates } parameters { real mu; // mean effect for schools real tau; // variance real eta[J]; // individual school effect } transformed parameters { // theta is a function of our parameters real theta[J]; for (j in 1:J) theta[j] = mu + tau * eta[j]; } model { target += normal_lpdf(eta | 0, 1); // eta ~ N(0,1) target += normal_lpdf(y | theta, sigma); // y ~ N(theta, sigma^2), theta(mu, tau, eta) }" require("rstan") set.seed(0) schools_dat <- list(J = 8, y = c(28, 8, -3, 7, -1, 1, 18, 12), sigma = c(15, 10, 16, 11, 9, 11, 10, 18)) fit <- stan(model_code = eightschools, data = schools_dat, iter = 10000, warmup = 100, chains = 4) ## Extract the posterior samples class(fit) list_of_draws <- extract(fit) print(names(list_of_draws)) head(list_of_draws$mu) head(list_of_draws$tau) head(list_of_draws$theta) matrix_of_draws <- as.matrix(fit) print(colnames(matrix_of_draws)) df_of_draws <- as.data.frame(fit) print(colnames(df_of_draws)) array_of_draws <- as.array(fit) print(dimnames(array_of_draws)) print(dim(matrix_of_draws)) print(dim(df_of_draws)) print(dim(array_of_draws)) mu_and_theta1 <- as.matrix(fit, pars = c("mu", "theta[1]")) head(mu_and_theta1) ## Obtain summary fit_summary <- summary(fit) print(names(fit_summary)) #In fit_summary$summary all chains are merged whereas #fit_summary$c_summary contains summaries for each chain individually. #Typically we want the summary for all chains merged, which is what we’ll #focus on here. The summary is a matrix with rows #corresponding to parameters and columns to the #various summary quantities. #These include the posterior mean, #the posterior standard deviation, #and various quantiles computed from the draws. #The probs argument can be used to specify which #quantiles to compute and pars can be used to specify a #subset of parameters to include in the summary. #For models fit using MCMC, #also included in the summary are the Monte Carlo standard error #(se_mean), the effective sample size (n_eff), and the R-hat statistic (Rhat). #If, for example, #we wanted the only quantiles included to be 10% and 90%, #and for only the parameters included to be mu and tau, #we would specify that like this: mu_tau_summary <- summary(fit, pars = c("mu", "tau"), probs = c(0.1, 0.9))$summary print(mu_tau_summary) ## Show confidence intervals plot(fit, ci_level = 0.95, outer_level = 0.999) ## Sampler diagnostics #For models fit using MCMC the stanfit object #will also contain the values of parameters used for the sampler. #The get_sampler_params function can be used to access this information. #The object returned by get_sampler_params is a list with one component #(a matrix) per chain. #Each of the matrices has number of columns #corresponding to the number of sampler parameters #and the column names provide the parameter names. #The optional argument inc_warmup (defaulting to TRUE) #indicates whether to include the warmup period. traceplot(fit, pars = c("mu", "tau")) sampler_params <- get_sampler_params(fit, inc_warmup = FALSE) sampler_params_chain1 <- sampler_params[[1]] colnames(sampler_params_chain1) #To do things like calculate #the average value of accept_stat__ for each chain #(or the maximum value of treedepth__ for each chain # if using the NUTS algorithm, etc.) #the sapply function is useful as it will apply the #same function to each component of sampler_params: mean_accept_stat_by_chain <- sapply(sampler_params, function(x) mean(x[, "accept_stat__"])) print(mean_accept_stat_by_chain) ##The Stan program itself is also stored ##in the stanfit object and can be accessed using get_stancode: code <- get_stancode(fit) #The object code is a single string and is not very #intelligible when printed: print(code) #A readable version can be printed using cat: cat(code) ### The get_inits function returns #initial values as a list with one component per chain. #Each component is itself a (named) list containing #the initial values for each parameter for the corresponding chain: inits <- get_inits(fit) inits_chain1 <- inits[[1]] print(inits_chain1) #The get_seed function returns the (P)RNG seed as an integer: print(get_seed(fit)) # The get_elapsed_time function returns a matrix with the # warmup and sampling times for each chain: print(get_elapsed_time(fit))