Title: | Bayesian Structural Equation Modeling in Multiple Omics Data Integration |
---|---|
Description: | Provides Markov Chain Monte Carlo (MCMC) routine for the structural equation modelling described in Maity et. al. (2020) <doi:10.1093/bioinformatics/btaa286>. This MCMC sampler is useful when one attempts to perform an integrative survival analysis for multiple platforms of the Omics data where the response is time to event and the predictors are different omics expressions for different platforms. |
Authors: | Arnab Maity [aut, cre], Sang Chan Lee [aut], Bani K. Mallick [aut], Samsiddhi Bhattacharjee [aut], Nidhan K. Biswas [aut] |
Maintainer: | Arnab Maity <[email protected]> |
License: | GPL-3 |
Version: | 0.0.6 |
Built: | 2024-11-05 02:40:21 UTC |
Source: | https://github.com/maitya02/semmcmc |
MCMC routine for the strucatural equation model
mcmc(ct, u1, u2, X, nburnin = 1000, nmc = 2000, nthin = 1)
mcmc(ct, u1, u2, X, nburnin = 1000, nmc = 2000, nthin = 1)
ct |
survival response, a |
u1 |
Matix of predictors from the first platform, dimension |
u2 |
Matix of predictors from the first platform, dimension |
X |
Matrix of covariates, dimension |
nburnin |
number of burnin samples |
nmc |
number of markov chain samples |
nthin |
thinning parameter. Default is 1 (no thinning) |
pMean.beta.t |
|
pMean.beta.t |
|
pMean.alpha.t |
|
pMean.alpha.t |
|
pMean.phi.t |
|
pMean.phi.t |
|
pMean.alpha.u1 |
|
pMean.alpha.u2 |
|
pMean.alpha.u2 |
|
pMean.phi.u1 |
|
pMean.eta1 |
|
pMean.eta2 |
|
pMean.sigma.t.square |
|
pMean.sigma.u1.square |
|
pMean.sigma.u2.square |
|
alpha.t.samples |
|
phi.t.samples |
|
beta1.t.samples |
|
beta2.t.samples |
|
beta.t.samples |
|
alpha.u1.samples |
|
alpha.u2.samples |
|
phi.u1.samples |
|
phi.u2.samples |
|
eta1.samples |
|
eta2.samples |
|
sigma.t.square.samples |
|
sigma.u1.square.samples |
|
sigma.u2.square.samples |
|
pMean.logt.hat |
|
DIC |
|
WAIC |
Maity, A. K., Lee, S. C., Mallick, B. K., & Sarkar, T. R. (2020). Bayesian structural equation modeling in multiple omics data integration with application to circadian genes. Bioinformatics, 36(13), 3951-3958.
require(MASS) # for random number from multivariate normal distribution n <- 100 # number of individuals p <- 5 # number of variables q1 <- 20 # dimension of the response q2 <- 20 # dimension of the response ngrid <- 1000 nburnin <- 100 nmc <- 200 nthin <- 5 niter <- nburnin + nmc effsamp <- (niter - nburnin)/nthin alpha.tt <- runif(n = 1, min = -1, max = 1) # intercept term alpha.u1t <- runif(n = 1, min = -1, max = 1) # intercept term alpha.u2t <- runif(n = 1, min = -1, max = 1) # intercept term beta.tt <- runif(n = p, min = -1, max = 1) # regression parameter gamma1.t <- runif(n = q1, min = -1, max = 1) gamma2.t <- runif(n = q2, min = -1, max = 1) phi.tt <- 1 phi.u1t <- 1 phi.u2t <- 1 sigma.tt <- 1 sigma.u1t <- 1 sigma.u2t <- 1 sigma.etat1 <- 1 sigma.etat2 <- 1 x <- mvrnorm(n = n, mu = rep(0, p), Sigma = diag(p)) eta2 <- rnorm(n = 1, mean = 0, sd = sigma.etat2) eta1 <- rnorm(n = 1, mean = eta2, sd = sigma.etat1) logt <- rnorm(n = n, mean = alpha.tt + x %*% beta.tt + eta1 * phi.tt, sd = sigma.tt) u1 <- matrix(rnorm(n = n * q1, mean = alpha.u1t + eta1 * phi.u1t, sd = sigma.u1t), nrow = n, ncol = q1) u2 <- matrix(rnorm(n = n * q2, mean = alpha.u2t + eta2 * phi.u2t, sd = sigma.u2t), nrow = n, ncol = q2) logt <- rnorm(n = n, mean = alpha.tt + x %*% beta.tt + u1 %*% gamma1.t + u2 %*% gamma2.t, sd = sigma.tt) # Survival time generation T <- exp(logt) # AFT model C <- rgamma(n, shape = 1, rate = 1) # 50% censor time <- pmin(T, C) # observed time is min of censored and true status = time == T # set to 1 if event is observed 1 - sum(status)/length(T) # censoring rate censor.rate <- 1 - sum(status)/length(T) # censoring rate censor.rate summary(C) summary(T) ct <- as.matrix(cbind(time = time, status = status)) # censored time logt.grid <- seq(from = min(logt) - 1, to = max(logt) + 1, length.out = ngrid) index1 <- which(ct[, 2] == 1) # which are NOT censored ct1 <- ct[index1, ] posterior.fit.sem <- mcmc(ct, u1, u2, x, nburnin = nburnin, nmc = nmc, nthin = nthin) pMean.beta.t <- posterior.fit.sem$pMean.beta.t pMean.alpha.t <- posterior.fit.sem$pMean.alpha.t pMean.phi.t <- posterior.fit.sem$pMean.phi.t pMean.alpha.u1 <- posterior.fit.sem$pMean.alpha.u1 pMean.alpha.u2 <- posterior.fit.sem$pMean.alpha.u2 pMean.phi.u1 <- posterior.fit.sem$pMean.phi.u1 pMean.phi.u2 <- posterior.fit.sem$pMean.phi.u2 pMean.eta1 <- posterior.fit.sem$pMean.eta1 pMean.eta2 <- posterior.fit.sem$pMean.eta2 pMean.logt.hat <- posterior.fit.sem$posterior.fit.sem pMean.sigma.t.square <- posterior.fit.sem$pMean.sigma.t.square pMean.sigma.u1.square <- posterior.fit.sem$pMean.sigma.u1.square pMean.sigma.u2.square <- posterior.fit.sem$pMean.sigma.u2.square pMean.logt.hat <- posterior.fit.sem$pMean.logt.hat DIC.sem <- posterior.fit.sem$DIC WAIC.sem <- posterior.fit.sem$WAIC mse.sem <- mean(pMean.logt.hat[index1] - log(ct1[, 1]))^2 alpha.t.samples <- posterior.fit.sem$alpha.t.samples beta1.t.samples <- posterior.fit.sem$beta1.t.samples beta2.t.samples <- posterior.fit.sem$beta2.t.samples beta.t.samples <- posterior.fit.sem$beta.t.samples phi.t.samples <- posterior.fit.sem$phi.t.samples alpha.u1.samples <- posterior.fit.sem$alpha.u1.samples alpha.u2.samples <- posterior.fit.sem$alpha.u2.samples phi.u1.samples <- posterior.fit.sem$phi.u1.samples phi.u2.samples <- posterior.fit.sem$phi.u2.samples sigma.t.square.samples <- posterior.fit.sem$sigma.t.square.samples sigma.u1.square.samples <- posterior.fit.sem$sigma.u1.square.samples sigma.u2.square.samples <- posterior.fit.sem$sigma.u2.square.samples eta1.samples <- posterior.fit.sem$eta1.samples eta2.samples <- posterior.fit.sem$eta2.samples inv.cpo <- matrix(0, nrow = effsamp, ncol = n) # this will store inverse cpo values log.cpo <- rep(0, n) # this will store log cpo for(iter in 1:effsamp) # Post burn in { inv.cpo[iter, ] <- 1/(dnorm(ct[, 1], mean = alpha.t.samples[iter] + x %*% beta.t.samples[, iter] + + eta1.samples[iter] * phi.t.samples[iter], sd = sqrt(sigma.t.square.samples[iter]))^ct[, 2] * pnorm(ct[, 1], mean = alpha.t.samples[iter] + x %*% beta.t.samples[, iter] + + eta1.samples[iter] * phi.t.samples[iter], sd = sqrt(sigma.t.square.samples[iter]), lower.tail = FALSE)^(1 - ct[, 2])) } # End of iter loop for (i in 1:n){ log.cpo[i] <- -log(mean(inv.cpo[, i])) # You average invcpo[i] over the iterations, # then take 1/average and then take log. # Hence the negative sign in the log } lpml.sem <- sum(log.cpo) DIC.sem WAIC.sem mse.sem
require(MASS) # for random number from multivariate normal distribution n <- 100 # number of individuals p <- 5 # number of variables q1 <- 20 # dimension of the response q2 <- 20 # dimension of the response ngrid <- 1000 nburnin <- 100 nmc <- 200 nthin <- 5 niter <- nburnin + nmc effsamp <- (niter - nburnin)/nthin alpha.tt <- runif(n = 1, min = -1, max = 1) # intercept term alpha.u1t <- runif(n = 1, min = -1, max = 1) # intercept term alpha.u2t <- runif(n = 1, min = -1, max = 1) # intercept term beta.tt <- runif(n = p, min = -1, max = 1) # regression parameter gamma1.t <- runif(n = q1, min = -1, max = 1) gamma2.t <- runif(n = q2, min = -1, max = 1) phi.tt <- 1 phi.u1t <- 1 phi.u2t <- 1 sigma.tt <- 1 sigma.u1t <- 1 sigma.u2t <- 1 sigma.etat1 <- 1 sigma.etat2 <- 1 x <- mvrnorm(n = n, mu = rep(0, p), Sigma = diag(p)) eta2 <- rnorm(n = 1, mean = 0, sd = sigma.etat2) eta1 <- rnorm(n = 1, mean = eta2, sd = sigma.etat1) logt <- rnorm(n = n, mean = alpha.tt + x %*% beta.tt + eta1 * phi.tt, sd = sigma.tt) u1 <- matrix(rnorm(n = n * q1, mean = alpha.u1t + eta1 * phi.u1t, sd = sigma.u1t), nrow = n, ncol = q1) u2 <- matrix(rnorm(n = n * q2, mean = alpha.u2t + eta2 * phi.u2t, sd = sigma.u2t), nrow = n, ncol = q2) logt <- rnorm(n = n, mean = alpha.tt + x %*% beta.tt + u1 %*% gamma1.t + u2 %*% gamma2.t, sd = sigma.tt) # Survival time generation T <- exp(logt) # AFT model C <- rgamma(n, shape = 1, rate = 1) # 50% censor time <- pmin(T, C) # observed time is min of censored and true status = time == T # set to 1 if event is observed 1 - sum(status)/length(T) # censoring rate censor.rate <- 1 - sum(status)/length(T) # censoring rate censor.rate summary(C) summary(T) ct <- as.matrix(cbind(time = time, status = status)) # censored time logt.grid <- seq(from = min(logt) - 1, to = max(logt) + 1, length.out = ngrid) index1 <- which(ct[, 2] == 1) # which are NOT censored ct1 <- ct[index1, ] posterior.fit.sem <- mcmc(ct, u1, u2, x, nburnin = nburnin, nmc = nmc, nthin = nthin) pMean.beta.t <- posterior.fit.sem$pMean.beta.t pMean.alpha.t <- posterior.fit.sem$pMean.alpha.t pMean.phi.t <- posterior.fit.sem$pMean.phi.t pMean.alpha.u1 <- posterior.fit.sem$pMean.alpha.u1 pMean.alpha.u2 <- posterior.fit.sem$pMean.alpha.u2 pMean.phi.u1 <- posterior.fit.sem$pMean.phi.u1 pMean.phi.u2 <- posterior.fit.sem$pMean.phi.u2 pMean.eta1 <- posterior.fit.sem$pMean.eta1 pMean.eta2 <- posterior.fit.sem$pMean.eta2 pMean.logt.hat <- posterior.fit.sem$posterior.fit.sem pMean.sigma.t.square <- posterior.fit.sem$pMean.sigma.t.square pMean.sigma.u1.square <- posterior.fit.sem$pMean.sigma.u1.square pMean.sigma.u2.square <- posterior.fit.sem$pMean.sigma.u2.square pMean.logt.hat <- posterior.fit.sem$pMean.logt.hat DIC.sem <- posterior.fit.sem$DIC WAIC.sem <- posterior.fit.sem$WAIC mse.sem <- mean(pMean.logt.hat[index1] - log(ct1[, 1]))^2 alpha.t.samples <- posterior.fit.sem$alpha.t.samples beta1.t.samples <- posterior.fit.sem$beta1.t.samples beta2.t.samples <- posterior.fit.sem$beta2.t.samples beta.t.samples <- posterior.fit.sem$beta.t.samples phi.t.samples <- posterior.fit.sem$phi.t.samples alpha.u1.samples <- posterior.fit.sem$alpha.u1.samples alpha.u2.samples <- posterior.fit.sem$alpha.u2.samples phi.u1.samples <- posterior.fit.sem$phi.u1.samples phi.u2.samples <- posterior.fit.sem$phi.u2.samples sigma.t.square.samples <- posterior.fit.sem$sigma.t.square.samples sigma.u1.square.samples <- posterior.fit.sem$sigma.u1.square.samples sigma.u2.square.samples <- posterior.fit.sem$sigma.u2.square.samples eta1.samples <- posterior.fit.sem$eta1.samples eta2.samples <- posterior.fit.sem$eta2.samples inv.cpo <- matrix(0, nrow = effsamp, ncol = n) # this will store inverse cpo values log.cpo <- rep(0, n) # this will store log cpo for(iter in 1:effsamp) # Post burn in { inv.cpo[iter, ] <- 1/(dnorm(ct[, 1], mean = alpha.t.samples[iter] + x %*% beta.t.samples[, iter] + + eta1.samples[iter] * phi.t.samples[iter], sd = sqrt(sigma.t.square.samples[iter]))^ct[, 2] * pnorm(ct[, 1], mean = alpha.t.samples[iter] + x %*% beta.t.samples[, iter] + + eta1.samples[iter] * phi.t.samples[iter], sd = sqrt(sigma.t.square.samples[iter]), lower.tail = FALSE)^(1 - ct[, 2])) } # End of iter loop for (i in 1:n){ log.cpo[i] <- -log(mean(inv.cpo[, i])) # You average invcpo[i] over the iterations, # then take 1/average and then take log. # Hence the negative sign in the log } lpml.sem <- sum(log.cpo) DIC.sem WAIC.sem mse.sem