Chapter 9 Bayesian Regression
9.2 Bayesian Linear Regression
9.2.1 Model Form
We begin by recalling the linear regression model:
\[y_i = \beta_0 + \beta_1 x_{i1} + \cdots + \beta_p x_{ip} + \epsilon_i,\]where \(\epsilon_i \sim N(0, \sigma^2)\). To estimate this regression model, we want the joint probability distribution of \(\beta_0, \beta_1, \cdots, \beta_p\) and \(\sigma^2\). From least squares estimation under the Gaussian assumption we have \(\beta \mid \sigma^2 \sim N\left[(X'X)^{-1}X'y,\,(X'X)^{-1}\sigma^2\right]\) and, for the residual variance estimate \(\hat{\sigma}^2\), \((n-p)\hat{\sigma}^2/\sigma^2 \sim \chi^2(n-p)\).
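As a point of reference, the least squares estimate \((X'X)^{-1}X'y\) takes one line of R. A minimal sketch, assuming the design matrix X (with an intercept column) and response y generated in the simulation of Section 9.2.4 below; the names beta.ols and sigma2.ols are illustrative:
## Closed-form OLS estimate of beta: (X'X)^{-1} X'y
beta.ols <- solve(t(X) %*% X, t(X) %*% y)
## Unbiased estimate of the residual variance
sigma2.ols <- sum((y - X %*% beta.ols)^2) / (nrow(X) - ncol(X))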
By Bayes' rule, we know that
\[\begin{align*} p\left(\beta, \sigma^{2} | y, x\right) & \propto p\left(y | \beta, \sigma^{2}, x\right) p\left(\beta, \sigma^{2}\right) \\ &=p\left(y | \beta, \sigma^{2}, x\right) p\left(\beta | \sigma^{2}\right) p\left(\sigma^{2}\right), \end{align*}\]where \(p(y|\beta,\sigma^2,x)\) is the likelihood function of the model, \(p(\beta,\sigma^2) = p(\beta|\sigma^2)p(\sigma^2)\) carries the prior information, and \(p(\beta,\sigma^2|y,x)\) is the posterior distribution obtained by combining the data with the prior.
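Under the Gaussian model the likelihood factors over the observations,
\[p\left(y | \beta, \sigma^{2}, x\right)=\prod_{i=1}^{n} N\left(y_{i} \,\middle|\, \beta_{0}+\beta_{1} x_{i1}+\cdots+\beta_{p} x_{ip},\ \sigma^{2}\right),\]so the log likelihood is simply a sum of univariate normal log densities, which is how the code in Section 9.2.4 evaluates it.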
9.2.4 The Posterior Distribution
To make this concrete, we simulate data from the model and then code the log posterior directly in R.
###----------------------------------------------
### DGP: Data generating process
###----------------------------------------------
n <- 1000
p <- 3
## Gaussian noise with true standard deviation 0.1 (sigma2 = 0.01)
epsilon <- rnorm(n, 0, 0.1)
## True coefficients (intercept and p slopes) as a column vector
beta <- matrix(c(-3, 1, 3, 5))
## Design matrix: an intercept column plus p uniform covariates
X <- cbind(1, matrix(runif(n*p), n))
y <- X %*% beta + epsilon
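As a quick check on the simulated data, an ordinary least squares fit should recover the true coefficients; a minimal sketch using the X and y just generated (the - 1 removes lm()'s implicit intercept because X already contains a column of ones):
## OLS fit; coefficients should be close to (-3, 1, 3, 5)
fit.ols <- lm(y ~ X - 1)
coef(fit.ols)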
###----------------------------------------------
### The log posterior function
###----------------------------------------------
library(mvtnorm)
LogPosterior <- function(beta, sigma2, y, X)
{
    p <- length(beta)
    ## The log likelihood: a sum of univariate normal log densities
    LogLikelihood <- sum(dnorm(x = y,
                               mean = X %*% beta,
                               sd = sqrt(sigma2),
                               log = TRUE))
    ## The priors: a standard multivariate normal for beta
    ## and a chi-squared prior (df = 10) for sigma2
    LogPrior4beta <- dmvnorm(x = t(beta),
                             mean = matrix(0, 1, p),
                             sigma = diag(p),
                             log = TRUE)
    LogPrior4sigma2 <- dchisq(x = sigma2, df = 10,
                              log = TRUE)
    LogPrior <- LogPrior4beta + LogPrior4sigma2
    ## The log posterior, up to an additive constant
    LogPosterior <- LogLikelihood + LogPrior
    ## Return
    return(LogPosterior)
}
9.2.5 Posterior Inference
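We draw from the posterior with a Metropolis-within-Gibbs scheme: the two blocks \(\beta\) and \(\sigma^2\) are updated in turn, each with a random-walk Metropolis step that accepts a proposed value \(\theta'\) over the current value \(\theta\) with probability
\[\alpha = \min\left\{1,\ \exp\left[\log p\left(\theta' \mid \cdot\right) - \log p\left(\theta \mid \cdot\right)\right]\right\},\]which is exactly the logratio construction in the loop below. Before running the sampler, it is worth evaluating the log posterior once at the true parameter values; a minimal sanity check using the objects defined above:
## Should return a single finite (non-NA) value
LogPosterior(beta = c(-3, 1, 3, 5), sigma2 = 0.01, y = y, X = X)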
###----------------------------------------------
### The Metropolis algorithm with Gibbs
###----------------------------------------------
nIter <- 1000
p <- ncol(X)
## Reserve space for the draws and acceptance probabilities
beta <- matrix(NA, nIter, p)
sigma2 <- matrix(NA, nIter, 1)
acc <- matrix(NA, nIter, 2)
## Initial values
beta[1, ] <- c(-2, 3, 4, 1)
sigma2[1, ] <- 0.5
for(i in 2:nIter)
{
    beta.current <- beta[i-1, ]
    sigma2.current <- sigma2[i-1, ]
    ## The Metropolis algorithm for beta:
    ## random-walk proposal with fixed covariance diag(p)
    beta.prop <- rmvnorm(n = 1,
                         mean = matrix(beta.current, 1),
                         sigma = diag(p))
    logPosterior.beta.prop <- LogPosterior(beta = t(beta.prop),
                                           sigma2 = sigma2.current,
                                           y = y, X = X)
    logPosterior.beta.current <- LogPosterior(beta = beta.current,
                                              sigma2 = sigma2.current,
                                              y = y, X = X)
    logratio <- (logPosterior.beta.prop -
                     logPosterior.beta.current)
    acc.prob.beta <- min(exp(logratio), 1)
    if(!is.na(acc.prob.beta) &&
       runif(1) < acc.prob.beta) # accept the proposal
    {
        beta.current <- beta.prop
    }
    acc[i, 1] <- acc.prob.beta
    beta[i, ] <- beta.current
    ## The Metropolis algorithm for sigma2:
    ## random-walk proposal with fixed standard deviation 1.
    ## A negative proposal makes the log posterior NA/NaN, and the
    ## is.na() guard below then rejects it automatically.
    sigma2.prop <- rnorm(1, sigma2.current, 1)
    logPosterior.sigma2.prop <- LogPosterior(beta = matrix(beta.current),
                                             sigma2 = sigma2.prop,
                                             y = y, X = X)
    logPosterior.sigma2.current <- LogPosterior(beta = matrix(beta.current),
                                                sigma2 = sigma2.current,
                                                y = y, X = X)
    logratio <- logPosterior.sigma2.prop -
        logPosterior.sigma2.current
    acc.prob.sigma2 <- min(exp(logratio), 1)
    if(!is.na(acc.prob.sigma2) &&
       runif(1, 0, 1) < acc.prob.sigma2) # accept the proposal
    {
        sigma2.current <- sigma2.prop
    }
    ## Store the draws and acceptance probabilities
    acc[i, 2] <- acc.prob.sigma2
    sigma2[i, ] <- sigma2.current
}
## Posterior means and standard deviations of beta
apply(beta, 2, mean)
#> [1] -3.1237 0.9976 3.1001 5.1548
apply(beta, 2, sd)
#> [1] 0.08883 0.19232 0.21274 0.40125
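The summary above averages over all draws, including the sampler's initial transient. In practice one usually inspects trace plots and discards a burn-in period before summarizing; a minimal sketch, where the burn-in length of 200 is an illustrative choice rather than part of the results above:
## Trace plots of the four regression coefficients
matplot(beta, type = "l", lty = 1,
        xlab = "Iteration", ylab = "beta")
## Summaries after discarding the first 200 draws as burn-in
burnin <- 200
keep <- (burnin + 1):nIter
apply(beta[keep, , drop = FALSE], 2, mean)
apply(beta[keep, , drop = FALSE], 2, sd)
mean(sigma2[keep, ])
## Average acceptance probabilities (na.rm drops the NaN entries
## recorded for invalid sigma2 proposals)
colMeans(acc[keep, ], na.rm = TRUE)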