Chapter 9 Bayesian Regression
9.2 Bayesian Linear Regression
9.2.1 Model Form
We begin by recalling the linear regression model:
\[y_i = \beta_0 + \beta_1 x_{i1} + \cdots + \beta_p x_{ip} + \epsilon_i,\]where \(\epsilon_i \sim N(0, \sigma^2)\). To estimate this regression model, we want the joint probability distribution of \(\beta_0, \beta_1, \ldots, \beta_p\) and \(\sigma^2\). Under the Gaussian assumption, the least-squares estimator satisfies \(\hat{\beta} \mid \sigma^2 \sim N\left[(X'X)^{-1}X'y,\,(X'X)^{-1}\sigma^2\right]\), and the residual sum of squares scaled by \(\sigma^2\) follows a \(\chi^2(n-p-1)\) distribution.
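For comparison with the Bayesian treatment below, a minimal classical sketch (the simulated data and variable names here are illustrative, not part of the text): lm() returns exactly these least-squares quantities.

## Classical least-squares baseline on a small simulated data set
set.seed(42)
n <- 200
x1 <- rnorm(n)
x2 <- rnorm(n)
y <- 1 + 2*x1 - 0.5*x2 + rnorm(n, 0, 0.3)
fit <- lm(y ~ x1 + x2)
coef(fit)  # point estimates of beta
vcov(fit)  # estimated (X'X)^{-1} * sigma^2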
According to Bayes' rule, we have
\[\begin{align*} p\left(\beta, \sigma^{2} | y, x\right) & \propto p\left(y | \beta, \sigma^{2}, x\right) p\left(\beta, \sigma^{2}\right) \\ &=p\left(y | \beta, \sigma^{2}, x\right) p\left(\beta | \sigma^{2}\right) p\left(\sigma^{2}\right) \end{align*}\]where \(p(y|\beta,\sigma^2,x)\) is the likelihood function of the model, \(p(\beta,\sigma^2) = p(\beta|\sigma^2)p(\sigma^2)\) carries the prior information, and \(p(\beta,\sigma^2|y,x)\) is the posterior distribution obtained by combining the data with the prior.
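For numerical stability, the code below works on the log scale; taking logarithms of this factorization gives, up to an additive constant,
\[\log p\left(\beta, \sigma^{2} | y, x\right) = \log p\left(y | \beta, \sigma^{2}, x\right) + \log p\left(\beta | \sigma^{2}\right) + \log p\left(\sigma^{2}\right) + \text{const},\]which is exactly the sum of the log likelihood and the log priors computed in the LogPosterior function below.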
9.2.4 The Posterior Distribution
We first simulate data from the model above, then implement the log posterior as the sum of the log likelihood and the log priors.
###----------------------------------------------
### DGP: Data generating process
###----------------------------------------------
n <- 1000                            # sample size
p <- 3                               # number of covariates
epsilon <- rnorm(n, 0, 0.1)          # Gaussian noise with sd 0.1
beta <- matrix(c(-3, 1, 3, 5))       # true coefficients (intercept first)
X <- cbind(1, matrix(runif(n*p), n)) # design matrix with intercept column
y <- X%*%beta + epsilon              # simulated response
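As a quick sanity check (illustrative, not part of the original text), the classical fit should recover the true coefficients closely on this simulated data set:

## Sanity check: OLS estimates should be close to c(-3, 1, 3, 5)
coef(lm(y ~ X - 1))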
###----------------------------------------------
### the log posterior function
###----------------------------------------------
library(mvtnorm)
LogPosterior <- function(beta, sigma2, y, X)
{
  p <- length(beta)
  ## The log likelihood function
  LogLikelihood <- sum(dnorm(x = y,
                             mean = X%*%beta,
                             sd = sqrt(sigma2),
                             log = TRUE))
  ## The priors
  LogPrior4beta <- dmvnorm(x = t(beta),
                           mean = matrix(0, 1, p),
                           sigma = diag(p), log = TRUE)
  LogPrior4sigma2 <- dchisq(x = sigma2, df = 10,
                            log = TRUE)
  LogPrior <- LogPrior4beta + LogPrior4sigma2
  ## The log posterior
  LogPosterior <- LogLikelihood + LogPrior
  ## Return
  return(LogPosterior)
}
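A quick evaluation at the true parameter values (illustrative, not in the original text; sigma2 = 0.1^2 matches the noise sd used in the DGP) confirms the function returns a finite log density:

## Evaluate the log posterior at the true parameters
LogPosterior(beta = beta, sigma2 = 0.1^2, y = y, X = X)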
9.2.5 Posterior Inference
We sample from the posterior with a Metropolis-within-Gibbs scheme, alternately updating \(\beta\) and \(\sigma^2\) with random-walk proposals.
###----------------------------------------------
### The Metropolis algorithm with Gibbs
###----------------------------------------------
nIter <- 1000
p <- ncol(X)
## Reserve space
beta <- matrix(NA, nIter, p)
sigma2 <- matrix(NA, nIter, 1)
acc <- matrix(NA, nIter, 2)
## Initial values
beta[1, ] <- c(-2, 3, 4, 1)
sigma2[1, ] <- 0.5
for(i in 2:nIter)
{
  beta.current <- beta[i-1, ]
  sigma2.current <- sigma2[i-1, ]

  ## The Metropolis Algorithm For beta
  beta.prop <- rmvnorm(n = 1,
                       mean = matrix(beta.current, 1),
                       sigma = diag(p)) # FV
  logPosterior.beta.prop <- LogPosterior(beta = t(beta.prop),
                                         sigma2 = sigma2.current,
                                         y = y, X = X)
  logPosterior.beta.current <- LogPosterior(beta = beta.current,
                                            sigma2 = sigma2.current,
                                            y = y, X = X)
  logratio <- (logPosterior.beta.prop -
               logPosterior.beta.current)
  acc.prob.beta <- min(exp(logratio), 1)
  if(!is.na(acc.prob.beta) &
     runif(1) < acc.prob.beta) # accept the proposal
  {
    beta.current <- beta.prop
  }
  acc[i, 1] <- acc.prob.beta
  beta[i, ] <- beta.current

  ## The Metropolis Algorithm For sigma2
  sigma2.prop <- rnorm(1, sigma2.current, 1) # FV
  logPosterior.sigma2.prop <- LogPosterior(beta = matrix(beta.current),
                                           sigma2 = sigma2.prop,
                                           y = y, X = X)
  logPosterior.sigma2.current <- LogPosterior(beta = matrix(beta.current),
                                              sigma2 = sigma2.current,
                                              y = y, X = X)
  logratio <- logPosterior.sigma2.prop -
              logPosterior.sigma2.current
  acc.prob.sigma2 <- min(exp(logratio), 1)
  if(!is.na(acc.prob.sigma2) &
     runif(1, 0, 1) < acc.prob.sigma2) # accept the proposal
  {
    sigma2.current <- sigma2.prop
  }
  ## Update the matrix
  acc[i, 2] <- acc.prob.sigma2
  sigma2[i, ] <- sigma2.current
}
## Summarize beta and sigma2
apply(beta, 2, mean)
#> [1] -3.1237 0.9976 3.1001 5.1548
apply(beta, 2, sd)
#> [1] 0.08883 0.19232 0.21274 0.40125
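Since early draws reflect the initial values rather than the posterior, it is common to discard a burn-in period before summarizing; a minimal sketch (the burn-in length of 200 is an assumption, not from the text):

## Drop a burn-in period, then report posterior quantiles
burnin <- 200
beta.post <- beta[-(1:burnin), ]
apply(beta.post, 2, quantile, probs = c(0.025, 0.5, 0.975))  # 95% credible intervals
quantile(sigma2[-(1:burnin), ], probs = c(0.025, 0.5, 0.975))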