# STA 414/2104, Spring 2014, Assignment #2 - Mixture model functions.
# CONSTANTS REGARDING THE DIGIT CLASSIFICATION PROBLEM.
ndigits <- 10 # Number of different digits
npixels <- 14*14 # Numbers of pixels in digit images
# FIT A MIXTURE MODEL WITH K COMPONENTS FOR EACH DIGIT. Arguments are
# the matrix of pixels for each training case (trn), the vector of
# labels for the training cases (trnlab), the number of components for
# each digit (K), the penalty magnitude (alpha), and the number of
# iterations of EM to do. The result is a list with elements pi
# (component probabilities), theta (pixel probabilities for each
# component), and r (responsibilities). Responsibilities are set
# randomly to start, then M and E steps alternate for the specified
# number of iterations. The log likelihood and log likelihood plus
# penalty are printed after each iteration.
digit_mix_train <- function(trn,trnlab,K,alpha,iters)
{
# Allocate the matrix of pixel probabilities, with rows for pixels and
# columns for mixture components.
theta <- matrix(NA,npixels,ndigits*K)
# Set the responsibilities randomly to start.
r <- matrix(0,ntrn,ndigits*K)
for (i in 1:ntrn)
{ sub <- K*trnlab[i]+(1:K)
r[i,sub] <- runif(K,1,2)
r[i,sub] <- r[i,sub] / sum(r[i,sub])
}
# Do "iters" iterations of the EM algorithm, beginning with an M step.
prev_pen_ll <- -Inf
for (iter in 1:iters)
{
# The M step. Re-estimates pi and theta.
pi <- colMeans(r)
for (k in 1:(ndigits*K))
{ theta[,k] <- (colSums(r[,k]*trn) + alpha) / (sum(r[,k]) + 2*alpha)
}
# The E step. Computes responsibilities, and also the log likelihood.
ll <- 0
for (i in 1:ntrn)
{ sub <- K*trnlab[i]+(1:K)
pixels <- trn[i,]
p <- pi[sub] * exp (colSums (log (theta[,sub,drop=FALSE]^pixels
* (1-theta[,sub,drop=FALSE])^(1-pixels))))
r[i,sub] <- p / sum(p)
ll <- ll + log(sum(p))
}
# Print the log likelihood (ll) and log likelihood plus penalty (pen_ll).
pen_ll <- ll + alpha * sum(log(theta*(1-theta)))
cat("Iteration",iter,": log likelihood =",round(ll,3),
" log likelihood + penalty =",round(pen_ll,3),"\n")
# Warn if the log likelihood plus penalty has gone down (it shouldn't!).
# But ignore decreases of less than 1e-6 (possibly just due to rounding).
if (pen_ll < prev_pen_ll-1e-6)
{ cat("WARNING: log likelihood + penalty decreased!\n")
}
prev_pen_ll <- pen_ll
}
# Return the parameter estimates and responsibilities.
list(pi=pi,theta=theta,r=r)
}
# FIND THE PROBABILITY DISTRIBUTIONS FOR LABELS IN TEST CASES. The
# arguments are the pi and theta parameters (as found by digit_mix_train),
# and the matrix of pixels for test cases (tst). The result is a matrix
# of label probabilities for each test case.
digit_mix_predict <- function (pi,theta,tst)
{
K <- length(pi) / ndigits
pr <- matrix(NA,nrow(tst),ndigits)
for (i in 1:nrow(tst))
{
# Compute the matrix of probabilities that this digit's image would be
# generated from each component. Note that theta is a matrix with
# rows for pixels and columns for components, so when it is raised
# to a vector of pixel values, the operation is done column-by-column.
p <- pi * exp (colSums (log (theta^tst[i,] * (1-theta)^(1-tst[i,]))))
# Sum these probabilities separately for each possible digit label.
for (y in 0:(ndigits-1)) pr[i,y+1] <- sum(p[K*y+(1:K)])
# Normalize these sums to produce a probability distribution over
# labels for this test case.
pr[i,] <- pr[i,] / sum(pr[i,])
}
pr
}