# STA 410, ASSIGNMENT 3, SPRING 2004.
# FIND VALUES OF NUMBERS IN A VECTOR MODULO 2PI.
mod2pi <- function (x)
{
f <- x/(2*pi)
f <- f-floor(f)
f*(2*pi)
}
# FIND THE PROBABILITY DENSITY OF POINTS UNDER THE WRAPPED NORMAL DISTRIBUTION.
# The arguments are the the vector of points, the mean of the wrapped normal,
# and the standard deviation of the wrapped normal. The points and the mean
# are moved to the interval [0,2pi) if necessary.
#
# The density is computed by adding the density of the point under the normal
# distribution with means of mu+2*pi*i for i an integer from -t to t, with t
# chosen so that 2*pi*i will span a range of at least 10 standard deviations.
dwrpnorm <- function (x, mu, sigma)
{
x <- mod2pi(x)
mu <- mod2pi(mu)
t <- ceiling(10*sigma/(2*pi))
d <- rep(0,length(x))
for (i in (-t):t)
{ d <- d + dnorm (x, mu+2*pi*i, sigma)
}
d
}
# LOG LIKELIHOOD FUNCTION FOR WRAPPED NORMAL MODEL. Arguments are the vector
# of data points, and the mean and standard deviation of the distribution.
lglik <- function (x, mu, sigma)
{
sum (log (dwrpnorm (x, mu, sigma)))
}
# FIND THE MAXIMUM LIKELIHOOD ESTIMATES FOR MU AND SIGMA USING EM. The
# arguments are the vector of data points, the initial values for mu and
# sigma, the number of iterations of EM to do, and a debug flag, which
# if TRUE causes the parameters and log likelihood to be printed for
# each iteration. The value of this function is a list with elements
# "mu" and "sigma", containing the final maximum likelihood estimates.
em <- function (x, mu, sigma, iters=30, debug=FALSE)
{
# Make sure data and mean arguments are in [0,2pi).
x <- mod2pi(x)
mu <- mod2pi(mu)
if (debug)
{ cat(0,mu,sigma,lglik(x,mu,sigma),"\n")
}
for (k in 1:iters)
{
# The E step. First, sets "i" to the wrap indices that span the
# relevant range. Then allocates space for "y" and "d" matrices. The
# "y" matrix is set to the data values offset by the multiples of 2pi
# corresponding to these indices. (These are possible unobserved data
# values before wrapping.) The "d" matrix is first set to the probability
# density of the "y" values under a normal distribution with mean mu
# and standard deviation sigma. The "d" values are then normalized to
# sum to one for each data point, so that they can be regarded as
# probabilities for the different "y" values.
t <- max(1,ceiling(10*sigma/(2*pi)))
i <- (-t):t
y <- matrix(0,length(x),length(i))
d <- matrix(0,length(x),length(i))
for (j in 1:length(i))
{ y[,j] <- x+2*pi*i[j]
d[,j] <- dnorm (y[,j], mu, sigma)
}
for (j in 1:length(x))
{ d[j,] <- d[j,] / sum(d[j,])
}
# M step. The mean is re-estimated based on the weighted average of
# the "y" values. The standard deviation is then re-estimated based
# on the weighted average of squared deviations from mu.
mu <- sum(d*y)/length(x)
sigma <- sqrt(sum(d*(y-mu)^2)/length(x))
# Move mu into the range [0,2p). Done only AFTER re-estimating sigma.
mu <- mod2pi(mu)
if (debug)
{ cat(k,mu,sigma,lglik(x,mu,sigma),"\n");
}
}
list (mu=mu, sigma=sigma)
}