library(data.table)
library(futile.logger)
library(ggplot2)

## Normal means

We consider the simple model given by

$y_{it} = \alpha_i + \epsilon_{it}$

For simplicity we consider the case where $\epsilon_{it}$ is Normal iid, and we consider a discrete distribution for $\alpha_i$ parametrized by $\alpha_k, p_k$.

We then set our model and simulate data.

# we consider a discrete representation of a normal distribution for alpha
# require(Ckmeans.1d.dp)
# nk = 10
# nr = 1e6
# x <- rnorm(nr)
# result <- Ckmeans.1d.dp(x,nk)
# model0 = list(eps_sd=1, p_k = tabulate(result$cluster)/nr, alpha_k = result$centers)
#
# # simulate data
# nn = 1000
# sdata = data.table(eta_i = sample.int(nk,nn,prob=model0$p_k,replace=TRUE)) # sdata[, alpha_i := model0$alpha_k[eta_i]]
# sdata[, y1 := alpha_i + model0$eps_sd * rnorm(.N)] # sdata[, y2 := alpha_i + model0$eps_sd * rnorm(.N)]

# True data-generating process: y_it = alpha_i + eps_it,
# alpha_i ~ N(0, alpha_sd^2), eps_it ~ N(0, eps_sd^2)
model0 = list(eps_sd = 1, alpha_sd = 1.5)

# simulate data: two noisy measurements (y1, y2) per individual
nn = 50000
sdata = data.table(alpha_i = model0$alpha_sd * rnorm(nn))
sdata[, y1 := alpha_i + model0$eps_sd * rnorm(.N)]
sdata[, y2 := alpha_i + model0$eps_sd * rnorm(.N)]

## FE approach
# Recover alpha_i within each individual; a natural estimator is the
# within-individual average. Its variance overstates var(alpha_i)
# because it still contains noise variance eps_sd^2 / 2.
sdata[, alpha_hat := 0.5 * (y1 + y2)]
sdata[, var(alpha_hat)]
##  2.763268

## RE approach
# Stochastic-EM: alternate posterior draws of alpha_i with variance updates.
# initialize with some parameters
model = list(eps_sd = 0.1, alpha_sd = 0.1)
rr = data.frame()
for (rep in 1:15) {
  # posterior of alpha_i | (y1, y2) is N(mu_i, s): the posterior precision is
  # the prior precision plus one eps-precision per observation
  s = 1 / (1 / model$alpha_sd^2 + 2 / model$eps_sd^2)
  sdata[, mu_i := s * (y1 + y2) / model$eps_sd^2]
  sdata[, alpha_i_draw := mu_i + sqrt(s) * rnorm(.N)]

  # M-step: update the two standard deviations from residuals / drawn effects
  model$eps_sd   = sqrt(sdata[, 0.5 * (var(y2 - alpha_i_draw) + var(y1 - alpha_i_draw))])
  model$alpha_sd = sqrt(sdata[, var(alpha_i_draw)])

  # fix: the second value passed was model$eps_sd (duplicated), so alpha_sd
  # was never logged — the recorded output shows both values always identical
  flog.info("eps_sd=%4.4f eps_alpha=%4.4f", model$eps_sd, model$alpha_sd)
  rr = rbind(rr, data.frame(model))
}
## INFO [2019-11-19 09:25:37] eps_sd=0.8985 eps_alpha=0.8985
## INFO [2019-11-19 09:25:37] eps_sd=0.9853 eps_alpha=0.9853
## INFO [2019-11-19 09:25:37] eps_sd=0.9998 eps_alpha=0.9998
## INFO [2019-11-19 09:25:37] eps_sd=1.0025 eps_alpha=1.0025
## INFO [2019-11-19 09:25:37] eps_sd=1.0036 eps_alpha=1.0036
## INFO [2019-11-19 09:25:37] eps_sd=1.0001 eps_alpha=1.0001
## INFO [2019-11-19 09:25:37] eps_sd=0.9960 eps_alpha=0.9960
## INFO [2019-11-19 09:25:37] eps_sd=0.9909 eps_alpha=0.9909
## INFO [2019-11-19 09:25:37] eps_sd=0.9948 eps_alpha=0.9948
## INFO [2019-11-19 09:25:37] eps_sd=0.9963 eps_alpha=0.9963
## INFO [2019-11-19 09:25:37] eps_sd=0.9974 eps_alpha=0.9974
## INFO [2019-11-19 09:25:37] eps_sd=0.9983 eps_alpha=0.9983
## INFO [2019-11-19 09:25:37] eps_sd=0.9964 eps_alpha=0.9964
## INFO [2019-11-19 09:25:37] eps_sd=0.9962 eps_alpha=0.9962
## INFO [2019-11-19 09:25:37] eps_sd=0.9961 eps_alpha=0.9961
# fix: "rr\$t" was an escaped-markdown artifact and is not valid R syntax
rr$t = 1:15
# plot both estimated standard deviations against the iteration index
ggplot(rr, aes(x = t, y = eps_sd)) + geom_line() + theme_bw() +
  geom_line(aes(y = alpha_sd))

## Variational approach

We are now also going to estimate the posterior directly: we fit a linear function for the mean and the variance of the posterior.