Simple Lab on studying bootstrap

require(ggplot2)
require(data.table)
require(reshape2)
## Loading required package: reshape2
## 
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
## 
##     dcast, melt
require(foreach)
## Loading required package: foreach
require(MASS)
## Loading required package: MASS
require(stringr)
## Loading required package: stringr
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 2.0-18
# let's consider a simple duration model
lambda = 0.01
n  = 10

rr = data.table(foreach (i = 1:1000,.combine=rbind) %do% {
  D = rexp(n,lambda)
  res = data.frame(value=1/mean(D),value0=lambda)
  res$rep = i
  res$name = rownames(res)
  res
})

ggplot(rr,aes(x=value)) +  geom_density(fill="blue",alpha=0.2,color="blue") + geom_vline(xintercept=lambda,linetype=2,color="red") + geom_vline(xintercept=rr[,mean(value)],linetype=2,color="blue")+theme_bw()

Compare bootstrap distribution to asymptotic one

We know that the asymptotic variance is \(\lambda^2\) so we can compare the asymptotic distribution approximation to bootstrap distribution.

D0 = rexp(n,lambda)
boostest <- function(D) {
  # we drawn from D with replacement
  D2 = sample(D,size = n,replace = TRUE)
  return(1/mean(D2))
}

rr = data.table(foreach (i = 1:1000,.combine=rbind) %do% {
  D = rexp(n,lambda)
  res = data.frame(value=1/mean(D),value0=lambda)
  res$rep = i
  res$name = rownames(res)
  res
})

rr[,valueb := boostest(D0),rep]
rr[,valuea := lambda + 1/sqrt(n) * lambda * rnorm(.N)]

ggplot(rr,aes(x=(value-lambda)/lambda)) + geom_density(fill="blue",alpha=0.2,color="blue") +
  geom_density(aes(x=(valueb-lambda)/lambda),fill="red",alpha=0.2,color="red") + 
  geom_density(aes(x=(valuea-lambda)/lambda),fill="green",alpha=0.2,color="green") + 
  geom_vline(xintercept=lambda,linetype=2,color="red") +
  theme_bw()

ggplot(rr,aes(x=(value))) + geom_density(fill="blue",alpha=0.2,color="blue") +
  geom_density(aes(x=(valueb)),fill="red",alpha=0.2,color="red") + 
  geom_density(aes(x=(valuea)),fill="green",alpha=0.2,color="green") + 
  geom_vline(xintercept=lambda,linetype=2,color="red") +
  theme_bw()

bootstrapping each time

boostest <- function(D) {
  # we drawn from D with replacement
  D2 = sample(D,size = n,replace = TRUE)
  return(1/mean(D2))
}

rr = data.table(foreach (i = 1:5000,.combine=rbind) %do% {
  D = rexp(n,lambda)
  valueb = rep(0,200)
  # compute bootstrap estimates
  for (j in 1:200) {
    valueb[j] = boostest(D)
  }
  
  res = data.frame(value=1/mean(D),value0=lambda,valueb=mean(valueb),valuesd=sd(valueb))
  res$rep = i
  res$name = rownames(res)
  res
})

rr[,valuea := lambda + 1/sqrt(n) * lambda * rnorm(.N)]

ggplot(rr,aes(x=value)) + geom_density(fill="blue",alpha=0.2,color="blue") + geom_density(aes(x=valuea),fill="red",alpha=0.2,color="red") +
geom_density(aes(x=valueb),fill="green",alpha=0.2,color="green") +
 geom_vline(xintercept=lambda,linetype=2,color="red") + geom_vline(xintercept=rr[,mean(value)],linetype=2,color="blue")+
geom_vline(xintercept=rr[,mean(valueb)],linetype=2,color="green")+
geom_vline(xintercept=rr[,2*mean(value) - mean(valueb)],linetype=1,color="orange")+theme_bw()

Extract and compare standard errors

# check a simple test that lambda is not zero

rr[,mean( abs(value-value0)<1.96*valuesd )]
## [1] 0.9366
rr[,mean( abs(valuea-value0)<1.96*valuesd )]
## [1] 0.9248