Simple Lab on studying bootstrap
require(ggplot2)
## Loading required package: ggplot2
require(data.table)
## Loading required package: data.table
require(reshape2)
## Loading required package: reshape2
##
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
##
## dcast, melt
require(foreach)
## Loading required package: foreach
require(MASS)
## Loading required package: MASS
require(stringr)
## Loading required package: stringr
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-6
# let's consider a simple duration model
lambda = 0.01
n = 10
rr = data.table(foreach (i = 1:1000,.combine=rbind) %do% {
D = rexp(n,lambda)
res = data.frame(value=1/mean(D),value0=lambda)
res$rep = i
res$name = rownames(res)
res
})
ggplot(rr,aes(x=value)) + geom_density(fill="blue",alpha=0.2,color="blue") + geom_vline(xintercept=lambda,linetype=2,color="red") + geom_vline(xintercept=rr[,mean(value)],linetype=2,color="blue")+theme_bw()
We know that the asymptotic variance is \(\lambda^2\) so we can compare the asymptotic distribution approximation to bootstrap distribution.
D0 = rexp(n,lambda)
boostest <- function(D) {
# we drawn from D with replacement
D2 = sample(D,size = n,replace = TRUE)
return(1/mean(D2))
}
rr = data.table(foreach (i = 1:1000,.combine=rbind) %do% {
D = rexp(n,lambda)
res = data.frame(value=1/mean(D),value0=lambda)
res$rep = i
res$name = rownames(res)
res
})
rr[,valueb := boostest(D0),rep]
rr[,valuea := lambda + 1/sqrt(n) * lambda * rnorm(.N)]
ggplot(rr,aes(x=(value-lambda)/lambda)) + geom_density(fill="blue",alpha=0.2,color="blue") +
geom_density(aes(x=(valueb-lambda)/lambda),fill="red",alpha=0.2,color="red") +
geom_density(aes(x=(valuea-lambda)/lambda),fill="green",alpha=0.2,color="green") +
geom_vline(xintercept=lambda,linetype=2,color="red") +
theme_bw()
ggplot(rr,aes(x=(value))) + geom_density(fill="blue",alpha=0.2,color="blue") +
geom_density(aes(x=(valueb)),fill="red",alpha=0.2,color="red") +
geom_density(aes(x=(valuea)),fill="green",alpha=0.2,color="green") +
geom_vline(xintercept=lambda,linetype=2,color="red") +
theme_bw()
boostest <- function(D) {
# we drawn from D with replacement
D2 = sample(D,size = n,replace = TRUE)
return(1/mean(D2))
}
rr = data.table(foreach (i = 1:5000,.combine=rbind) %do% {
D = rexp(n,lambda)
valueb = rep(0,200)
# compute bootstrap estimates
for (j in 1:200) {
valueb[j] = boostest(D)
}
res = data.frame(value=1/mean(D),value0=lambda,valueb=mean(valueb),valuesd=sd(valueb))
res$rep = i
res$name = rownames(res)
res
})
rr[,valuea := lambda + 1/sqrt(n) * lambda * rnorm(.N)]
ggplot(rr,aes(x=value)) + geom_density(fill="blue",alpha=0.2,color="blue") + geom_density(aes(x=valuea),fill="red",alpha=0.2,color="red") +
geom_density(aes(x=valueb),fill="green",alpha=0.2,color="green") +
geom_vline(xintercept=lambda,linetype=2,color="red") + geom_vline(xintercept=rr[,mean(value)],linetype=2,color="blue")+
geom_vline(xintercept=rr[,mean(valueb)],linetype=2,color="green")+
geom_vline(xintercept=rr[,2*mean(value) - mean(valueb)],linetype=1,color="orange")+theme_bw()
# check a simple test that lambda is not zero
rr[,mean( abs(value-value0)<1.96*valuesd )]
## [1] 0.9298
rr[,mean( abs(valuea-value0)<1.96*valuesd )]
## [1] 0.9222