set.seed(2026)
x = rgamma(20, 4, 1) # true skewness of Gamma(alpha=4) is 2/sqrt(alpha) = 1
hist(x)

library(timeDate)
n = length(x)
s = skewness(x)
s # sample skewness = 0.5389618

# Jackknife: generating the pseudo-observations
sj = rep(0, n)
for(i in 1:n){
  y = x[-i]
  sj[i] = s + (n-1)*(s - skewness(y)) # i-th pseudo-value: n*s - (n-1)*s_(-i)
}
corrected = mean(sj)
corrected # jackknife bias-corrected skewness = 0.5723043
variance = var(sj)/n # jackknife variance estimate
sqrt(variance)
(1 - corrected)/sqrt(variance) # t-type statistic against the true value 1

# Nonparametric bootstrap
B = 1000
sb = rep(0, B)
set.seed(1)
for(i in 1:B){
  y = sample(x, n, replace = TRUE)
  sb[i] = skewness(y)
}
hist(sb, breaks = 20)
abline(v = mean(sb), col = "blue")
mean(sb) # bootstrap mean = 0.5430879
quantile(sb, 0.025)
quantile(sb, 0.975) # 95% CI (-0.068, 1.238) contains the true parameter value

# Parametric bootstrap: method-of-moments fit of the gamma parameters
mu = mean(x)
sig = sd(x)
alpha = (mu^2)/sig^2
beta = mu/sig^2
set.seed(1)
for(i in 1:B){
  y = rgamma(n, alpha, beta) # resample from the fitted Gamma(alpha, beta)
  sb[i] = skewness(y)
}
hist(sb, breaks = 20)
abline(v = mean(sb), col = "blue")
mean(sb) # parametric bootstrap mean = 0.645, closest to the true parameter
quantile(sb, 0.025)
quantile(sb, 0.975) # 95% CI (-0.220, 1.737) contains the true parameter value

# Generating bivariate data
X = runif(200, min = 0, max = 4*pi)
Y = sin(X) + rnorm(200, sd = 0.3)
plot(X, Y, pch = 20)
n = length(X)

# Leave-one-out CV with h = 0.9
CV_err = rep(NA, n)
for(i in 1:n){
  X_val = X[i]; Y_val = Y[i]   # validation set
  X_tr = X[-i]; Y_tr = Y[-i]   # training set
  Y_val_predict = ksmooth(x = X_tr, y = Y_tr, kernel = "normal",
                          bandwidth = 0.9, x.points = X_val)
  CV_err[i] = (Y_val - Y_val_predict$y)^2 # error measured as the squared difference
}
mean(CV_err)

# Leave-one-out CV with changing h
h_seq = seq(from = 0.2, to = 2.0, by = 0.1) # smoothing bandwidths we are using
CV_err_h = rep(NA, length(h_seq))
for(j in 1:length(h_seq)){
  h_using = h_seq[j]
  CV_err = rep(NA, n)
  for(i in 1:n){
    X_val = X[i]; Y_val = Y[i]   # validation set
    X_tr = X[-i]; Y_tr = Y[-i]   # training set
    Y_val_predict = ksmooth(x = X_tr, y = Y_tr, kernel = "normal",
                            bandwidth = h_using, x.points = X_val)
    CV_err[i] = (Y_val - Y_val_predict$y)^2 # error measured as the squared difference
  }
  CV_err_h[j] = mean(CV_err)
}
CV_err_h
plot(x = h_seq, y = CV_err_h, type = "b", lwd = 3, col = "blue",
     xlab = "Smoothing bandwidth", ylab = "LOOCV prediction error")
h_seq[which(CV_err_h == min(CV_err_h))]

# 5-fold CV with changing h, averaged over N_cv random splits
N_cv = 100
k = 5
h_seq = seq(from = 0.2, to = 2.0, by = 0.1)
CV_err_h = rep(0, length(h_seq))
for(i_tmp in 1:N_cv){
  CV_err_h_tmp = rep(0, length(h_seq))
  cv_lab = sample(n, n, replace = FALSE) %% k ## randomly split all the indices into k folds
  for(i in 1:length(h_seq)){
    h0 = h_seq[i]
    CV_err = 0
    for(i_cv in 1:k){
      w_val = which(cv_lab == (i_cv - 1))
      X_tr = X[-w_val]; Y_tr = Y[-w_val]
      X_val = X[w_val]; Y_val = Y[w_val]
      kernel_reg = ksmooth(x = X_tr, y = Y_tr, kernel = "normal",
                           bandwidth = h0, x.points = X_val)
      # WARNING! The ksmooth() function orders the x.points from
      # smallest to largest, so Y_val must be reordered to match.
      CV_err = CV_err + mean((Y_val[order(X_val)] - kernel_reg$y)^2, na.rm = TRUE)
      # na.rm = TRUE drops NA predictions (x.points outside the training range)
    }
    CV_err_h_tmp[i] = CV_err/k
  }
  CV_err_h = CV_err_h + CV_err_h_tmp
}
CV_err_h = CV_err_h/N_cv
plot(h_seq, CV_err_h, type = "b", lwd = 4, col = "blue",
     xlab = "Smoothing bandwidth", ylab = "5-fold CV error")
h_seq[which(CV_err_h == min(CV_err_h))]
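
# ---------------------------------------------------------------------------
# Optional cross-check (not part of the original script): the nonparametric
# bootstrap percentile interval above can be reproduced with the boot package.
# A minimal sketch, assuming boot is installed; boot() expects a
# statistic(data, indices) function, and boot.ci(type = "perc") gives the
# percentile CI, comparable to quantile(sb, c(0.025, 0.975)).
library(boot)
skew_stat = function(data, indices) as.numeric(skewness(data[indices]))
set.seed(1)
boot_out = boot(data = x, statistic = skew_stat, R = B)
boot.ci(boot_out, conf = 0.95, type = "perc")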
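
# ---------------------------------------------------------------------------
# Optional visual check (not part of the original script): overlay the kernel
# fit at the CV-selected bandwidth on the scatterplot. A minimal sketch; h_best
# is taken from the 5-fold CV curve computed above.
h_best = h_seq[which.min(CV_err_h)]
plot(X, Y, pch = 20)
fit = ksmooth(x = X, y = Y, kernel = "normal", bandwidth = h_best,
              x.points = seq(0, 4*pi, length.out = 200))
lines(fit$x, fit$y, col = "red", lwd = 3)
curve(sin(x), add = TRUE, col = "gray", lwd = 2, lty = 2) # true regression function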