set.seed(2026)
x = rgamma(20, 4, 1) # true skewness of Gamma(alpha=4) is 2/sqrt(alpha) = 1
hist(x)

library(timeDate)
n = length(x)
s = skewness(x)
s # sample skewness = 0.5389618

# Jackknife: generating the pseudo-observations
sj = rep(0, n)
for(i in 1:n){
  y = x[-i]
  sj[i] = s + (n-1)*(s - skewness(y)) # i-th pseudo-value: n*s - (n-1)*s_(-i)
}
corrected = mean(sj)
corrected # jackknife bias-corrected skewness = 0.5723043
variance = var(sj)/n # jackknife variance estimate
sqrt(variance)
(1 - corrected)/sqrt(variance) # t-type statistic against the true value 1

# Nonparametric bootstrap
B = 1000
sb = rep(0, B)
set.seed(1)
for(i in 1:B){
  y = sample(x, n, replace = TRUE)
  sb[i] = skewness(y)
}
hist(sb, breaks = 20)
abline(v = mean(sb), col = "blue")
mean(sb) # bootstrap mean = 0.5430879
quantile(sb, 0.025)
quantile(sb, 0.975) # 95% CI (-0.068, 1.238) contains the true parameter value

# Parametric bootstrap: method-of-moments fit of the gamma parameters
mu = mean(x)
sig = sd(x)
alpha = (mu^2)/sig^2
beta = mu/sig^2
set.seed(1)
for(i in 1:B){
  y = rgamma(n, alpha, beta) # resample from the fitted Gamma(alpha, beta)
  sb[i] = skewness(y)
}
hist(sb, breaks = 20)
abline(v = mean(sb), col = "blue")
mean(sb) # parametric bootstrap mean = 0.645, closest to the true parameter
quantile(sb, 0.025)
quantile(sb, 0.975) # 95% CI (-0.220, 1.737) contains the true parameter value

# Generating bivariate data
X = runif(200, min = 0, max = 4*pi)
Y = sin(X) + rnorm(200, sd = 0.3)
plot(X, Y, pch = 20)
n = length(X)

# Leave-one-out CV with h = 0.9
CV_err = rep(NA, n)
for(i in 1:n){
  X_val = X[i]; Y_val = Y[i]   # validation set
  X_tr = X[-i]; Y_tr = Y[-i]   # training set
  Y_val_predict = ksmooth(x = X_tr, y = Y_tr, kernel = "normal",
                          bandwidth = 0.9, x.points = X_val)
  CV_err[i] = (Y_val - Y_val_predict$y)^2 # error measured as the squared difference
}
mean(CV_err)

# Leave-one-out CV with changing h
h_seq = seq(from = 0.2, to = 2.0, by = 0.1) # smoothing bandwidths we are using
CV_err_h = rep(NA, length(h_seq))
for(j in 1:length(h_seq)){
  h_using = h_seq[j]
  CV_err = rep(NA, n)
  for(i in 1:n){
    X_val = X[i]; Y_val = Y[i]   # validation set
    X_tr = X[-i]; Y_tr = Y[-i]   # training set
    Y_val_predict = ksmooth(x = X_tr, y = Y_tr, kernel = "normal",
                            bandwidth = h_using, x.points = X_val)
    CV_err[i] = (Y_val - Y_val_predict$y)^2 # error measured as the squared difference
  }
  CV_err_h[j] = mean(CV_err)
}
CV_err_h
plot(x = h_seq, y = CV_err_h, type = "b", lwd = 3, col = "blue",
     xlab = "Smoothing bandwidth", ylab = "LOOCV prediction error")
h_seq[which(CV_err_h == min(CV_err_h))]

# 5-fold CV with changing h, averaged over N_cv random splits
N_cv = 100
k = 5
h_seq = seq(from = 0.2, to = 2.0, by = 0.1)
CV_err_h = rep(0, length(h_seq))
for(i_tmp in 1:N_cv){
  CV_err_h_tmp = rep(0, length(h_seq))
  cv_lab = sample(n, n, replace = FALSE) %% k ## randomly split all the indices into k folds
  for(i in 1:length(h_seq)){
    h0 = h_seq[i]
    CV_err = 0
    for(i_cv in 1:k){
      w_val = which(cv_lab == (i_cv - 1))
      X_tr = X[-w_val]; Y_tr = Y[-w_val]
      X_val = X[w_val]; Y_val = Y[w_val]
      kernel_reg = ksmooth(x = X_tr, y = Y_tr, kernel = "normal",
                           bandwidth = h0, x.points = X_val)
      # WARNING! The ksmooth() function orders the x.points from
      # smallest to largest, so Y_val must be reordered to match.
      CV_err = CV_err + mean((Y_val[order(X_val)] - kernel_reg$y)^2, na.rm = TRUE)
      # na.rm = TRUE drops NA predictions (x.points outside the training range)
    }
    CV_err_h_tmp[i] = CV_err/k
  }
  CV_err_h = CV_err_h + CV_err_h_tmp
}
CV_err_h = CV_err_h/N_cv
plot(h_seq, CV_err_h, type = "b", lwd = 4, col = "blue",
     xlab = "Smoothing bandwidth", ylab = "5-fold CV error")
h_seq[which(CV_err_h == min(CV_err_h))]
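
# ---------------------------------------------------------------------------
# Optional cross-check (not part of the original script): the nonparametric
# bootstrap percentile interval above can be reproduced with the boot package.
# A minimal sketch, assuming boot is installed; boot() expects a
# statistic(data, indices) function, and boot.ci(type = "perc") gives the
# percentile CI, comparable to quantile(sb, c(0.025, 0.975)).
library(boot)
skew_stat = function(data, indices) as.numeric(skewness(data[indices]))
set.seed(1)
boot_out = boot(data = x, statistic = skew_stat, R = B)
boot.ci(boot_out, conf = 0.95, type = "perc")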
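
# ---------------------------------------------------------------------------
# Optional visual check (not part of the original script): overlay the kernel
# fit at the CV-selected bandwidth on the scatterplot. A minimal sketch; h_best
# is taken from the 5-fold CV curve computed above.
h_best = h_seq[which.min(CV_err_h)]
plot(X, Y, pch = 20)
fit = ksmooth(x = X, y = Y, kernel = "normal", bandwidth = h_best,
              x.points = seq(0, 4*pi, length.out = 200))
lines(fit$x, fit$y, col = "red", lwd = 3)
curve(sin(x), add = TRUE, col = "gray", lwd = 2, lty = 2) # true regression function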