############# "R-code notes compilation for Week 8 March 15th, 17th Compiled by: Deepta Basak, B.Math(hons.) 1st year" ############# ### Simulating Central Limit # loading tidyverse for ggplot library(tidyverse) binomialsim1 <- rbinom(100, 10, 0.25) # Generates 100 Binomial(10,0.25) samples binomialsim2 <- replicate(100, rbinom(1, 10, 0.75)) # Generates 100 Binomial(10,0.75) samples binomialsim3 <- replicate(100, rbinom(1, 10, 0.5)) # Generates 100 Binomial(10,0.5) samples # Let's plot these par(mfrow = c(1, 3)) # par() -> used to set graphical entry parameters. # mfrow = c(1, 3) -> ensures all subsequent figures will be drawn in a 1x3 array on the device BY COLUMNS. # Histogram plots of all the above three simulations hist(binomialsim1, xlim = c(0, 10), main = "Binomial(10, 0.25)") hist(binomialsim2, xlim = c(0, 10), main = "Binomial(10, 0.75)") hist(binomialsim3, xlim = c(0, 10), main = "Binomial(10, 0.5)") # From the above plots, it seems that for n = 10, # symmetry is achieved when p = 0.5 and not when p = 0.25 or p = 0.75 ### Visualizing the Central Limit Theorem ## n = 10 binomialsim1 <- replicate( 100, (rbinom(1, 10, 0.1) - 1) / sqrt(10 * 0.1 * 0.9) ) binomialsim2 <- replicate( 100, (rbinom(1, 10, 0.25) - 2.5) / sqrt(10 * 0.25 * 0.75) ) binomialsim3 <- replicate( 100, (rbinom(1, 10, 0.5) - 5) / sqrt(10 * 0.5 * 0.5) ) # Plotting par(mfrow = c(1, 3)) # Standardized histograms for Binomial distribution with n = 10 and p = 0.1, 0.25, 0.5 hist(binomialsim1, main = "Std-Binomial(10, 0.1)") hist(binomialsim2, main = "Std-Binomial(10, 0.25)") hist(binomialsim3, main = "Std-Binomial(10, 0.5)") # Perhaps n = 10 is not large enough to see the Central Limit Theorem occurring ## n = 20 binomialsim1 <- replicate( 100, (rbinom(1, 20, 0.1) - 2) / sqrt(20 * 0.1 * 0.9) ) binomialsim2 <- replicate( 100, (rbinom(1, 20, 0.25) - 5) / sqrt(20 * 0.25 * 0.75) ) binomialsim3 <- replicate( 100, (rbinom(1, 20, 0.5) - 10) / sqrt(20 * 0.5 * 0.5) ) # Plotting par(mfrow = c(1, 3)) # Standardized histograms for Binomial distribution with n = 20 and p = 0.1, 0.25, 0.5 hist(binomialsim1, main = "Std-Binomial(20, 0.1)") hist(binomialsim2, main = "Std-Binomial(20, 0.25)") hist(binomialsim3, main = "Std-Binomial(20, 0.5)") # n = 20 seems better for visualizing the Central Limit Theorem ## n = 50 binomialsim1 <- replicate( 100, (rbinom(1, 50, 0.1) - 5) / sqrt(50 * 0.1 * 0.9) ) binomialsim2 <- replicate( 100, (rbinom(1, 50, 0.25) - 12.5) / sqrt(50 * 0.25 * 0.75) ) binomialsim3 <- replicate( 100, (rbinom(1, 50, 0.5) - 25) / sqrt(50 * 0.5 * 0.5) ) # Plotting par(mfrow = c(1, 3)) # Standardized histograms for Binomial distribution with n = 50 and p = 0.1, 0.25, 0.5 hist(binomialsim1, main = "Std-Binomial(50, 0.1)") hist(binomialsim2, main = "Std-Binomial(50, 0.25)") hist(binomialsim3, main = "Std-Binomial(50, 0.5)") # With n = 50, we get closer to a Normal distribution ## Confidence Intervals cifn <- function(x, alpha = 0.95) { z <- qnorm( (1 - alpha) / 2, lower.tail = FALSE ) # z = 1.96 sdx <- sqrt(1 / length(x)) # sdx = \frac{1}{\sqrt{n}} c(mean(x) - z * sdx, mean(x) + z * sdx) # (mean(x) - \frac{1.96}{\sqrt{n}}, mean(x) - \frac{1.96}{\sqrt{n}}) } ## x1 <- rnorm(100, 0, 1) # 100 samples from Normal(0,1) y <- cifn(x1) # 95% confidence interval x2 <- rnorm(100, 0, 1) # 100 samples from Normal(0,1) z <- cifn(x2) # 95% confidence interval x3 <- rnorm(100, 0, 1) # 100 samples from Normal(0,1) w <- cifn(x3) # 95% confidence interval ## Let's plot the confidence intervals par(mfrow = c(1, 3)) plot(c(0, 0), y, type = "l", xlab = "", ylab = "", axes = FALSE) axis(2, at = c(-0.25, -0.15, -0.1, 0, 0.1, 0.15, 0.25)) points(0, 0, pch = 19, col = "blue") points(0, y[1], pch = 23, col = "brown") points(0, y[2], pch = 24, col = "brown") # Plot of the first confidence interval y plot(c(0, 0), z, type = "l", xlab = "", ylab = "", axes = FALSE) axis(2, at = c(-0.25, -0.15, -0.1, 0, 0.1, 0.15, 0.25)) points(0, 0, pch = 19, col = "blue") points(0, z[1], pch = 23, col = "brown") points(0, z[2], pch = 24, col = "brown") # Plot of the second confidence interval z plot(c(0, 0), w, type = "l", xlab = "", ylab = "", axes = FALSE) axis(2, at = c(-0.25, -0.15, -0.1, 0, 0.1, 0.15, 0.25)) points(0, 0, pch = 19, col = "blue") points(0, w[1], pch = 23, col = "brown") points(0, w[2], pch = 24, col = "brown") # Plot of the third confidence interval w ### In this simulation, 0 belongs to all three confidence intervals y, z, w # Checking confidence intervals with differnent number of trials ### 10 trials normaldata <- replicate(10, rnorm(100, 0, 1), simplify = FALSE ) # Generate 10 trials of 100 samples from the Normal(0,1) distribution cidata <- sapply(normaldata, cifn) # Apply cfin() to each trial TRUEIN <- cidata[1, ] * cidata[2, ] < 0 table(TRUEIN) # Check how many of the confidence intervals contain 0 require(ggplot2) df <- data.frame(x = 1:10, z1 = cidata[1, ], z2 = cidata[2, ]) ggplot(df, aes(x = x, y = 0)) + geom_point(size = 4, color = "#760b0b") + geom_errorbar( aes(ymax = z2, ymin = z1), color = "#371edd" ) # ggplot of the above 10 trials ### 40 trials normaldata <- replicate(40, rnorm(100, 0, 1), simplify = FALSE ) # Generate 40 trials of 100 samples from the Normal(0,1) distribution cidata <- sapply(normaldata, cifn) # Apply cfin() to each trial TRUEIN <- cidata[1, ] * cidata[2, ] < 0 table(TRUEIN) # Check how many of the above confidence intervals contain 0 require(ggplot2) df <- data.frame(x = 1:40, z1 = cidata[1, ], z2 = cidata[2, ]) ggplot(df, aes(x = x, y = 0)) + geom_point(size = 1, color = "#760b0b") + geom_errorbar( aes(ymax = z2, ymin = z1), color = "#371edd" ) # ggplot of the above 40 trials ### 100 trials normaldata <- replicate(100, rnorm(100, 0, 1), simplify = FALSE ) # Generate 100 trials of 100 samples from the Normal(0,1) distribution cidata <- sapply(normaldata, cifn) # Apply cfin() to each trial TRUEIN <- cidata[1, ] * cidata[2, ] < 0 table(TRUEIN) # Check how many of the above confidence intervals contain 0 require(ggplot2) df <- data.frame(x = 1:100, z1 = cidata[1, ], z2 = cidata[2, ]) ggplot(df, aes(x = x, y = 0)) + geom_point(size = 0.5, color = "#760b0b") + geom_errorbar( aes(ymax = z2, ymin = z1), color = "#371edd" ) # ggplot of the above 100 trials ###