############# "R-code notes compilation for Week 13 April 19th and 21st Compiled by: Bikram Halder, B.Math(hons.) 1st year" ############# library(tidyverse) library(viridis) library(cowplot) #### Hypothesis Testing Code prop.test(43, 100) # Inbuilt R function for z-test ztestci <- function(x, mu = 0, sigma = 1, alpha = 0.95) { z <- qnorm( (1 - alpha) / 2, lower.tail = FALSE ) # P(|Z| >= z) sdx <- sigma / sqrt(length(x)) pvalue <- pnorm( (mean(x) - mu) / sdx, lower.tail = FALSE ) # P(Z >= sqrt(n)(X-mu)/sigma) c(mean(x) - z * sdx, mean(x) + z * sdx, pvalue) # returns a vector containing # 95% confidence interval in the 1st 2 index # and pvalue at the 3rd index } # Writing custom function for z-test x <- c(75, 76, 73, 75, 74, 73, 76, 73, 79) # Data vector y <- ztestci(x, 76, 1.5) # Null Hypothesis - H_0: mu = 76 # Alt Hypothesis - H_0: mu > 76 y t.test(x, mu = 74) # Inbuilt R function for t-test # Ex: Try and write out code for t-test wilcox.test(x, mu = 74, alt = "greater") # Inbuilt R function for wilcoxon signed rank test ### Chi-squared Test x <- c(35, 40, 25) prob <- c(0.38, 0.32, 0.3) # Null distribution n <- 100 # Sample size Chisquare <- sum((x - n * prob)^2 / (n * prob)) # Chisquare statistic follows chisquare with k-1 # degrees of freedom with k = 3 pchisq(Chisquare, df = 2, lower.tail = FALSE) # p-vaue df <- data.frame( x = c("NDA", "UPA", "3rd"), observed = x, expected = 100 * prob, stringsAsFactors = FALSE ) plt_obs <- df %>% ggplot() + geom_bar( mapping = aes( x = x, y = observed, fill = observed ), stat = "identity" ) + theme(legend.position = "none") plt_exp <- df %>% ggplot() + geom_bar( mapping = aes( x = x, y = expected, fill = expected ), stat = "identity" ) + theme(legend.position = "none") plt_diff <- df %>% ggplot() + geom_bar( mapping = aes( x = x, y = observed - expected, fill = observed - expected ), stat = "identity" ) + theme(legend.position = "none") plot_grid(plt_exp, plt_obs, plt_diff, nrow = 1) # Plotting 3 bargraphs for Expected, Observed and Difference of the data ### Contigency Tables ## Bivariate Data is often presented as a two-way table # For example in Dengue Data from Manipal Hospital y <- read.table( "./dengueb.csv", header = TRUE ) Diagnosis <- y$DIAGNO Marker <- y$BICARB1 summary(Marker) # Doctor's needs: # A patient arrives with Dengue. # Based on Marker doctor needs to decide on Treatment z <- replace(Marker, Marker <= 21, 0) Cat.Marker <- replace(z, 21 < z, 2) T <- table(Cat.Marker, Diagnosis) T # Statistical test performed: # We collected data of patients : Marker and final diagnosis chisq.test(T) # We test if Marker is independent of Diagnosis prop.test(c(17, 21), c(25, 25), alternative = "less") # z-test for bivariate data x <- c(15, 10, 13, 7, 9, 8, 21, 9, 14, 8) y <- c(15, 14, 12, 8, 14, 7, 16, 10, 15, 12) t.test(x, y, var.equal = TRUE) # t-test for bivariate data