#least square fit in R data(iris) head(iris) fit1 <- lm(Sepal.Length ~ Petal.Width, data = iris) summary(fit1) plot(Sepal.Length ~ Petal.Width, data = iris) abline(fit1) library(ggplot2) ggplot(iris, aes(x = Petal.Width, y = Sepal.Length)) + geom_point() + stat_smooth(method = "lm", col = "red") ##Normal Equations x1 <- c(10,0,45,56,79) x2 <- c(2,34,15,5,4) y <- x1 + x2 + rnorm(5,mean=0,sd=0.3); Y <- as.matrix(y); X <- as.matrix(cbind(1,x1,x2)); betahat = solve(t(X) %*% X) %*% (t(X) %*% Y) #bethaat solution of X^TX Beta =X^Ty betahat lm.fit <- lm(y~1+x1+x2) lm.fit$coefficients #https://www.statlearning.com/s/Chapter-3-Lab.txt # Chapter 3 Lab: Linear Regression library(MASS) library(ISLR) # Simple Linear Regression head(Boston) names(Boston) # Boston is a data set in ISLR lm.fit=lm(medv~lstat) # error as boston is not added to the data set lm.fit=lm(medv~lstat,data=Boston) ## alternatively one can attach the data attach(Boston) lm.fit=lm(medv~lstat) lm.fit summary(lm.fit) ## discuss what each summary is stating names(lm.fit) coef(lm.fit) # coefficients of lm. fit are confint(lm.fit) # provides the confidence interval for the slope. predict(lm.fit,data.frame(lstat=(c(5,10,15))), interval="confidence") predict(lm.fit,data.frame(lstat=(c(5,10,15))), interval="prediction") plot(lstat,medv) abline(lm.fit) abline(lm.fit,lwd=3) abline(lm.fit,lwd=3,col="red") plot(lstat,medv,col="red") plot(lstat,medv,pch=20) plot(lstat,medv,pch="+") plot(1:20,1:20,pch=1:20) par(mfrow=c(2,2)) plot(lm.fit) plot(predict(lm.fit), residuals(lm.fit)) plot(predict(lm.fit), rstudent(lm.fit)) plot(hatvalues(lm.fit)) which.max(hatvalues(lm.fit)) OneFourthPiSamples= replicate(100, { u <- runif(10000) v <- runif(10000) z <- sqrt(u^2 + v^2) sum(z < 1) / 10000 }) require(graphics) 1 - pt(1:5, df = 1) qt(.975, df = c(1:10,20,50,100,1000)) tt <- seq(0, 10, length.out = 21) ncp <- seq(0, 6, length.out = 31) ptn <- outer(tt, ncp, function(t, d) pt(t, df = 3, ncp = d)) t.tit <- "Non-central t - Probabilities" image(tt, ncp, ptn, zlim = c(0,1), main = t.tit) persp(tt, ncp, ptn, zlim = 0:1, r = 2, phi = 20, theta = 200, main = t.tit, xlab = "t", ylab = "non-centrality parameter", zlab = "Pr(T <= t)") plot(function(x) dt(x, df = 3, ncp = 2), -3, 11, ylim = c(0, 0.5), main = "Non-central t - Density", yaxs = "i") lines(function(x) dt(x, df = 30, ncp = 2), -3, 11, ylim = c(0, 0.5), main = "Non-central t - Density", yaxs = "i") lines(function(x) dt(x, df = 300, ncp = 2), -3, 11, ylim = c(0, 0.5), main = "Non-central t - Density", yaxs = "i") plot(function(x) dt(x, df = 3, ncp = 2), -3, 11, ylim = c(0, 0.5), main = "Non-central t - Density", yaxs = "i") plot(function(x) dt(x, df = 25, ncp = 2), -3, 11, ylim = c(0, 0.5)) x=seq(-3,11,by=100) y = dt(x, df = 300, ncp = 2) lines(y~x )