#least square fit in R
data(iris)
head(iris)

fit1 <- lm(Sepal.Length ~ Petal.Width, data = iris)
summary(fit1)
plot(Sepal.Length ~ Petal.Width, data = iris)
abline(fit1)

library(ggplot2)

ggplot(iris, aes(x = Petal.Width, y = Sepal.Length)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red")


##Normal Equations
x1 <- c(10,0,45,56,79)
x2 <- c(2,34,15,5,4)
y <-  x1 + x2 + rnorm(5,mean=0,sd=0.3);
Y <- as.matrix(y);
X <- as.matrix(cbind(1,x1,x2));

betahat = solve(t(X) %*% X) %*% (t(X) %*% Y) 
#bethaat solution of X^TX Beta =X^Ty

betahat


lm.fit <- lm(y~1+x1+x2) 
lm.fit$coefficients


#https://www.statlearning.com/s/Chapter-3-Lab.txt
# Chapter 3 Lab: Linear Regression

library(MASS)
library(ISLR)

# Simple Linear Regression

head(Boston)
names(Boston)
# Boston is a data set in ISLR

lm.fit=lm(medv~lstat)
# error as boston is not added to the data set

lm.fit=lm(medv~lstat,data=Boston)
## alternatively one can attach the data

attach(Boston)
lm.fit=lm(medv~lstat)
lm.fit

summary(lm.fit)
## discuss what each summary is stating

names(lm.fit)
coef(lm.fit)
# coefficients of lm. fit are 

confint(lm.fit)
# provides the confidence interval for the slope.

predict(lm.fit,data.frame(lstat=(c(5,10,15))), interval="confidence")
predict(lm.fit,data.frame(lstat=(c(5,10,15))), interval="prediction")
plot(lstat,medv)
abline(lm.fit)
abline(lm.fit,lwd=3)
abline(lm.fit,lwd=3,col="red")


plot(lstat,medv,col="red")
plot(lstat,medv,pch=20)
plot(lstat,medv,pch="+")
plot(1:20,1:20,pch=1:20)
par(mfrow=c(2,2))
plot(lm.fit)
plot(predict(lm.fit), residuals(lm.fit))
plot(predict(lm.fit), rstudent(lm.fit))
plot(hatvalues(lm.fit))
which.max(hatvalues(lm.fit))


OneFourthPiSamples= replicate(100, {
  u <- runif(10000)
  v <- runif(10000)
  z <- sqrt(u^2 + v^2)
  sum(z < 1) / 10000
})


require(graphics)

1 - pt(1:5, df = 1)
qt(.975, df = c(1:10,20,50,100,1000))

tt <- seq(0, 10, length.out = 21)
ncp <- seq(0, 6, length.out = 31)
ptn <- outer(tt, ncp, function(t, d) pt(t, df = 3, ncp = d))
t.tit <- "Non-central t - Probabilities"
image(tt, ncp, ptn, zlim = c(0,1), main = t.tit)
persp(tt, ncp, ptn, zlim = 0:1, r = 2, phi = 20, theta = 200, main = t.tit,
      xlab = "t", ylab = "non-centrality parameter",
      zlab = "Pr(T <= t)")


plot(function(x) dt(x, df = 3, ncp = 2), -3, 11, ylim = c(0, 0.5),
     main = "Non-central t - Density", yaxs = "i")
lines(function(x) dt(x, df = 30, ncp = 2), -3, 11, ylim = c(0, 0.5),
      main = "Non-central t - Density", yaxs = "i")
lines(function(x) dt(x, df = 300, ncp = 2), -3, 11, ylim = c(0, 0.5),
      main = "Non-central t - Density", yaxs = "i")


plot(function(x) dt(x, df = 3, ncp = 2), -3, 11, ylim = c(0, 0.5),
     main = "Non-central t - Density", yaxs = "i")
plot(function(x) dt(x, df = 25, ncp = 2), -3, 11, ylim = c(0, 0.5))
x=seq(-3,11,by=100)
y = dt(x, df = 300, ncp = 2)
lines(y~x )