# Multiple regression of mpg on the continuous variables of mtcars.
#
# Steps: explore the data, fit the full linear model, reduce it to the
# significant predictors, add quadratic terms suggested by the residual
# plots, refit without four outlying cars, and finally show that the
# sequential ANOVA table depends on the order of the terms.

# ccar: the continuous variables of mtcars ----
ccar <- mtcars[, c("mpg", "disp", "hp", "drat", "wt")]
head(ccar)
summary(ccar)
pairs(ccar)
# The author reads the scatterplot matrix as showing no collinearity between
# pairs of predictors.
# NOTE(review): confirm this numerically, e.g. with cor(ccar[, -1]).

# Full model ----
lm1 <- lm(formula = mpg ~ hp + disp + drat + wt, data = ccar)
summary(lm1)
# Only weight and horsepower have a significant effect on the response,
# so we fit a reduced regression that drops the other predictors.

# Reduced model ----
lm2 <- lm(formula = mpg ~ hp + wt, data = ccar)
summary(lm2)
# Show the four diagnostic plots of each model on one page; the previous
# graphics settings are saved so they can be restored afterwards.
old_par <- par(mfrow = c(2, 2))
plot(lm2)
# There is a quadratic pattern remaining in the residuals.

# Quadratic term in horsepower ----
# I(hp^2) keeps the squared term inside the formula, so the model depends
# only on columns of `data` and not on a separate global vector.
lm3 <- lm(formula = mpg ~ hp + I(hp^2) + wt, data = ccar)
summary(lm3)
# The squared horsepower term is significant and the adjusted R^2 has gone up.
plot(lm3)
# There is still some quadratic-type dependence.

# Quadratic term in weight ----
lm4 <- lm(formula = mpg ~ hp + I(hp^2) + wt + I(wt^2), data = ccar)
summary(lm4)
plot(lm4)
# Now the quadratic effect is gone. The R^2 has gone up and all predictor
# variables are significant. But there are four outliers.

# Refit without the outliers ----
outlier_cars <- c(
  "Pontiac Firebird",
  "Chrysler Imperial",
  "Fiat 128",
  "Toyota Corolla"
)
# Fail loudly if a name is misspelled instead of silently keeping that row.
stopifnot(all(outlier_cars %in% rownames(ccar)))
is_outlier <- rownames(ccar) %in% outlier_cars
which(is_outlier)
# Selecting by name (not by hard-coded row number) keeps this correct even
# if the rows of ccar are reordered or filtered.
ccarred <- ccar[!is_outlier, ]
lm5 <- lm(formula = mpg ~ hp + I(hp^2) + wt + I(wt^2), data = ccarred)
summary(lm5)
# id.n = NULL is kept from the original call; it appears to switch off the
# labelling of extreme points in the diagnostic plots.
plot(lm5, id.n = NULL)
par(old_par)
# The final adjusted R^2 is 0.92. All predictors are significant and all
# diagnostic plots are satisfactory. So this is our final model.

# Order of terms in the ANOVA table ----
# The t-statistics from summary() and the F-statistics from anova() do not
# give the same p-values in multiple regression. The ANOVA table is computed
# sequentially, one term at a time, so the order of the terms matters:
# lm6 and lm2 are the same model with the predictors in opposite order.
lm6 <- lm(formula = mpg ~ wt + hp, data = ccar)
anova(lm6)
anova(lm2)
summary(lm2)