library("tibble")
library("ggplot2")

# ---- Challenger: logistic regression of damage on launch temperature ----

# NOTE(review): `challenger` is not created in this script; it is assumed to
# already exist with columns `Damaged` and `Temperature` -- confirm.
challenger <- as.data.frame(challenger)

# Fit with bare column names and `data =` (rather than `challenger$...` inside
# the formula) so that predict() can be given a data frame via `newdata`.
logistic.reg.model <- glm(
  Damaged ~ Temperature,
  family = binomial(link = "logit"),
  data = challenger
)
summary(logistic.reg.model)

# Predictions for every observed temperature, on both scales:
#   * response scale -> fitted probabilities and their standard errors
#   * link scale     -> used only to build the confidence band
pred.response <- predict(
  logistic.reg.model,
  newdata = challenger,
  type = "response",
  se.fit = TRUE
)
pred.link <- predict(
  logistic.reg.model,
  newdata = challenger,
  type = "link",
  se.fit = TRUE
)
inv.link <- family(logistic.reg.model)$linkinv

# Observed outcomes (black) with fitted probabilities overlaid (red).
plot(challenger$Temperature, challenger$Damaged, ylim = c(-1, 2))
points(challenger$Temperature, pred.response$fit, col = "red")

## add the fitted values by predicting from the model for the observed data
ndata <- data.frame(temp = challenger$Temperature)
ndata <- add_column(ndata, fit = pred.response$fit)
ndata <- add_column(ndata, se = pred.response$se.fit)

# Band of +/- 2 standard errors (roughly 95%). It is computed on the link
# scale and then back-transformed, so the limits always stay inside [0, 1];
# `fit +/- 2 * se` on the response scale can fall outside that range.
ndata <- add_column(
  ndata,
  upr = inv.link(pred.link$fit + 2 * pred.link$se.fit),
  lwr = inv.link(pred.link$fit - 2 * pred.link$se.fit)
)

ggplot(ndata, aes(x = temp, y = fit)) +
  geom_line() +
  geom_ribbon(data = ndata, aes(ymin = lwr, ymax = upr), alpha = 0.1)

# Data Source: https://archive.ics.uci.edu/ml/datasets/Statlog+%28German+Credit+Data%29
# Abstract: This dataset classifies people, described by a set of attributes,
#   as good or bad credit risks.
# Objective: Can we build an algorithm that predicts whether a loan applicant
#   is a good or bad credit risk?
# ---- German credit data: logistic regression for good/bad credit risk ----

# Download the data set; character columns are kept as strings.
German_Credit_Data <- read.csv(
  file = "https://statfin.cmi.ac.in/2019/German_Credit_Data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
data <- German_Credit_Data
head(data)

# Shift the response down by one so it can be used as the 0/1 outcome of a
# binomial GLM.
data$Good_Bad <- data$Good_Bad - 1 ## 1= Bad ; 0 = Good
head(data)
table(data$Good_Bad)

# Reproducible random split: 80% of the rows for training, the rest for test.
train_proportion <- 0.8
n <- nrow(German_Credit_Data)
m <- ceiling(n * train_proportion)
set.seed(321)
train_id <- sample(seq_len(n), m, replace = FALSE)

y_train <- data$Good_Bad[train_id]
y_test <- data$Good_Bad[-train_id]

# Numeric predictors offered to the model.
predvars <- c(
  "Duration_in_month",
  "Credit_amount",
  "Installment_rate",
  "Present_residence_since",
  "Age",
  "No_of_Existing_credits",
  "liable_people_for_maintenance"
)
data.subset <- data[predvars]
summary(data.subset)
# Quick look at the predictors of the training rows.
head(data.subset[train_id, ])

# Training and test frames: the predictors plus the response.
myvars <- c(predvars, "Good_Bad")
train_data <- data[train_id, myvars]
test_data <- data[-train_id, myvars]
head(train_data)

# Full logistic model on all predictors, reduced by stepwise selection
# (trace = 0 silences the per-step output).
logistic.reg.model <- step(
  glm(Good_Bad ~ ., family = binomial(link = "logit"), data = train_data),
  trace = 0
)
summary(logistic.reg.model)

# Predicted probabilities on the test set, turned into class labels with a
# 0.5 cut-off in a single vectorised step.
logistic.reg.pred <- predict(
  logistic.reg.model,
  newdata = test_data,
  type = "response"
)
logistic.reg.pred <- ifelse(logistic.reg.pred >= 0.5, 1, 0)

## Test-set accuracy (in percent)
round(100 * mean(y_test == logistic.reg.pred), 2)

## Confusion Table
# Fixed 0/1 levels keep the table 2 x 2 even when one class is never
# predicted (or never occurs in the test set).
class.levels <- c(0, 1)
conf_tabl <- table(
  logistic.reg.pred = factor(logistic.reg.pred, levels = class.levels),
  y_test = factor(y_test, levels = class.levels)
)
conf_tabl