library('tibble')
library('ggplot2')
# Challenger O-ring example: logistic regression of damage on temperature.
# NOTE(review): `challenger` is not created in the visible part of this file;
# it is assumed to already exist with columns `Damaged` and `Temperature`.
challenger <- as.data.frame(challenger)

# Use bare column names plus `data =` so that predict() can be given new data;
# a formula written with `challenger$...` ignores `newdata`.
logistic.reg.model <- glm(
  Damaged ~ Temperature,
  family = binomial(link = "logit"),
  data = challenger
)
summary(logistic.reg.model)

# Observed outcomes (black) with the fitted probabilities overlaid (red).
plot(challenger$Temperature, challenger$Damaged, ylim = c(-1, 2))
points(challenger$Temperature, fitted(logistic.reg.model), col = "red")

ndata <- data.frame(temp = challenger$Temperature)
# The model's predictor is called `Temperature`, so rename for prediction.
pred_grid <- data.frame(Temperature = ndata$temp)

## add the fitted values by predicting from the model for the new data
resp_pred <- predict(
  logistic.reg.model,
  newdata = pred_grid,
  type = "response",
  se.fit = TRUE
)
link_pred <- predict(
  logistic.reg.model,
  newdata = pred_grid,
  type = "link",
  se.fit = TRUE
)
ndata <- add_column(ndata, fit = resp_pred$fit)
ndata <- add_column(ndata, se = resp_pred$se.fit)

# Build the +/- 2 SE band on the link (log-odds) scale and map it back through
# the inverse link. A band computed as fit +/- 2 * se on the response scale can
# fall below 0 or above 1, which is impossible for a probability.
inv_link <- family(logistic.reg.model)$linkinv
ndata <- add_column(
  ndata,
  upr = inv_link(link_pred$fit + 2 * link_pred$se.fit),
  lwr = inv_link(link_pred$fit - 2 * link_pred$se.fit)
)

ggplot(ndata, aes(x = temp, y = fit)) +
  geom_line() +
  geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.1)

# Data source: https://archive.ics.uci.edu/ml/datasets/Statlog+%28German+Credit+Data%29
# Abstract: This dataset classifies people, described by a set of attributes, as good or bad credit risks.
# Objective: Can we build an algorithm that predicts whether a loan applicant is a good or a bad credit risk?
  
  
  
# Read the German credit data from the course server; strings stay as
# character vectors rather than being converted to factors.
German_Credit_Data <- read.csv(
  file = "https://statfin.cmi.ac.in/2019/German_Credit_Data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Work on a copy so the raw import stays untouched.
data <- German_Credit_Data
head(data)

# Shift the response down by one.
## 1= Bad ; 0 = Good
data[["Good_Bad"]] <- data[["Good_Bad"]] - 1
head(data)

# Class counts after recoding.
table(data[["Good_Bad"]])

# Reproducible random split of row indices into training and test sets.
train_proportion <- 0.8
n <- nrow(German_Credit_Data)
# Number of training rows, rounded up to a whole row.
m <- ceiling(n * train_proportion)

# Fixed seed so the same rows are drawn on every run.
set.seed(321)
train_id <- sample(seq_len(n), size = m, replace = FALSE)

# Response vectors for the sampled rows and for the remaining rows.
y_train <- data[["Good_Bad"]][train_id]
y_test <- data[["Good_Bad"]][-train_id]

# Predictor columns used by the credit model.
predvars <- c(
  "Duration_in_month",
  "Credit_amount",
  "Installment_rate",
  "Present_residence_since",
  "Age",
  "No_of_Existing_credits",
  "liable_people_for_maintenance"
)
data.subset <- data[predvars]

summary(data.subset)

# Preview the predictor-only training rows.
head(data.subset[train_id, ])

# Modelling frames: the predictors plus the response column.
myvars <- c(predvars, "Good_Bad")
train_data <- data[train_id, myvars]
test_data <- data[-train_id, myvars]

head(train_data)
# Fit a logistic regression of Good_Bad on every other column of train_data,
# then let step() select terms; trace = 0 silences the per-step output.
logistic.reg.model <- step(
  glm(
    Good_Bad ~ .,
    family = binomial(link = "logit"),
    data = train_data
  ),
  trace = 0
)
summary(logistic.reg.model)

# Predicted probabilities for the hold-out rows. They are kept in their own
# variable so that thresholding does not overwrite them.
logistic.reg.prob <- predict(
  logistic.reg.model,
  newdata = test_data,
  type = "response"
)

# Classify with a 0.5 cut-off in a single vectorised step (1 when the
# probability is at least 0.5, otherwise 0).
logistic.reg.pred <- ifelse(logistic.reg.prob >= 0.5, 1, 0)

## Accuracy (percent of hold-out rows classified correctly)
round(100 * sum(y_test == logistic.reg.pred) / length(y_test), 2)

## Confusion Table
# Fixed levels keep the table 2 x 2 even when every prediction falls in one
# class; a plain table() would silently drop the empty row.
# NOTE(review): assumes the response is coded 0/1 as set up earlier in the
# script; any other value would become NA here and be left out.
class_levels <- c(0, 1)
conf_tabl <- table(
  logistic.reg.pred = factor(logistic.reg.pred, levels = class_levels),
  y_test = factor(y_test, levels = class_levels)
)
conf_tabl