library(ISLR)
# Print per-column summaries of the Default data set (output recorded below).
summary(Default)
##  default    student       balance         income
##  No :9667   No :7056   Min.   :   0   Min.   :  772
##  Yes: 333   Yes:2944   1st Qu.: 482   1st Qu.:21340
##                        Median : 824   Median :34553
##                        Mean   : 835   Mean   :33517
##                        3rd Qu.:1166   3rd Qu.:43808
##                        Max.   :2654   Max.   :73554
# Put the columns of Default on the search path.
# NOTE(review): every model call visible here passes `data = Default`
# explicitly, so attach() may be unnecessary -- confirm nothing else relies on it.
attach(Default)
# Seed the random number generator so the random splits drawn later are
# reproducible.
set.seed(1)
# Logistic regression of default on income and balance, fitted on all rows.
glm.fit = glm(default ~ income + balance, data = Default, family = binomial)
# Estimate the test error of the logistic regression
# `default ~ income + balance` with the validation set approach, using one
# random half/half split of `Default`.
#
# Takes no arguments. Reads the `Default` data frame from the enclosing
# environment and draws a new split from the current RNG state on every call,
# so repeated calls return different estimates unless the seed is reset.
#
# Returns the fraction of held-out observations that are misclassified when
# "Yes" is predicted for a fitted probability above 0.5 and "No" otherwise.
FiveB <- function() {
  n <- nrow(Default)
  # i. Sample half of the row indices (rounded down) as the training set.
  train <- sample(n, n %/% 2)
  # ii. Fit the model on the training rows only.
  fit <- glm(default ~ income + balance, data = Default, family = binomial,
             subset = train)
  # iii. Predict default probabilities for the held-out rows.
  held_out <- Default[-train, ]
  probs <- predict(fit, held_out, type = "response")
  # Size the label vector from the predictions rather than from n / 2, so it
  # always matches the held-out set, including when n is odd.
  pred <- rep("No", length(probs))
  pred[probs > 0.5] <- "Yes"
  # iv. Validation set error: share of held-out rows that are misclassified.
  mean(pred != held_out$default)
}
# Estimate the validation set test error on one random split.
FiveB()
## [1] 0.0286
The validation set approach gives a test error rate of 2.86%.
# Repeat the estimate three more times. Each call draws a different random
# split, so the estimated error rate varies from run to run.
FiveB()
## [1] 0.0236
FiveB()
## [1] 0.028
FiveB()
## [1] 0.0268
Across the three repeated splits, the test error rate averages around 2.6%.
# Validation set estimate for the model that also includes the `student`
# dummy variable, using one random half/half split of Default.
train <- sample(nrow(Default), nrow(Default) %/% 2)
# Fit on the training rows only.
glm.fit <- glm(default ~ income + balance + student, data = Default,
               family = binomial, subset = train)
# Predicted default probabilities for the held-out rows.
glm.probs <- predict(glm.fit, Default[-train, ], type = "response")
# Size the label vector from the predictions rather than from n / 2, so it
# always matches the held-out set, including when the row count is odd.
glm.pred <- rep("No", length(glm.probs))
glm.pred[glm.probs > 0.5] <- "Yes"
# Share of held-out rows that are misclassified (printed at top level).
mean(glm.pred != Default[-train, ]$default)
## [1] 0.0264
With the student dummy variable included, the test error rate is 2.64%. Under the validation set approach, adding the student dummy variable does not appear to reduce the test error rate: 2.64% is in line with the roughly 2.6% obtained without it.