To showcase and practice machine learning techniques and visualizations.
library(data.table) #fread()
library(ggplot2)
library(dplyr) #The usual
library(caret) #train()
library(rsample) #initial_split() vfold_cv()
library(purrr) #map()
library(Metrics) #recall() mae()
library(ranger) #ranger()
library(glmnet) #glmnet()
library(broom) #tidy()
library(ggthemes) #theme_
Source: The data comes from Kaggle, where a user subset the data from the UCI Machine Learning Repository.
heart <- fread("heart.csv", header = TRUE)
heart.c <- fread("heart.csv", header = TRUE)
glimpse(heart)
Observations: 1,025
Variables: 14
$ age <int> 52, 53, 70, 61, 62, 58, 58, 55, 46, 54, 71, 43, 34, 5...
$ sex <int> 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,...
$ cp <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2, 0,...
$ trestbps <int> 125, 140, 145, 148, 138, 100, 114, 160, 120, 122, 112...
$ chol <int> 212, 203, 174, 203, 294, 248, 318, 289, 249, 286, 149...
$ fbs <int> 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,...
$ restecg <int> 1, 0, 1, 1, 1, 0, 2, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,...
$ thalach <int> 168, 155, 125, 161, 106, 122, 140, 145, 144, 116, 125...
$ exang <int> 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1,...
$ oldpeak <dbl> 1.0, 3.1, 2.6, 0.0, 1.9, 1.0, 4.4, 0.8, 0.8, 3.2, 1.6...
$ slope <int> 2, 0, 0, 2, 1, 1, 0, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1,...
$ ca <int> 2, 0, 0, 1, 3, 0, 3, 1, 0, 2, 0, 0, 0, 3, 0, 0, 1, 1,...
$ thal <int> 3, 3, 3, 3, 2, 2, 1, 3, 3, 2, 2, 3, 2, 3, 0, 2, 2, 3,...
$ target <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0,...
##### Rename the variables for a little more clarity
list_names <- c("Age" , "Gender", "ChestPain", "RestingBloodPressure", "Cholestrol", "FastingBloodSugar", "RestingECG", "MaxHeartRateAchivied", "ExerciseIndusedAngina", "Oldpeak", "Slope", "MajorVessels", "Thalassemia", "Target")
colnames(heart) <- list_names
colnames(heart.c) <- list_names
heart$Gender <- as.factor(heart$Gender)
heart$ChestPain <- as.factor(heart$ChestPain)
heart$ExerciseIndusedAngina <- as.factor(heart$ExerciseIndusedAngina)
heart$Thalassemia <- as.factor(heart$Thalassemia)
heart$Target <- as.factor(heart$Target)
# levels(heart$ChestPain)
heart <- transform(heart,
ChestPain=plyr::revalue(ChestPain,c("0"="Typical Angina", "1"="Atypical Angina", "2"="Non-Anginal", "3"="Asymptomatic")))
heart <- transform(heart,
Gender=plyr::revalue(Gender, c("0"="Female", "1"="Male")))
heart <- transform(heart,
Target=plyr::revalue(Target, c("0"="Healthy Heart", "1"="Heart Disease")))
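As a side note, the three transform() + plyr::revalue() calls above can be collapsed into a single dplyr step. A sketch for reference only; it assumes the factors still carry the original "0"/"1"/... codes, so it would replace (rather than follow) the revalue calls:
heart <- heart %>%
  mutate(ChestPain = recode(ChestPain, "0" = "Typical Angina", "1" = "Atypical Angina",
                            "2" = "Non-Anginal", "3" = "Asymptomatic"),
         Gender = recode(Gender, "0" = "Female", "1" = "Male"),
         Target = recode(Target, "0" = "Healthy Heart", "1" = "Heart Disease"))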
glimpse(heart)
Observations: 1,025
Variables: 14
$ Age <int> 52, 53, 70, 61, 62, 58, 58, 55, 46, 54, ...
$ Gender <fct> Male, Male, Male, Male, Female, Female, ...
$ ChestPain <fct> Typical Angina, Typical Angina, Typical ...
$ RestingBloodPressure <int> 125, 140, 145, 148, 138, 100, 114, 160, ...
$ Cholestrol <int> 212, 203, 174, 203, 294, 248, 318, 289, ...
$ FastingBloodSugar <int> 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0...
$ RestingECG <int> 1, 0, 1, 1, 1, 0, 2, 0, 0, 0, 1, 0, 1, 1...
$ MaxHeartRateAchivied <int> 168, 155, 125, 161, 106, 122, 140, 145, ...
$ ExerciseIndusedAngina <fct> 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1...
$ Oldpeak <dbl> 1.0, 3.1, 2.6, 0.0, 1.9, 1.0, 4.4, 0.8, ...
$ Slope <int> 2, 0, 0, 2, 1, 1, 0, 1, 2, 1, 1, 1, 2, 1...
$ MajorVessels <int> 2, 0, 0, 1, 3, 0, 3, 1, 0, 2, 0, 0, 0, 3...
$ Thalassemia <fct> 3, 3, 3, 3, 2, 2, 1, 3, 3, 2, 2, 3, 2, 3...
$ Target <fct> Healthy Heart, Healthy Heart, Healthy He...
Minimal & custom colour
posn_d <- position_dodge(width = 0.4)
heart %>%
ggplot( aes(x = factor(ChestPain), fill = factor(Target))) +
geom_bar(position = posn_d ,alpha = 0.8) +
labs(x = "Chest Pain", y = "Count",title = "Heart Health", subtitle = "By type of chest pain", caption = "Heart Disease UCI from Kaggle") +
geom_text(aes(label=..count..),stat="count",position=posn_d, vjust = -0.3) +
scale_fill_manual("legend", values = c("Healthy Heart" = "goldenrod1", "Heart Disease" = "mediumorchid1")) +
theme_minimal() +
theme(legend.title = element_blank(),
panel.grid.major = element_blank())
Fivethirtyeight & custom colour
heart %>%
ggplot( aes(x = factor(Gender), fill = factor(Target)) ) +
geom_bar(position = posn_d ,alpha = 0.8) +
labs(x = "Sex",title = "Heart Health", subtitle = "By sex", caption = "Heart Disease UCI from Kaggle") +
geom_text(aes(label=..count..),stat="count",position=posn_d, vjust = -0.3) +
scale_color_fivethirtyeight() +
theme_fivethirtyeight() +
theme(legend.title = element_blank()) +
scale_fill_manual("legend", values = c("Healthy Heart" = "brown1", "Heart Disease" = "forestgreen"))
Stata theme
heart %>%
ggplot( aes(x = factor(Gender), fill = factor(ChestPain) )) +
geom_bar(position = "dodge" ,alpha = 0.6) +
labs(x = "Sex", y = "Count",title = "Type of Chest Pain", subtitle = "By sex", caption = "Heart Disease UCI from Kaggle") +
geom_text(aes(label=..count..),stat="count",position=position_dodge(width = 0.9), vjust = -0.3) +
theme_stata() + scale_fill_stata() +
theme(legend.title = element_blank())
Stata blue
heart[, .N, by = .(Age, Target, Gender)] %>%
ggplot( aes(x = Age, y = N) ) +
geom_col( fill = "dodgerblue") +
facet_grid(Gender ~ Target, scales = "free") +
labs( y = "Count",title = "Age Distribution", subtitle = "By heart health and sex", caption = "Heart Disease UCI from Kaggle") +
theme_stata()
Scatterplot and histogram
heart %>%
ggplot(aes(x=Age, y=Cholestrol, col = Gender)) +
geom_point() +
facet_wrap(~Target, scales = "free") +
geom_smooth(method=lm) + ylim(120,420) +
theme_grey() +
labs(title = "Cholestrol by Age", subtitle = "By heart health and sex", caption = "Heart Disease UCI from Kaggle") +
theme(legend.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
heart %>%
ggplot(aes(x=Age)) +
geom_histogram( fill = "purple", bins = 10) +
facet_wrap(~Target) +
theme_grey() +
labs(title = "Age Distribution", subtitle = "By Heart Health", caption = "Heart Disease UCI from Kaggle") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# facet_grid(Gender ~ Target, scales = "free")
# caret needs syntactically valid factor levels (e.g. "Heart.Disease") when classProbs = TRUE
levels(heart$Target) <- make.names(levels(factor(heart$Target)))
set.seed(1337)
# Prepare the initial split object
data_split <- initial_split(heart, prop = 0.65)
# Extract the training dataframe
training_data <- training(data_split)
# Extract the testing dataframe
testing_data <- testing(data_split)
set.seed(1337)
cv_split <- vfold_cv(training_data, v = 5)
cv_data <- cv_split %>%
mutate(train = map(splits, ~training(.x)),validate = map(splits, ~testing(.x)))
########################################################
myControl <- trainControl(
method = "cv",
number = 10,
summaryFunction = twoClassSummary, #twoClassSummary for logistic
classProbs = TRUE, #Classification probabilities
verboseIter = TRUE
)
########################################################
cv_tune <- cv_data %>%
  expand_grid(mtry = 2:13) # mtry can range from 1 to the number of predictors (13)
# NOTE: tidyr now provides expand_grid(), used here in place of the older crossing()
cv_models_rf <- cv_tune %>%
mutate(model = map2(train, mtry, ~ranger(formula = Target~.,
data = .x, mtry = .y,
num.trees = 2000, seed = 1337)))
cv_prep_rf <- cv_models_rf %>%
mutate( validate_actual = map(validate, ~.x$Target == "Heart.Disease"),
validate_predicted = map2(.x = model, .y = validate,
~predict(.x, .y, type = "response")$predictions == "Heart.Disease")
)
# Recall on Random Forest models at different levels of mtry
cv_perf_recall <- cv_prep_rf %>%
mutate(recall = map2_dbl(.x = validate_actual, .y = validate_predicted, ~recall(actual = .x, predicted = .y)))
temp <- cv_perf_recall %>%
select(mtry, recall)
j <- matrix(nrow = 12, ncol = 2)
for(i in 2:13){
  j[i - 1, 1] <- i
  j[i - 1, 2] <- temp %>%
    filter(mtry == i) %>%
    summarize(mean_recall = mean(recall)) %>%
    as.numeric()
}
colnames(j) <- c("mtry","mean_recall")
knitr::kable(as.data.frame(j))
mtry | mean_recall |
---|---|
2 | 0.9779728 |
3 | 0.9753061 |
4 | 0.9753061 |
5 | 0.9753061 |
6 | 0.9753061 |
7 | 0.9753061 |
8 | 0.9753061 |
9 | 0.9785848 |
10 | 0.9814017 |
11 | 0.9814017 |
12 | 0.9814017 |
13 | 0.9840683 |
# Tuning mtry
cv_eval_tune <- cv_prep_rf %>%
mutate(validate_mae = map2_dbl(.x = validate_actual, .y = validate_predicted, ~mae(actual = .x, predicted = .y)))
#################################################################
# Mean validate MAE for each fold and mtry combination
# cv_eval_tune %>%
# group_by(mtry) %>%
# summarise(mean_validate_mae = mean(validate_mae))
# Not working now; it did before
# cv_eval_tune %>%
# select(mtry, validate_mae) %>%
# group_nest(mtry) %>%
# unnest()
#Kind of gets there
#################################################################
temp <- cv_eval_tune %>%
select(mtry, validate_mae)
k <- matrix(nrow = 12, ncol = 2)
for(i in 2:13){
  k[i - 1, 1] <- i
  k[i - 1, 2] <- temp %>%
    filter(mtry == i) %>%
    summarize(mean_validate_mae = mean(validate_mae)) %>%
    as.numeric()
}
colnames(k) <- c("mtry","mean_validate_mae")
knitr::kable(as.data.frame(k))
mtry | mean_validate_mae |
---|---|
2 | 0.0285265 |
3 | 0.0270228 |
4 | 0.0270228 |
5 | 0.0270228 |
6 | 0.0270228 |
7 | 0.0270228 |
8 | 0.0270228 |
9 | 0.0255190 |
10 | 0.0240153 |
11 | 0.0240153 |
12 | 0.0225115 |
13 | 0.0240265 |
The Mean Absolute Error (MAE) measures how much, on average, the predicted values differ from the actual values. Taking the mean of the MAE across the folds, the hyperparameter mtry performs best at 12.
On average, the true positive rate (sensitivity/recall) tends to increase as mtry increases. These predictive models perform quite well.
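As an aside, the per-mtry means produced by the for-loops above can usually be obtained more directly once the tuning object is coerced to a plain tibble (which may be why the commented-out group_by() attempt earlier misbehaved). A sketch, assuming cv_eval_tune as built above:
cv_eval_tune %>%
  as_tibble() %>% # guard against method dispatch surprises from the resample object
  group_by(mtry) %>%
  summarise(mean_validate_mae = mean(validate_mae))
# The same pattern works for cv_perf_recall and its recall column.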
Mean Absolute Error
best_model <- ranger(formula = Target~., data = training_data , mtry = 12, num.trees = 2000, seed = 1337)
test_actual <- testing_data$Target == "Heart.Disease"
test_predicted <- predict(best_model, testing_data, type = "response")$predictions == "Heart.Disease"
mae(test_actual, test_predicted)
[1] 0.008379888
Accuracy measures how well the model predicts both the TRUE and FALSE classes.
Metrics::accuracy(test_actual,test_predicted)
[1] 0.9916201
Precision measures how often the model is correct when it predicts the TRUE class.
Metrics::precision(test_actual,test_predicted)
[1] 0.9836066
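The same numbers can be reproduced by hand from the 2x2 confusion matrix, which is a quick sanity check on what accuracy, precision, and recall mean here (assumes test_actual and test_predicted from above):
cm <- table(actual = test_actual, predicted = test_predicted)
sum(diag(cm)) / sum(cm)                # accuracy: (TP + TN) / total
cm["TRUE", "TRUE"] / sum(cm[, "TRUE"]) # precision: TP / (TP + FP)
cm["TRUE", "TRUE"] / sum(cm["TRUE", ]) # recall: TP / (TP + FN)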
cv_models_lr <- cv_data %>%
mutate(model = map(train, ~glm(formula = Target~., data = .x, family = "binomial")))
# Examine the first model and validate
model <- cv_models_lr$model[[1]]
validate <- cv_models_lr$validate[[1]]
# Prepare binary vector of actual Heart Disease values in validate
validate_actual <- validate$Target == "Heart.Disease"
# Predict the probabilities for the observations in validate
validate_prob <- predict(model, validate, type = "response")
# Prepare binary vector of predicted Heart Disease values for validate
validate_predicted <- validate_prob > 0.5
# Compare the actual & predicted performance visually using a table
# table(validate_actual, validate_predicted)
# Accuracy and precision for each fold's logistic regression model
ap.list <- matrix(nrow = 5, ncol = 2)
for(i in 1:5){
  m <- cv_models_lr$model[[i]]
  v <- cv_models_lr$validate[[i]]
  v_actual <- v$Target == "Heart.Disease"
  v_prob <- predict(m, v, type = "response")
  v_predicted <- v_prob > 0.5
  print(table(v_actual, v_predicted))
  ap.list[i, 1] <- Metrics::accuracy(v_actual, v_predicted)
  ap.list[i, 2] <- Metrics::precision(v_actual, v_predicted)
}
ap <- apply(ap.list, 2, mean)
print("Mean Accuracy")
print(ap[1])
print("Mean Precision")
print(ap[2])
v_predicted
v_actual FALSE TRUE
FALSE 43 16
TRUE 3 72
v_predicted
v_actual FALSE TRUE
FALSE 48 12
TRUE 17 57
v_predicted
v_actual FALSE TRUE
FALSE 51 11
TRUE 9 62
v_predicted
v_actual FALSE TRUE
FALSE 58 10
TRUE 5 60
v_predicted
v_actual FALSE TRUE
FALSE 58 14
TRUE 10 51
[1] "Mean Accuracy"
[1] 0.8396364
[1] "Mean Precision"
[1] 0.8270684
Confusion matrices for each of the logistic regression folds
Using a logistic regression model to predict heart disease, the metrics of interest are accuracy and precision. We would like to know who does and does not have heart disease, as well as how often the positive (heart disease) classification is correct.
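For a single fold, caret::confusionMatrix() reports accuracy, precision (Pos Pred Value), and recall (Sensitivity) in one call. A minimal sketch using the last fold's v_actual and v_predicted, which remain in scope after the loop above:
caret::confusionMatrix(
  data = factor(v_predicted, levels = c(FALSE, TRUE)),
  reference = factor(v_actual, levels = c(FALSE, TRUE)),
  positive = "TRUE"
)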
Mean recall across the cross-validation folds
cv_prep_lr <- cv_models_lr %>%
mutate(validate_actual = map(validate, ~.x$Target == "Heart.Disease"),
validate_predicted = map2(.x = model, .y = validate,
~predict(.x, .y, type = "response") > 0.5)
)
# Validate recall for each cross validation fold
cv_perf_recall <- cv_prep_lr %>%
mutate(validate_recall = map2_dbl(validate_actual, validate_predicted,
~recall(actual = .x, predicted = .y)))
# cv_perf_recall$validate_recall
mean(cv_perf_recall$validate_recall)
[1] 0.8725304
# Custom tuning grid for RF-modeling
# tune.grid <- data.frame(
# .mtry = 2:length(heart),
# .splitrule = "variance",
# .min.node.size = 5
# )
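# If the random forest were tuned through caret instead of by hand, the call might
# look roughly like the sketch below (not run here). Note that for a classification
# target ranger's splitrule should be "gini"; "variance" applies to regression.
# rf.grid <- expand.grid(
#   mtry = 2:13,
#   splitrule = "gini",
#   min.node.size = 5
# )
# model.rf <- train(
#   Target ~ .,
#   heart,
#   metric = "ROC",
#   tuneGrid = rf.grid,
#   method = "ranger",
#   trControl = myControl
# )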
# Custom tuning grid for Lasso or Ridge regression
tune.grid <- expand.grid(
alpha = 0:1,
lambda = seq(0.0001, 1, length = 20)
)
# glmnet fits a GLM by penalized maximum likelihood, constraining the coefficients to prevent overfitting
# Ridge (0) or Lasso (1)
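# The penalty glmnet adds to the negative log-likelihood is the elastic net:
#   lambda * [ (1 - alpha)/2 * ||beta||_2^2 + alpha * ||beta||_1 ]
# so alpha = 0 is pure ridge, alpha = 1 is pure lasso, and lambda sets the strength.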
model.glmnet <- train(
Target ~.,
heart,
metric = "ROC",
tuneGrid = tune.grid,
method = "glmnet",
trControl = myControl,
preProcess = c("medianImpute", "center", "scale")
)
+ Fold01: alpha=0, lambda=1
- Fold01: alpha=0, lambda=1
+ Fold01: alpha=1, lambda=1
- Fold01: alpha=1, lambda=1
+ Fold02: alpha=0, lambda=1
- Fold02: alpha=0, lambda=1
+ Fold02: alpha=1, lambda=1
- Fold02: alpha=1, lambda=1
+ Fold03: alpha=0, lambda=1
- Fold03: alpha=0, lambda=1
+ Fold03: alpha=1, lambda=1
- Fold03: alpha=1, lambda=1
+ Fold04: alpha=0, lambda=1
- Fold04: alpha=0, lambda=1
+ Fold04: alpha=1, lambda=1
- Fold04: alpha=1, lambda=1
+ Fold05: alpha=0, lambda=1
- Fold05: alpha=0, lambda=1
+ Fold05: alpha=1, lambda=1
- Fold05: alpha=1, lambda=1
+ Fold06: alpha=0, lambda=1
- Fold06: alpha=0, lambda=1
+ Fold06: alpha=1, lambda=1
- Fold06: alpha=1, lambda=1
+ Fold07: alpha=0, lambda=1
- Fold07: alpha=0, lambda=1
+ Fold07: alpha=1, lambda=1
- Fold07: alpha=1, lambda=1
+ Fold08: alpha=0, lambda=1
- Fold08: alpha=0, lambda=1
+ Fold08: alpha=1, lambda=1
- Fold08: alpha=1, lambda=1
+ Fold09: alpha=0, lambda=1
- Fold09: alpha=0, lambda=1
+ Fold09: alpha=1, lambda=1
- Fold09: alpha=1, lambda=1
+ Fold10: alpha=0, lambda=1
- Fold10: alpha=0, lambda=1
+ Fold10: alpha=1, lambda=1
- Fold10: alpha=1, lambda=1
Aggregating results
Selecting tuning parameters
Fitting alpha = 1, lambda = 1e-04 on full training set
plot(model.glmnet)
model.glmnet
glmnet
1025 samples
13 predictor
2 classes: 'Healthy.Heart', 'Heart.Disease'
Pre-processing: median imputation (17), centered (17), scaled (17)
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 922, 923, 922, 923, 922, 923, ...
Resampling results across tuning parameters:
alpha lambda ROC Sens Spec
0 0.00010000 0.9220681 0.8116327 0.8879173
0 0.05272632 0.9205358 0.8096327 0.8822206
0 0.10535263 0.9181369 0.8076327 0.8727504
0 0.15797895 0.9169969 0.8156327 0.8727504
0 0.21060526 0.9160041 0.8176327 0.8574383
0 0.26323158 0.9152798 0.8216327 0.8574746
0 0.31585789 0.9150876 0.8236327 0.8537010
0 0.36848421 0.9146720 0.8256327 0.8537010
0 0.42111053 0.9139078 0.8256327 0.8537010
0 0.47373684 0.9134100 0.8256327 0.8537010
0 0.52636316 0.9134099 0.8256327 0.8537010
0 0.57898947 0.9127240 0.8256327 0.8537010
0 0.63161579 0.9125709 0.8256327 0.8537010
0 0.68424211 0.9121145 0.8256327 0.8537010
0 0.73686842 0.9122655 0.8256327 0.8537010
0 0.78949474 0.9122263 0.8256327 0.8537010
0 0.84212105 0.9121116 0.8176327 0.8537010
0 0.89474737 0.9119570 0.8156327 0.8537010
0 0.94737368 0.9117284 0.8156327 0.8537010
0 1.00000000 0.9115390 0.8136327 0.8537010
1 0.00010000 0.9226709 0.7955510 0.8955007
1 0.05272632 0.9028522 0.7975918 0.8556241
1 0.10535263 0.8879042 0.7735510 0.8309869
1 0.15797895 0.8690479 0.7554694 0.7834180
1 0.21060526 0.8189731 0.7354694 0.7834180
1 0.26323158 0.5385007 0.0000000 1.0000000
1 0.31585789 0.5000000 0.0000000 1.0000000
1 0.36848421 0.5000000 0.0000000 1.0000000
1 0.42111053 0.5000000 0.0000000 1.0000000
1 0.47373684 0.5000000 0.0000000 1.0000000
1 0.52636316 0.5000000 0.0000000 1.0000000
1 0.57898947 0.5000000 0.0000000 1.0000000
1 0.63161579 0.5000000 0.0000000 1.0000000
1 0.68424211 0.5000000 0.0000000 1.0000000
1 0.73686842 0.5000000 0.0000000 1.0000000
1 0.78949474 0.5000000 0.0000000 1.0000000
1 0.84212105 0.5000000 0.0000000 1.0000000
1 0.89474737 0.5000000 0.0000000 1.0000000
1 0.94737368 0.5000000 0.0000000 1.0000000
1 1.00000000 0.5000000 0.0000000 1.0000000
ROC was used to select the optimal model using the largest value.
The final values used for the model were alpha = 1 and lambda = 1e-04.
#max(model.glmnet[["results"]][["ROC"]])
plot(model.glmnet$finalModel)
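The coefficient path plot can be paired with the coefficients actually retained at the selected penalty. A minimal sketch; bestTune holds the chosen alpha and lambda:
coef(model.glmnet$finalModel, s = model.glmnet$bestTune$lambda)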
model.0 <- glm(Target ~ 1, data = heart, family = "binomial")          # intercept-only (null) model
model.1 <- glm(Target ~ . - Slope, data = heart, family = "binomial")  # full model, excluding Slope
step(model.0, scope = formula(model.1), direction = "forward", k = 2)  # forward selection by AIC
Start: AIC=1422.24
Target ~ 1
Df Deviance AIC
+ ChestPain 3 1124.0 1132.0
+ Thalassemia 3 1125.0 1133.0
+ Oldpeak 1 1195.3 1199.3
+ ExerciseIndusedAngina 1 1214.1 1218.1
+ MaxHeartRateAchivied 1 1219.6 1223.6
+ MajorVessels 1 1256.6 1260.6
+ Gender 1 1337.8 1341.8
+ Age 1 1365.1 1369.1
+ RestingBloodPressure 1 1400.3 1404.3
+ RestingECG 1 1401.6 1405.6
+ Cholestrol 1 1409.9 1413.9
<none> 1420.2 1422.2
+ FastingBloodSugar 1 1418.5 1422.5
Step: AIC=1131.99
Target ~ ChestPain
Df Deviance AIC
+ Thalassemia 3 948.04 962.04
+ Oldpeak 1 993.33 1003.33
+ MajorVessels 1 1015.13 1025.13
+ MaxHeartRateAchivied 1 1040.05 1050.05
+ Gender 1 1043.17 1053.17
+ ExerciseIndusedAngina 1 1063.34 1073.34
+ Age 1 1092.20 1102.20
+ RestingBloodPressure 1 1106.02 1116.02
+ RestingECG 1 1114.53 1124.53
+ Cholestrol 1 1118.39 1128.39
+ FastingBloodSugar 1 1118.81 1128.81
<none> 1123.99 1131.99
Step: AIC=962.04
Target ~ ChestPain + Thalassemia
Df Deviance AIC
+ MajorVessels 1 856.91 872.91
+ Oldpeak 1 866.97 882.97
+ MaxHeartRateAchivied 1 890.18 906.18
+ ExerciseIndusedAngina 1 913.62 929.62
+ Age 1 920.92 936.92
+ Gender 1 924.70 940.70
+ RestingECG 1 935.15 951.15
+ RestingBloodPressure 1 940.36 956.36
+ Cholestrol 1 941.63 957.63
<none> 948.04 962.04
+ FastingBloodSugar 1 946.14 962.14
Step: AIC=872.91
Target ~ ChestPain + Thalassemia + MajorVessels
Df Deviance AIC
+ Oldpeak 1 789.42 807.42
+ MaxHeartRateAchivied 1 814.56 832.56
+ ExerciseIndusedAngina 1 821.17 839.17
+ Gender 1 842.18 860.18
+ RestingECG 1 844.80 862.80
+ Age 1 846.63 864.63
+ Cholestrol 1 849.28 867.28
+ RestingBloodPressure 1 850.78 868.78
<none> 856.91 872.91
+ FastingBloodSugar 1 856.31 874.31
Step: AIC=807.42
Target ~ ChestPain + Thalassemia + MajorVessels + Oldpeak
Df Deviance AIC
+ ExerciseIndusedAngina 1 767.93 787.93
+ MaxHeartRateAchivied 1 767.95 787.95
+ Gender 1 772.91 792.91
+ RestingECG 1 773.07 793.07
+ Age 1 783.40 803.40
+ Cholestrol 1 784.28 804.28
+ RestingBloodPressure 1 785.65 805.65
<none> 789.42 807.42
+ FastingBloodSugar 1 788.74 808.74
Step: AIC=787.93
Target ~ ChestPain + Thalassemia + MajorVessels + Oldpeak + ExerciseIndusedAngina
Df Deviance AIC
+ Gender 1 751.35 773.35
+ RestingECG 1 753.30 775.30
+ MaxHeartRateAchivied 1 753.41 775.41
+ Age 1 762.89 784.89
+ Cholestrol 1 763.84 785.84
+ RestingBloodPressure 1 764.68 786.68
<none> 767.93 787.93
+ FastingBloodSugar 1 767.61 789.61
Step: AIC=773.35
Target ~ ChestPain + Thalassemia + MajorVessels + Oldpeak + ExerciseIndusedAngina +
Gender
Df Deviance AIC
+ MaxHeartRateAchivied 1 731.44 755.44
+ RestingECG 1 738.70 762.70
+ Age 1 739.13 763.13
+ Cholestrol 1 740.39 764.39
+ RestingBloodPressure 1 745.47 769.47
<none> 751.35 773.35
+ FastingBloodSugar 1 751.10 775.10
Step: AIC=755.44
Target ~ ChestPain + Thalassemia + MajorVessels + Oldpeak + ExerciseIndusedAngina +
Gender + MaxHeartRateAchivied
Df Deviance AIC
+ Cholestrol 1 717.71 743.71
+ RestingECG 1 719.39 745.39
+ RestingBloodPressure 1 721.61 747.61
+ Age 1 727.26 753.26
<none> 731.44 755.44
+ FastingBloodSugar 1 730.86 756.86
Step: AIC=743.71
Target ~ ChestPain + Thalassemia + MajorVessels + Oldpeak + ExerciseIndusedAngina +
Gender + MaxHeartRateAchivied + Cholestrol
Df Deviance AIC
+ RestingBloodPressure 1 708.85 736.85
+ RestingECG 1 709.86 737.86
<none> 717.71 743.71
+ Age 1 715.82 743.82
+ FastingBloodSugar 1 717.32 745.32
Step: AIC=736.85
Target ~ ChestPain + Thalassemia + MajorVessels + Oldpeak + ExerciseIndusedAngina +
Gender + MaxHeartRateAchivied + Cholestrol + RestingBloodPressure
Df Deviance AIC
+ RestingECG 1 701.69 731.69
<none> 708.85 736.85
+ Age 1 708.34 738.34
+ FastingBloodSugar 1 708.81 738.81
Step: AIC=731.69
Target ~ ChestPain + Thalassemia + MajorVessels + Oldpeak + ExerciseIndusedAngina +
Gender + MaxHeartRateAchivied + Cholestrol + RestingBloodPressure +
RestingECG
Df Deviance AIC
<none> 701.69 731.69
+ Age 1 701.51 733.51
+ FastingBloodSugar 1 701.68 733.68
Call: glm(formula = Target ~ ChestPain + Thalassemia + MajorVessels +
Oldpeak + ExerciseIndusedAngina + Gender + MaxHeartRateAchivied +
Cholestrol + RestingBloodPressure + RestingECG, family = "binomial",
data = heart)
Coefficients:
(Intercept) ChestPainAtypical Angina
0.000998 1.130884
ChestPainNon-Anginal ChestPainAsymptomatic
1.911585 1.873335
Thalassemia1 Thalassemia2
1.717835 1.756457
Thalassemia3 MajorVessels
0.335443 -0.761845
Oldpeak ExerciseIndusedAngina1
-0.640488 -0.812629
GenderMale MaxHeartRateAchivied
-1.449652 0.024614
Cholestrol RestingBloodPressure
-0.006052 -0.015617
RestingECG
0.513108
Degrees of Freedom: 1024 Total (i.e. Null); 1010 Residual
Null Deviance: 1420
Residual Deviance: 701.7 AIC: 731.7
# formula = Target ~ ChestPain + Thalassemia + MajorVessels +
# Oldpeak + ExerciseIndusedAngina + Gender + MaxHeartRateAchivied +
# Cholestrol + RestingBloodPressure + RestingECG
model.heart <- glm(formula = Target ~ ChestPain + Thalassemia + MajorVessels +
Oldpeak + ExerciseIndusedAngina + Gender + MaxHeartRateAchivied +
Cholestrol + RestingBloodPressure + RestingECG, family = "binomial",
data = heart)
summary(model.heart)
Call:
glm(formula = Target ~ ChestPain + Thalassemia + MajorVessels +
Oldpeak + ExerciseIndusedAngina + Gender + MaxHeartRateAchivied +
Cholestrol + RestingBloodPressure + RestingECG, family = "binomial",
data = heart)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.5893 -0.4198 0.1205 0.5466 2.5864
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.000998 1.448018 0.001 0.999450
ChestPainAtypical Angina 1.130884 0.295093 3.832 0.000127 ***
ChestPainNon-Anginal 1.911585 0.246880 7.743 9.71e-15 ***
ChestPainAsymptomatic 1.873335 0.339647 5.516 3.48e-08 ***
Thalassemia1 1.717835 1.103014 1.557 0.119375
Thalassemia2 1.756457 1.044426 1.682 0.092619 .
Thalassemia3 0.335443 1.048298 0.320 0.748977
MajorVessels -0.761845 0.102930 -7.402 1.35e-13 ***
Oldpeak -0.640487 0.110271 -5.808 6.31e-09 ***
ExerciseIndusedAngina1 -0.812629 0.230142 -3.531 0.000414 ***
GenderMale -1.449652 0.268969 -5.390 7.06e-08 ***
MaxHeartRateAchivied 0.024614 0.005171 4.760 1.94e-06 ***
Cholestrol -0.006052 0.002048 -2.955 0.003131 **
RestingBloodPressure -0.015617 0.005525 -2.827 0.004704 **
RestingECG 0.513108 0.192277 2.669 0.007617 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1420.24 on 1024 degrees of freedom
Residual deviance: 701.69 on 1010 degrees of freedom
AIC: 731.69
Number of Fisher Scoring iterations: 6
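The coefficients above are on the log-odds scale; exponentiating them gives odds ratios, which are easier to read. A quick sketch using the fitted model.heart (broom is already loaded):
exp(coef(model.heart))
# or, with confidence intervals in a tidy data frame
tidy(model.heart, exponentiate = TRUE, conf.int = TRUE)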