### Introduction

The blorr package offers tools for building and validating binary logistic regression models. It is most suitable for beginner and intermediate R users and for those who teach statistics using R. The API is very simple, and most of the functions take either a data.frame/tibble or a model as input. blorr uses a consistent prefix, blr_, for easy tab completion. The diagram below shows the API design:

This document is a quick start guide to the tools offered by blorr. Other vignettes provide more details on specific topics:

• Bivariate Analysis
• Variable Selection
• Residual Diagnostics
• Model Validation

### Libraries

library(blorr)
library(magrittr)

### Data

We will use the bank marketing data set.

### Bivariate Analysis

blr_bivariate_analysis(bank_marketing, y, job, marital, education, default, housing,
loan, contact, poutcome)
#>                           Bivariate Analysis
#> ----------------------------------------------------------------------
#> Variable     Information Value    LR Chi Square    LR DF    LR p-value
#> ----------------------------------------------------------------------
#>    job             0.16              75.2690        11        0.0000
#>  marital           0.05              21.6821         2        0.0000
#> education          0.05              25.0466         3        0.0000
#>  default           0.02              6.0405          1        0.0140
#>  housing           0.16              72.2813         1        0.0000
#>   loan             0.06              26.6615         1        0.0000
#>  contact           0.31             124.3834         2        0.0000
#> poutcome           0.53             270.6450         3        0.0000
#> ----------------------------------------------------------------------

### Weight of Evidence & Information Value

blr_woe_iv(bank_marketing, job, y)
#>                                Weight of Evidence
#> --------------------------------------------------------------------------------
#>    levels        0s_count    1s_count    0s_dist    1s_dist        woe      iv
#> --------------------------------------------------------------------------------
#>  management        809         130          0.20       0.25      -0.22     0.01
#>  technician        682          79          0.17       0.15       0.11     0.00
#> entrepreneur       139          12          0.03       0.02       0.40     0.00
#>  blue-collar       937          73          0.23       0.14       0.51     0.05
#>    unknown          29          2           0.01       0.00       0.61     0.00
#>    retired         152          47          0.04       0.09      -0.87     0.05
#>    admin.          433          61          0.11       0.12      -0.09     0.00
#>   services         392          39          0.10       0.08       0.26     0.01
#> self-employed      132          22          0.03       0.04      -0.26     0.00
#>  unemployed        126          15          0.03       0.03       0.08     0.00
#>   housemaid        110          12          0.03       0.02       0.17     0.00
#>    student          63          25          0.02       0.05      -1.13     0.04
#> --------------------------------------------------------------------------------
#>
#>       Information Value
#> -----------------------------
#> Variable    Information Value
#> -----------------------------
#>   job            0.1594
#> -----------------------------

#### Plot

k <- blr_woe_iv(bank_marketing, job, y)
plot(k)
#> Warning in fun(x, ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion

#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning: Removed 12 rows containing missing values (geom_point).

#### Multiple Variables

blr_woe_iv_stats(bank_marketing, y, job, marital, education, default, housing,
loan, contact, poutcome)
#> Variable: job
#>
#>                                Weight of Evidence
#> --------------------------------------------------------------------------------
#>    levels        0s_count    1s_count    0s_dist    1s_dist        woe      iv
#> --------------------------------------------------------------------------------
#>  management        809         130          0.20       0.25      -0.22     0.01
#>  technician        682          79          0.17       0.15       0.11     0.00
#> entrepreneur       139          12          0.03       0.02       0.40     0.00
#>  blue-collar       937          73          0.23       0.14       0.51     0.05
#>    unknown          29          2           0.01       0.00       0.61     0.00
#>    retired         152          47          0.04       0.09      -0.87     0.05
#>    admin.          433          61          0.11       0.12      -0.09     0.00
#>   services         392          39          0.10       0.08       0.26     0.01
#> self-employed      132          22          0.03       0.04      -0.26     0.00
#>  unemployed        126          15          0.03       0.03       0.08     0.00
#>   housemaid        110          12          0.03       0.02       0.17     0.00
#>    student          63          25          0.02       0.05      -1.13     0.04
#> --------------------------------------------------------------------------------
#>
#>       Information Value
#> -----------------------------
#> Variable    Information Value
#> -----------------------------
#>   job            0.1594
#> -----------------------------
#>
#>
#> Variable: marital
#>
#>                             Weight of Evidence
#> ---------------------------------------------------------------------------
#>  levels     0s_count    1s_count    0s_dist    1s_dist        woe      iv
#> ---------------------------------------------------------------------------
#> married       2467        273          0.62       0.53       0.15     0.01
#>  single       1079        191          0.27       0.37      -0.32     0.03
#> divorced      458          53          0.11       0.10       0.11     0.00
#> ---------------------------------------------------------------------------
#>
#>       Information Value
#> -----------------------------
#> Variable    Information Value
#> -----------------------------
#> marital          0.0464
#> -----------------------------
#>
#>
#> Variable: education
#>
#>                              Weight of Evidence
#> ----------------------------------------------------------------------------
#>  levels      0s_count    1s_count    0s_dist    1s_dist        woe      iv
#> ----------------------------------------------------------------------------
#> tertiary       1104        195          0.28       0.38      -0.31     0.03
#> secondary      2121        231          0.53       0.45       0.17     0.01
#>  unknown       154          25          0.04       0.05      -0.23     0.00
#>  primary       625          66          0.16       0.13       0.20     0.01
#> ----------------------------------------------------------------------------
#>
#>       Information Value
#> ------------------------------
#> Variable     Information Value
#> ------------------------------
#> education         0.0539
#> ------------------------------
#>
#>
#> Variable: default
#>
#>                            Weight of Evidence
#> -------------------------------------------------------------------------
#> levels    0s_count    1s_count    0s_dist    1s_dist        woe      iv
#> -------------------------------------------------------------------------
#>   no        3928        514          0.98       0.99      -0.01     0.00
#>  yes         76          3           0.02       0.01       1.19     0.02
#> -------------------------------------------------------------------------
#>
#>       Information Value
#> -----------------------------
#> Variable    Information Value
#> -----------------------------
#> default          0.0159
#> -----------------------------
#>
#>
#> Variable: housing
#>
#>                            Weight of Evidence
#> ------------------------------------------------------------------------
#> levels    0s_count    1s_count    0s_dist    1s_dist       woe      iv
#> ------------------------------------------------------------------------
#>  yes        2364        203          0.59       0.39      0.41     0.08
#>   no        1640        314          0.41       0.61     -0.39     0.08
#> ------------------------------------------------------------------------
#>
#>       Information Value
#> -----------------------------
#> Variable    Information Value
#> -----------------------------
#> housing          0.1586
#> -----------------------------
#>
#>
#> Variable: loan
#>
#>                            Weight of Evidence
#> -------------------------------------------------------------------------
#> levels    0s_count    1s_count    0s_dist    1s_dist        woe      iv
#> -------------------------------------------------------------------------
#>  yes        680          45          0.17       0.09       0.67     0.06
#>   no        3324        472          0.83       0.91      -0.10     0.01
#> -------------------------------------------------------------------------
#>
#>       Information Value
#> -----------------------------
#> Variable    Information Value
#> -----------------------------
#>   loan           0.0633
#> -----------------------------
#>
#>
#> Variable: contact
#>
#>                             Weight of Evidence
#> ---------------------------------------------------------------------------
#>  levels      0s_count    1s_count    0s_dist    1s_dist       woe      iv
#> ---------------------------------------------------------------------------
#>  unknown       1292         54          0.32       0.10      1.13     0.25
#> cellular       2452        424          0.61       0.82     -0.29     0.06
#> telephone      260          39          0.06       0.08     -0.15     0.00
#> ---------------------------------------------------------------------------
#>
#>       Information Value
#> -----------------------------
#> Variable    Information Value
#> -----------------------------
#> contact          0.3086
#> -----------------------------
#>
#>
#> Variable: poutcome
#>
#>                             Weight of Evidence
#> --------------------------------------------------------------------------
#> levels     0s_count    1s_count    0s_dist    1s_dist        woe      iv
#> --------------------------------------------------------------------------
#> unknown      3352        334          0.84       0.65       0.26     0.05
#> failure      450          58          0.11       0.11       0.00     0.00
#>  other       156          30          0.04       0.06      -0.40     0.01
#> success       46          95          0.01       0.18      -2.77     0.48
#> --------------------------------------------------------------------------
#>
#>       Information Value
#> -----------------------------
#> Variable    Information Value
#> -----------------------------
#> poutcome         0.5346
#> -----------------------------