The blorr package offers tools for building and validating binary logistic regression models. It is most suitable for beginner/intermediate R users and those who teach statistics using R. The API is very simple, and most of the functions take either a data.frame/tibble or a model as input. blorr uses the consistent prefix blr_ for easy tab completion. The diagram below shows the API design:
This document is a quick start guide to the tools offered by blorr. Other vignettes provide more details on specific topics:
blr_bivariate_analysis(bank_marketing, y, job, marital, education, default, housing,
loan, contact, poutcome)
#> Bivariate Analysis
#> ----------------------------------------------------------------------
#> Variable Information Value LR Chi Square LR DF LR p-value
#> ----------------------------------------------------------------------
#> job 0.16 75.2690 11 0.0000
#> marital 0.05 21.6821 2 0.0000
#> education 0.05 25.0466 3 0.0000
#> default 0.02 6.0405 1 0.0140
#> housing 0.16 72.2813 1 0.0000
#> loan 0.06 26.6615 1 0.0000
#> contact 0.31 124.3834 2 0.0000
#> poutcome 0.53 270.6450 3 0.0000
#> ----------------------------------------------------------------------
blr_woe_iv(bank_marketing, job, y)
#> Weight of Evidence
#> --------------------------------------------------------------------------------
#> levels 0s_count 1s_count 0s_dist 1s_dist woe iv
#> --------------------------------------------------------------------------------
#> management 809 130 0.20 0.25 -0.22 0.01
#> technician 682 79 0.17 0.15 0.11 0.00
#> entrepreneur 139 12 0.03 0.02 0.40 0.00
#> blue-collar 937 73 0.23 0.14 0.51 0.05
#> unknown 29 2 0.01 0.00 0.61 0.00
#> retired 152 47 0.04 0.09 -0.87 0.05
#> admin. 433 61 0.11 0.12 -0.09 0.00
#> services 392 39 0.10 0.08 0.26 0.01
#> self-employed 132 22 0.03 0.04 -0.26 0.00
#> unemployed 126 15 0.03 0.03 0.08 0.00
#> housemaid 110 12 0.03 0.02 0.17 0.00
#> student 63 25 0.02 0.05 -1.13 0.04
#> --------------------------------------------------------------------------------
#>
#> Information Value
#> -----------------------------
#> Variable Information Value
#> -----------------------------
#> job 0.1594
#> -----------------------------
k <- blr_woe_iv(bank_marketing, job, y)
plot(k)
#> Warning in fun(x, ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning: Removed 12 rows containing missing values (geom_point).
blr_woe_iv_stats(bank_marketing, y, job, marital, education, default, housing,
loan, contact, poutcome)
#> Variable: job
#>
#> Weight of Evidence
#> --------------------------------------------------------------------------------
#> levels 0s_count 1s_count 0s_dist 1s_dist woe iv
#> --------------------------------------------------------------------------------
#> management 809 130 0.20 0.25 -0.22 0.01
#> technician 682 79 0.17 0.15 0.11 0.00
#> entrepreneur 139 12 0.03 0.02 0.40 0.00
#> blue-collar 937 73 0.23 0.14 0.51 0.05
#> unknown 29 2 0.01 0.00 0.61 0.00
#> retired 152 47 0.04 0.09 -0.87 0.05
#> admin. 433 61 0.11 0.12 -0.09 0.00
#> services 392 39 0.10 0.08 0.26 0.01
#> self-employed 132 22 0.03 0.04 -0.26 0.00
#> unemployed 126 15 0.03 0.03 0.08 0.00
#> housemaid 110 12 0.03 0.02 0.17 0.00
#> student 63 25 0.02 0.05 -1.13 0.04
#> --------------------------------------------------------------------------------
#>
#> Information Value
#> -----------------------------
#> Variable Information Value
#> -----------------------------
#> job 0.1594
#> -----------------------------
#>
#>
#> Variable: marital
#>
#> Weight of Evidence
#> ---------------------------------------------------------------------------
#> levels 0s_count 1s_count 0s_dist 1s_dist woe iv
#> ---------------------------------------------------------------------------
#> married 2467 273 0.62 0.53 0.15 0.01
#> single 1079 191 0.27 0.37 -0.32 0.03
#> divorced 458 53 0.11 0.10 0.11 0.00
#> ---------------------------------------------------------------------------
#>
#> Information Value
#> -----------------------------
#> Variable Information Value
#> -----------------------------
#> marital 0.0464
#> -----------------------------
#>
#>
#> Variable: education
#>
#> Weight of Evidence
#> ----------------------------------------------------------------------------
#> levels 0s_count 1s_count 0s_dist 1s_dist woe iv
#> ----------------------------------------------------------------------------
#> tertiary 1104 195 0.28 0.38 -0.31 0.03
#> secondary 2121 231 0.53 0.45 0.17 0.01
#> unknown 154 25 0.04 0.05 -0.23 0.00
#> primary 625 66 0.16 0.13 0.20 0.01
#> ----------------------------------------------------------------------------
#>
#> Information Value
#> ------------------------------
#> Variable Information Value
#> ------------------------------
#> education 0.0539
#> ------------------------------
#>
#>
#> Variable: default
#>
#> Weight of Evidence
#> -------------------------------------------------------------------------
#> levels 0s_count 1s_count 0s_dist 1s_dist woe iv
#> -------------------------------------------------------------------------
#> no 3928 514 0.98 0.99 -0.01 0.00
#> yes 76 3 0.02 0.01 1.19 0.02
#> -------------------------------------------------------------------------
#>
#> Information Value
#> -----------------------------
#> Variable Information Value
#> -----------------------------
#> default 0.0159
#> -----------------------------
#>
#>
#> Variable: housing
#>
#> Weight of Evidence
#> ------------------------------------------------------------------------
#> levels 0s_count 1s_count 0s_dist 1s_dist woe iv
#> ------------------------------------------------------------------------
#> yes 2364 203 0.59 0.39 0.41 0.08
#> no 1640 314 0.41 0.61 -0.39 0.08
#> ------------------------------------------------------------------------
#>
#> Information Value
#> -----------------------------
#> Variable Information Value
#> -----------------------------
#> housing 0.1586
#> -----------------------------
#>
#>
#> Variable: loan
#>
#> Weight of Evidence
#> -------------------------------------------------------------------------
#> levels 0s_count 1s_count 0s_dist 1s_dist woe iv
#> -------------------------------------------------------------------------
#> yes 680 45 0.17 0.09 0.67 0.06
#> no 3324 472 0.83 0.91 -0.10 0.01
#> -------------------------------------------------------------------------
#>
#> Information Value
#> -----------------------------
#> Variable Information Value
#> -----------------------------
#> loan 0.0633
#> -----------------------------
#>
#>
#> Variable: contact
#>
#> Weight of Evidence
#> ---------------------------------------------------------------------------
#> levels 0s_count 1s_count 0s_dist 1s_dist woe iv
#> ---------------------------------------------------------------------------
#> unknown 1292 54 0.32 0.10 1.13 0.25
#> cellular 2452 424 0.61 0.82 -0.29 0.06
#> telephone 260 39 0.06 0.08 -0.15 0.00
#> ---------------------------------------------------------------------------
#>
#> Information Value
#> -----------------------------
#> Variable Information Value
#> -----------------------------
#> contact 0.3086
#> -----------------------------
#>
#>
#> Variable: poutcome
#>
#> Weight of Evidence
#> --------------------------------------------------------------------------
#> levels 0s_count 1s_count 0s_dist 1s_dist woe iv
#> --------------------------------------------------------------------------
#> unknown 3352 334 0.84 0.65 0.26 0.05
#> failure 450 58 0.11 0.11 0.00 0.00
#> other 156 30 0.04 0.06 -0.40 0.01
#> success 46 95 0.01 0.18 -2.77 0.48
#> --------------------------------------------------------------------------
#>
#> Information Value
#> -----------------------------
#> Variable Information Value
#> -----------------------------
#> poutcome 0.5346
#> -----------------------------