Start with the required libraries, and then load some data.
Code
# data manipulation
library(tidyverse)
library(janitor)
# Data Exploration
library(ppsr)
library(correlationfunnel)
library(DataExplorer)
# Visualisation addition
library(plotly)
# Load Data ----
# Read the raw training file and standardise the column names with
# janitor::clean_names() so later steps can refer to them consistently.
promotions_tbl <- readr::read_csv(file = "train.csv") %>%
  janitor::clean_names()
# reduce the dataset size
# Drop every row that contains a missing value, then recode the target
# column `is_promoted` from 1 / other into a "Yes" / "No" factor.
cleaned_promotions_tbl <-
  promotions_tbl %>%
  tidyr::drop_na() %>%
  dplyr::mutate(
    # Converted to character first; the comparison with 1 below then relies
    # on R coercing the number to "1".
    is_promoted = as.character(is_promoted),
    is_promoted = if_else(is_promoted == 1, "Yes", "No") %>% as.factor()
  )
1. Predictive Power Score
Code
# Plot the predictive power score of each remaining column against the
# target `is_promoted`. The employee identifier is removed first.
cleaned_promotions_tbl %>%
  select(-employee_id) %>%
  visualize_pps(
    y = "is_promoted",
    do_parallel = FALSE
  )
2. Correlation Funnel
Code
# Binarize the features, correlate them with the "Yes" level of the target
# (`is_promoted__Yes`), and draw the interactive correlation funnel with the
# plotly mode bar hidden.
cleaned_promotions_tbl %>%
  select(-employee_id) %>%
  binarize() %>%
  correlate(target = is_promoted__Yes) %>%
  plot_correlation_funnel(interactive = TRUE) %>%
  plotly::config(displayModeBar = FALSE)
3. DataExplorer
Default Heatmap
Code
# Build the DataExplorer correlation heatmap (legend hidden, x-axis labels
# rotated 90 degrees) and keep the plot object in `corr_plot` so it can be
# post-processed afterwards.
corr_plot <- cleaned_promotions_tbl %>%
  select(-employee_id) %>%
  DataExplorer::plot_correlation(
    theme_config = list(
      legend.position = "none",
      axis.text.x = element_text(angle = 90)
    )
  )
Interactive Heatmap
Code
# Round the values stored in the plot's data to two decimals before the plot
# is converted to plotly.
corr_plot$data$value <- round(corr_plot$data$value, digits = 2)

# Convert the static heatmap into a 700 x 700 plotly widget with the mode
# bar hidden.
plotly::plotly_build(corr_plot) %>%
  plotly::layout(width = 700, height = 700) %>%
  plotly::config(displayModeBar = FALSE)
Reuse
Citation
BibTeX citation:
@online{dmckinnon2023,
author = {Adam D McKinnon},
title = {Expediting {Exploratory} {Data} {Analysis}},
date = {2023-01-03},
url = {https://www.adam-d-mckinnon.com//posts/2023-01-03-exploratory_analysis},
langid = {en}
}
For attribution, please cite this work as:
Adam D McKinnon. 2023. “Expediting Exploratory Data
Analysis.” January 3, 2023. https://www.adam-d-mckinnon.com//posts/2023-01-03-exploratory_analysis.