Excel, R, and Tableau analysis comparing predictive models on emergency department data. For this analysis, one month of records from an emergency department was collected and is analyzed through several different tools.
The written analysis describes the overall issue, the existing recording metrics, the findings, and proposed solutions. I have also included the recorded PowerPoint presentation of the findings, along with alternate visuals created while reviewing the data.
This code uses R to explore the predictive qualities of Support Vector Machine (SVM), Naive Bayes, Logistic Regression, and Random Forest models in relation to emergency department (ED) overcrowding.
Data file (cleaned before being loaded into R)
Packages
install.packages("tidyverse")
install.packages("descr")
install.packages("forcats")
install.packages("e1071")
install.packages("caret")
install.packages("lattice")
install.packages("caTools")
install.packages("rmarkdown")
install.packages("ggplot2")
install.packages("corrplot")
install.packages("randomForest")
install.packages("plyr")
install.packages("ROSE")
install.packages("readxl")
install.packages("lubridate")
install.packages("zoo")
install.packages("magrittr")
install.packages("ROCR")
install.packages("pROC")
install.packages("readr")
library(plyr)
library(dplyr)
library(tidyverse)
library(descr) # Descriptive Statistics
library(ggplot2)
library(forcats) # Tools for Working with Categorical Variables (Factors)
library(e1071) # Misc Functions of the Department of Statistics, Probability Theory Group
library(lattice)
library(caret) # Classification and Regression Training
library(caTools)
library(rmarkdown)
library(corrplot)
library(randomForest)
library(ROSE)
library(readxl)
library(lubridate)
library(zoo)
library(magrittr)
library(ROCR)
library(pROC)
library(readr)
Load files (re-run these lines to reload the same files)
nedoc_data <- read.csv(file.choose())
dfilter1 <- read.csv(file.choose())
Pdfilter1 <- read.csv(file.choose())
#PD filter is for predictive
#D filter is for general analysis and exploratory predictive
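file.choose() opens an interactive file picker on every run; for a reproducible script the paths can be hard-coded instead (illustrative filenames, not the original ones):
nedoc_data <- read.csv("nedoc_data.csv")
dfilter1 <- read.csv("d_filter.csv")
Pdfilter1 <- read.csv("pd_filter.csv")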
Data Prep
#####-----------------------DATA PREP
dfilter1$OVR <- as.factor(dfilter1$OVR)
dfilter1$SEV <- as.factor(dfilter1$SEV)
dfilter1$DNG <- as.factor(dfilter1$DNG)
dfilter1$Hour <- as.factor(dfilter1$Hour)
dfilter1$Day <- as.factor(dfilter1$Day)
dfilter1$Week <- as.factor(dfilter1$Week)
DNGfile <- filter(dfilter1, NAVG == 4)
SEVfile <- filter(dfilter1, NAVG == 3)
OVRfile <- filter(dfilter1, NAVG == 2)
NE_Week <- filter(dfilter1, BSY == 1, DNG == 0, Week %in% c(2,3,4))
NE_Week2 <- filter(dfilter1, DNG == 0, Week %in% c(1,2,3,4))
#The general filter file can be reused here because, after cleaning, the data is copied into separate files for prediction
#Move OVR to the front with Date so that Naive Bayes and SVM can share the same dataset
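A minimal sketch of that reordering (assuming the date column is literally named Date, which is not shown in the original script):
Pdfilter1 <- select(Pdfilter1, Date, OVR, everything())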
Pdfilter1$OVR <- as.factor(Pdfilter1$OVR)
Pdfilter1$Hour <- as.factor(Pdfilter1$Hour)
Pdfilter1$Day <- as.factor(Pdfilter1$Day)
Pdfilter1$Week <- as.factor(Pdfilter1$Week)
Pdfilter1$OVRcut <- ifelse(Pdfilter1$OVR == 0,"good","bad")
Pdfilter1$OVRcut <- as.factor(Pdfilter1$OVRcut)
summary(Pdfilter1$DEP)
Pdfilter1$DEPcut <- cut(Pdfilter1$DEP, breaks=c(20,43,76,109),
labels=c("bottom 25%","middle 50%", "top 25%"))
table(Pdfilter1$DEPcut)
summary(Pdfilter1$EDW)
Pdfilter1$EDWcut <- cut(Pdfilter1$EDW, breaks=c(0,6,14,29),
labels=c("bottom 25%","middle 50%", "top 25%"))
table(Pdfilter1$EDWcut)
summary(Pdfilter1$CC)
Pdfilter1$CCcut <- cut(Pdfilter1$CC, breaks=c(0,3.5,7,14),
labels=c("bottom 25%","middle 50%", "top 25%"))
table(Pdfilter1$CCcut)
summary(Pdfilter1$DTB)
Pdfilter1$DTBcut <- cut(Pdfilter1$DTB, breaks=c(-0.07,0.03,0.8825,6.76),
labels=c("bottom 25%","middle 50%", "top 25%"))
table(Pdfilter1$DTBcut)
summary(Pdfilter1$LAT)
Pdfilter1$LAT2 <- Pdfilter1$LAT
Pdfilter1$LAT2[Pdfilter1$LAT2 == 0.00] <- 0.01
Pdfilter1$LATcut <- cut(Pdfilter1$LAT2, breaks=c(0.00,1.63,6.562,30.02),
labels=c("bottom 25%","middle 50%", "top 25%"))
table(Pdfilter1$LATcut)
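The summary/cut/table pattern above repeats for every predictor; a small helper function (hypothetical, not part of the original script) would condense it, assuming the quartile breaks are distinct:
cut_quartiles <- function(x) {
  q <- quantile(x, probs = c(0, 0.25, 0.75, 1), na.rm = TRUE)
  cut(x, breaks = q, labels = c("bottom 25%","middle 50%","top 25%"), include.lowest = TRUE)
}
#e.g. Pdfilter1$DEPcut <- cut_quartiles(Pdfilter1$DEP)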
str(Pdfilter1)
SVMD1 <- Pdfilter1
SVMD1$OVR <- as.factor(SVMD1$OVR)
SVMD1$Hour <- as.factor(SVMD1$Hour)
SVMD1$Day <- as.factor(SVMD1$Day)
SVMD1$Week <- as.factor(SVMD1$Week)
NaiveD1 <- Pdfilter1
str(NaiveD1)
NaiveD1$OVRcut <- ifelse(NaiveD1$OVR == 0,"good","bad")
NaiveD1$OVRcut <- as.factor(NaiveD1$OVRcut)
summary(NaiveD1$DEP)
NaiveD1$DEPcut <- cut(NaiveD1$DEP, breaks=c(20,43,76,109),
labels=c("bottom 25%","middle 50%", "top 25%"))
table(NaiveD1$DEPcut)
summary(NaiveD1$EDW)
NaiveD1$EDWcut <- cut(NaiveD1$EDW, breaks=c(0,6,14,29),
labels=c("bottom 25%","middle 50%", "top 25%"))
table(NaiveD1$EDWcut)
summary(NaiveD1$CC)
NaiveD1$CCcut <- cut(NaiveD1$CC, breaks=c(0,3.5,7,14),
labels=c("bottom 25%","middle 50%", "top 25%"))
table(NaiveD1$CCcut)
summary(NaiveD1$DTB)
NaiveD1$DTBcut <- cut(NaiveD1$DTB, breaks=c(-0.07,0.03,0.8825,6.76),
labels=c("bottom 25%","middle 50%", "top 25%"))
table(NaiveD1$DTBcut)
summary(NaiveD1$LAT)
#use LAT2 (zeros bumped to 0.01) so no values fall on the open lower break
NaiveD1$LATcut <- cut(NaiveD1$LAT2, breaks=c(0.00,1.63,6.562,30.02),
labels=c("bottom 25%","middle 50%", "top 25%"))
table(NaiveD1$LATcut)
SVMData <- select(SVMD1, DEP, EDW, CC, DTB, LAT, Hour, Day, Week, OVR)
NaiveData <- select(NaiveD1,OVRcut, DEPcut,EDWcut,CCcut,DTBcut,LATcut,Hour, Day, Week)
Data Viz pre-predictive
#############------------------DATA VIZ
#Distribution of NeDoc Scores
DViz <- ggplot(data=dfilter1, aes(x=AVG)) +
geom_histogram(aes(y=..density..),
col='black',
fill='dodgerblue1',
alpha=0.3) +
geom_density(adjust=3)
print(DViz + theme(plot.title=element_text(face="bold")) + ggtitle('Distribution of the NeDoc Averages'))
#NE_Week2 distribution viz: shows a more balanced average once week 5 and the Danger hours (which occur only in week 1) are excluded
DVizNE <- ggplot(data=NE_Week2, aes(x=AVG)) +
geom_histogram(aes(y=..density..),
col='black',
fill='dodgerblue1',
alpha=0.3) +
geom_density(adjust=3)
print(DVizNE + theme(plot.title=element_text(face="bold")) + ggtitle('NE_Week Distribution of the NeDoc Averages'))
##Hour chart shows a good representation for OVR
NAVGdist <- ggplot(data=dfilter1, aes(x=Hour, fill=OVR)) +
geom_bar(aes(y = (..count..)/sum(..count..)), position='stack', alpha=0.5) + scale_y_continuous(labels=scales::percent)
print(NAVGdist + theme(plot.title=element_text(face="bold")) + ggtitle('Hour to Overcrowded'))
##Pretty cool Day Chart
NAVGdist <- ggplot(data=dfilter1, aes(x=Day, fill=OVR)) +
geom_bar(aes(y = (..count..)/sum(..count..)), position='stack', alpha=0.5) + scale_y_continuous(labels=scales::percent)
print(NAVGdist + theme(plot.title=element_text(face="bold")) + ggtitle('Day to Overcrowded'))
#good rep of decline in overcrowded
NAVGdist <- ggplot(data=dfilter1, aes(x=Week, fill=OVR)) +
geom_bar(aes(y = (..count..)/sum(..count..)), position='stack', alpha=0.5) + scale_y_continuous(labels=scales::percent)
print(NAVGdist + theme(plot.title=element_text(face="bold")) + ggtitle('Week to Overcrowded'))
#Higher DEP means more OVR; same for CC and LAT (EDW and DTB are evenly distributed)
#Template for further charts: swap in the variable of interest for the x aesthetic and update the title
NAVGdist <- ggplot(data=dfilter1, aes(x=Week, fill=OVR)) +
geom_bar(aes(y = (..count..)/sum(..count..)), position='stack', alpha=0.5) + scale_y_continuous(labels=scales::percent)
print(NAVGdist + theme(plot.title=element_text(face="bold")) + ggtitle('Variable vs Overcrowded'))
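corrplot is loaded above but never used; a quick sketch (not part of the original analysis) of a correlation matrix over the numeric predictors in the predictive file would back up the distribution notes above:
predcors <- cor(select(Pdfilter1, DEP, EDW, CC, DTB, LAT), use = "complete.obs")
corrplot(predcors, method = "circle")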
Establishing the train and test datasets used across the different models
#########------------------PREDICTIVE
#Establish train and test sets
##One full file is split here; model-specific train/test subsets are selected below
Predictivedata <- Pdfilter1
set.seed(101)
PredData <- sample.split(Predictivedata$OVR, SplitRatio = 0.7)
PredDatatrain <- subset(Predictivedata, PredData == TRUE)
PredDatatest <- subset(Predictivedata, PredData == FALSE)
AASVMDatatrain <- select(PredDatatrain, DEP, EDW, CC, DTB, LAT, Hour, Day, Week, OVR)
AASVMDatatest <- select(PredDatatest, DEP, EDW, CC, DTB, LAT, Hour, Day, Week, OVR)
AANaiveDatatrain <- select(PredDatatrain,OVRcut, DEPcut,EDWcut,CCcut,DTBcut,LATcut,Hour, Day, Week)
AANaiveDatatest <- select(PredDatatest,OVRcut, DEPcut,EDWcut,CCcut,DTBcut,LATcut,Hour, Day, Week)
AARTDatatrain <- select(PredDatatrain, DEP, EDW, CC, DTB, LAT, Hour, Day, Week, OVR)
AARTDatatest <- select(PredDatatest, DEP, EDW, CC, DTB, LAT, Hour, Day, Week, OVR)
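A quick sanity check, not in the original script, that sample.split preserved the OVR class ratio in both subsets:
prop.table(table(PredDatatrain$OVR))
prop.table(table(PredDatatest$OVR))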
SVM
##SVM
str(AASVMDatatrain)
#OVR as the response; the dot expands to the remaining eight predictors only
SVMmodelA <- svm(OVR ~ ., data = AASVMDatatrain)
summary(SVMmodelA)
SVMpreds <- predict(SVMmodelA,AASVMDatatest[1:8])
table(SVMpreds, AASVMDatatest$OVR)
SVMpreds.tuned <- svm(OVR ~ ., data = AASVMDatatrain, kernel='radial', cost=70, gamma=0.2)
SVMpreds2 <- predict(SVMpreds.tuned, AASVMDatatest[1:8])
confusionMatrix(SVMpreds2, AASVMDatatest$OVR)
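The cost and gamma values above look hand-picked; a hedged sketch of how a grid search with e1071's tune.svm could choose them (illustrative ranges, not the ones used for this project):
SVMgrid <- tune.svm(OVR ~ ., data = AASVMDatatrain, gamma = c(0.1, 0.2, 0.5), cost = c(10, 70, 100))
SVMgrid$best.parameters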
#Create a copy with OVR reversed (ROVR) for the ROC chart
SVMROCtrain <- AASVMDatatrain
SVMROCtrain$ROVR <- ifelse(SVMROCtrain$OVR %in% c(1), 0, 1)
SVMROCtest <- AASVMDatatest
SVMROCtest$ROVR <- ifelse(SVMROCtest$OVR %in% c(1), 0, 1)
SVMROCtrain$ROVR <- as.factor(SVMROCtrain$ROVR)
SVMROCtest$ROVR <- as.factor(SVMROCtest$ROVR)
#exclude OVR from the predictors, since it determines ROVR exactly
SVMmodelROC <- svm(ROVR ~ . - OVR, data = SVMROCtrain, kernel='radial', cost=70, gamma=0.2)
SVMpredsROC <- predict(SVMmodelROC, SVMROCtest)
confusionMatrix(SVMpredsROC, SVMROCtest$ROVR)
Naive Bayes
## Naive
str(AANaiveDatatrain)
Naiveclassy <- naiveBayes(OVRcut ~ DEPcut+EDWcut+CCcut+DTBcut+LATcut+Hour+Day+Week,AANaiveDatatrain)
Naiveclassy
Naivepreds <- predict(Naiveclassy, select(AANaiveDatatest, DEPcut,EDWcut,CCcut,DTBcut,LATcut,Hour, Day, Week), type="raw")
summary(Naivepreds)
AANaiveDatatest$Naive_preds <- ifelse(Naivepreds[,"good"] > 0.70, "good", "bad")
table(AANaiveDatatest$Naive_preds)
NaiveAtestA<-factor(AANaiveDatatest$OVRcut)
NaiveAtestB<-factor(AANaiveDatatest$Naive_preds)
confusionMatrix(NaiveAtestB, NaiveAtestA)
Logistic Regression
##Logistic Regression
str(AARTDatatrain)
str(AARTDatatest$OVR)
logisticreg <- glm(OVR~., family=binomial(link='logit'), data = AARTDatatrain)
LRpreds <- predict(logisticreg,AARTDatatest, type ='response')
LRpreds <- ifelse(LRpreds > 0.5,1,0)
compare <- data.frame(actual = AARTDatatest$OVR, predicted = LRpreds)
misClassError <- mean(LRpreds != AARTDatatest$OVR)
print(paste('Accuracy', 1 - misClassError))
confusionMatrix(as.factor(LRpreds),as.factor(AARTDatatest$OVR))
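The fitted model itself is worth a look; summary() (not run in the original) reports which predictors carry significant coefficients:
summary(logisticreg)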
Random Forest
##Random Forest
str(AARTDatatrain)
RFmodel <- randomForest(OVR ~ ., data=AARTDatatrain, importance = TRUE, ntree = 200, na.action = na.omit)
RFpreds <- predict(RFmodel, AARTDatatest, type='class')
#caret expects predictions first, reference second
RFoutput <- confusionMatrix(RFpreds, AARTDatatest$OVR)
paste0(RFoutput$overall[1])
#Confusion Matrix
RFoutput
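Because the forest was fit with importance = TRUE, the variable importances are available; inspecting them (a step not in the original run) shows which inputs drive the OVR prediction:
importance(RFmodel)
varImpPlot(RFmodel)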
Here is each model's confusion matrix
#SVM
#(model and predictions carried over from the SVM section above)
confusionMatrix(SVMpredsROC, SVMROCtest$ROVR)
#Naive Bayes
#(factors carried over from the Naive Bayes section above)
confusionMatrix(NaiveAtestB, NaiveAtestA)
#Logistic Regression
confusionMatrix(as.factor(LRpreds),as.factor(AARTDatatest$OVR))
#Random Forest
#(RFoutput carried over from the Random Forest section above)
RFoutput
ROC Curve
##Full comparison ROC Curve
#ROC Logistic Regression
#use the raw predicted probabilities; LRpreds was thresholded to 0/1 above, which would flatten the ROC to a single point
LRprob <- predict(logisticreg, AARTDatatest, type='response')
LRpredObj <- prediction(LRprob, AARTDatatest$OVR)
LRrocObj <- performance(LRpredObj, measure="tpr", x.measure="fpr")
LRaucObj <- performance(LRpredObj, measure="auc")
#ROC Random Forest
predRFprob <- predict(RFmodel, AARTDatatest, type = "prob")
RFval <- predRFprob[,2]
RFpredObj <- prediction(RFval, AARTDatatest$OVR)
RFrocObj <- performance(RFpredObj, measure="tpr", x.measure="fpr")
RFaucObj <- performance(RFpredObj, measure="auc")
#ROC Naive Bayes
#Naivepreds already holds the raw class probabilities; score the "good" column against the OVRcut labels
NBval <- Naivepreds[,"good"]
NBpredObj <- prediction(NBval, AANaiveDatatest$OVRcut)
NBrocObj <- performance(NBpredObj, measure="tpr", x.measure="fpr")
NBaucObj <- performance(NBpredObj, measure="auc")
#ROC SVM
#refit with probability=TRUE so ROCR gets a continuous score; hard class labels alone give a one-point ROC
SVMmodelROC <- svm(ROVR ~ . - OVR, data = SVMROCtrain, kernel='radial', cost=70, gamma=0.2, probability=TRUE)
SVMpredsROC <- predict(SVMmodelROC, SVMROCtest, probability=TRUE)
SVMval <- attr(SVMpredsROC, "probabilities")[,"1"]
SVMpredObj <- prediction(SVMval, SVMROCtest$ROVR)
SVMrocObj <- performance(SVMpredObj, measure="tpr", x.measure="fpr")
SVMaucObj <- performance(SVMpredObj, measure="auc")
plot(LRrocObj, col = "blue", lwd = 1, main = "ROC Curves")
plot(RFrocObj, add = TRUE, col = "red")
plot(SVMrocObj, add = TRUE, col = "green")
plot(NBrocObj, add = TRUE, col = "brown")
abline(a=0, b=1)
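The AUC objects are computed but never printed; the values can be pulled out of the ROCR performance objects like so:
paste0("LR AUC: ", round(LRaucObj@y.values[[1]], 3))
paste0("RF AUC: ", round(RFaucObj@y.values[[1]], 3))
paste0("NB AUC: ", round(NBaucObj@y.values[[1]], 3))
paste0("SVM AUC: ", round(SVMaucObj@y.values[[1]], 3))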
Where the Naive Bayes (NB) and logistic regression (LR) curves overlap, the brown NB line is drawn over and can hide the blue LR line.
This comparison shows that, for this data, the Random Forest model was the best at predicting emergency department overcrowding.
