library(readxl)
library(dplyr)
library(ggplot2)
library(data.table)
library(gridExtra)
library(rlang)
library(stringr)
library(knitr)
library(kableExtra)
# Checking the sheets
excel_sheets('failures.xlsm')
## [1] "FAILURES" "ORDERS" "PRODUCTION"
# Failures
failures <- read_excel('failures.xlsm', sheet = 'FAILURES', skip = 2)
production <- read_excel('failures.xlsm', sheet = 'PRODUCTION', skip = 2)
# Checking the type of variables
str(failures)
## Classes 'tbl_df', 'tbl' and 'data.frame': 676 obs. of 45 variables:
## $ ORDER : chr "095598.AA" "095598.AA" "095598.AA" "095598.AA" ...
## $ ID : chr "10-2092/90D" "10-2084/90C" "10-2084/90B" "10-2081/02C" ...
## $ LENGTH : num 222 202 202 202 204 202 202 202 202 102 ...
## $ MACHINE : chr "BA_12" "BA_15" "BA_15" "BA_04" ...
## $ DATE : POSIXct, format: "2018-12-04" "2018-12-04" ...
## $ 1 : logi NA NA NA NA NA NA ...
## $ 2 : chr "22" NA NA NA ...
## $ 3 : chr NA NA NA NA ...
## $ 4 : chr "95, 120, 124" "152" NA NA ...
## $ 5 : chr NA NA "135" NA ...
## $ 6 : chr NA NA NA NA ...
## $ 7 : chr NA NA NA NA ...
## $ 8 : chr NA NA NA NA ...
## $ 9 : chr NA NA NA NA ...
## $ 10 : chr NA NA NA "130" ...
## $ 11 : chr NA NA NA NA ...
## $ 12 : num NA NA NA NA NA NA NA NA NA NA ...
## $ 13 : chr NA NA NA NA ...
## $ 14 : logi NA NA NA NA NA NA ...
## $ 15 : num NA NA NA NA NA NA NA NA NA NA ...
## $ 16 : chr "23" NA NA NA ...
## $ 17 : chr "147" NA NA NA ...
## $ 18 : chr NA NA NA NA ...
## $ 19_NOTES : chr NA NA NA NA ...
## $ 19_LENGTH: chr NA NA NA NA ...
## $ 1_ : logi NA NA NA NA NA NA ...
## $ 2_ : num 1 NA NA NA NA 1 NA 1 NA NA ...
## $ 3_ : num NA NA NA NA 1 NA NA NA NA NA ...
## $ 4_ : num 3 1 NA NA 1 NA 1 NA NA 1 ...
## $ 5_ : num NA NA 1 NA NA NA NA NA 1 1 ...
## $ 6_ : num NA NA NA NA NA NA NA NA NA NA ...
## $ 7_ : num NA NA NA NA NA NA NA NA NA NA ...
## $ 8_ : num NA NA NA NA NA NA NA NA NA NA ...
## $ 9_ : num NA NA NA NA NA NA NA NA NA NA ...
## $ 10_ : num NA NA NA 1 NA NA NA NA NA NA ...
## $ 11_ : num NA NA NA NA NA NA NA NA NA NA ...
## $ 12_ : num NA NA NA NA NA NA NA NA NA NA ...
## $ 13_ : num NA NA NA NA NA NA NA NA NA NA ...
## $ 14_ : logi NA NA NA NA NA NA ...
## $ 15_ : num NA NA NA NA NA NA NA NA NA NA ...
## $ 16_ : num 1 NA NA NA NA NA NA NA NA NA ...
## $ 17_ : num 1 NA NA NA NA NA NA NA NA 1 ...
## $ 18_ : num NA NA NA NA NA NA NA NA NA NA ...
## $ 19_ : num NA NA NA NA NA NA NA NA NA NA ...
## $ TOTAL : num 6 1 1 1 2 1 1 1 1 3 ...
# Setting failures$MACHINE as categorical
failures$MACHINE <- as.factor(failures$MACHINE)
# Setting production$MACHINE and production$VERSION as categorical
production$MACHINE <- as.factor(production$MACHINE)
# Replacing VERSION NAs with 0
production$VERSION[is.na(production$VERSION)] <- 0
production$VERSION <- as.factor(production$VERSION)
# Setting 0 as BEFORE and 1 as AFTER
levels(production$VERSION) <- c('BEFORE', 'AFTER')
# Standardizing IDs of the failures set
head(production$ID)
## [1] "10-2754/91" "10-2737/91" "10-2743/90" "10-2759/90" "10-2736/91"
## [6] "10-2758/90"
head(failures$ID)
## [1] "10-2092/90D" "10-2084/90C" "10-2084/90B" "10-2081/02C" "10-2066/91D"
## [6] "10-2066/91C"
# To standardize the ID of failures set, it's only needed to remove the letter from the end of the ID
failures_original <- failures
failures$ID <- gsub('\\D$', '', failures$ID)
Considering that the current situation regards to the failures of a production, it’s important to consider relational metrics intead of absolute: Perfomance instead of Production or Failures.
The performance metric that will be chosen is an adaptation of MTBF (Mean Tima Between Failures) concept.
For this project the performance will be measure by: Produced Length Between Failures.
To be able to calculate this performance, it’s important to discard failures registers that don’t have production registers.
registers <- as.vector(unlist(production$ID))
failures <- failures %>% filter(ID %in% registers)
colnames(failures)
## [1] "ORDER" "ID" "LENGTH" "MACHINE" "DATE"
## [6] "1" "2" "3" "4" "5"
## [11] "6" "7" "8" "9" "10"
## [16] "11" "12" "13" "14" "15"
## [21] "16" "17" "18" "19_NOTES" "19_LENGTH"
## [26] "1_" "2_" "3_" "4_" "5_"
## [31] "6_" "7_" "8_" "9_" "10_"
## [36] "11_" "12_" "13_" "14_" "15_"
## [41] "16_" "17_" "18_" "19_" "TOTAL"
production_failures <- merge(production, failures[, c(2, 6:45)], by = 'ID', all = T)
kable(head(production_failures, 10)) %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed'),
font_size = 9)
ID | ORDER | PRODUCT | MACHINE | DATE | LENGTH | VERSION | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19_NOTES | 19_LENGTH | 1_ | 2_ | 3_ | 4_ | 5_ | 6_ | 7_ | 8_ | 9_ | 10_ | 11_ | 12_ | 13_ | 14_ | 15_ | 16_ | 17_ | 18_ | 19_ | TOTAL |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10-0650/90 | 091284.AA | TYPE A | BA_12 | 2018-04-27 | 808 | BEFORE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | NA | NA | 93 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 1 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 1 |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | 24 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 22, 75 | NA | NA | NA | NA | 1 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 2 | NA | NA | 3 |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | NA | NA | 88 | 62 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 8 | NA | NA | NA | NA | NA | NA | 1 | 1 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 1 | NA | NA | 3 |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 66 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 1 | NA | NA | 1 |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | NA | NA | NA | 94 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 13 | NA | NA | NA | NA | NA | NA | NA | 1 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 1 | NA | NA | 2 |
10-2090/91 | 095598.AA | TYPE A | BA_10 | 2018-12-04 | 797 | BEFORE | NA | NA | NA | 48, 64 A 78, 83, 85, 144 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 5 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 5 |
10-2090/91 | 095598.AA | TYPE A | BA_10 | 2018-12-04 | 797 | BEFORE | NA | NA | NA | 159 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 1 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 1 |
10-2092/91 | 095598.AA | TYPE A | BA_12 | 2018-12-05 | 784 | BEFORE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 43 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 1 | NA | NA | NA | 1 |
10-2092/91 | 095598.AA | TYPE A | BA_12 | 2018-12-05 | 784 | BEFORE | NA | NA | NA | 20, 26, 34 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 3 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 3 |
colnames(production_failures)
## [1] "ID" "ORDER" "PRODUCT" "MACHINE" "DATE"
## [6] "LENGTH" "VERSION" "1" "2" "3"
## [11] "4" "5" "6" "7" "8"
## [16] "9" "10" "11" "12" "13"
## [21] "14" "15" "16" "17" "18"
## [26] "19_NOTES" "19_LENGTH" "1_" "2_" "3_"
## [31] "4_" "5_" "6_" "7_" "8_"
## [36] "9_" "10_" "11_" "12_" "13_"
## [41] "14_" "15_" "16_" "17_" "18_"
## [46] "19_" "TOTAL"
production_failures[, c(29:47)][is.na(production_failures[, c(29:47)])] <- 0
kable(head(production_failures, 10)) %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed'),
font_size = 9)
ID | ORDER | PRODUCT | MACHINE | DATE | LENGTH | VERSION | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19_NOTES | 19_LENGTH | 1_ | 2_ | 3_ | 4_ | 5_ | 6_ | 7_ | 8_ | 9_ | 10_ | 11_ | 12_ | 13_ | 14_ | 15_ | 16_ | 17_ | 18_ | 19_ | TOTAL |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10-0650/90 | 091284.AA | TYPE A | BA_12 | 2018-04-27 | 808 | BEFORE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | NA | NA | 93 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | 24 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 22, 75 | NA | NA | NA | NA | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 3 |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | NA | NA | 88 | 62 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 8 | NA | NA | NA | NA | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 66 | NA | NA | NA | NA | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | NA | NA | NA | 94 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 13 | NA | NA | NA | NA | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 2 |
10-2090/91 | 095598.AA | TYPE A | BA_10 | 2018-12-04 | 797 | BEFORE | NA | NA | NA | 48, 64 A 78, 83, 85, 144 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 |
10-2090/91 | 095598.AA | TYPE A | BA_10 | 2018-12-04 | 797 | BEFORE | NA | NA | NA | 159 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
10-2092/91 | 095598.AA | TYPE A | BA_12 | 2018-12-05 | 784 | BEFORE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 43 | NA | NA | NA | NA | NA | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
10-2092/91 | 095598.AA | TYPE A | BA_12 | 2018-12-05 | 784 | BEFORE | NA | NA | NA | 20, 26, 34 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 |
production_failures <- production_failures %>%
mutate(PERFORMANCE = if_else(TOTAL == 0, LENGTH, LENGTH / TOTAL))
kable(head(production_failures, 10)) %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed'),
font_size = 9)
ID | ORDER | PRODUCT | MACHINE | DATE | LENGTH | VERSION | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19_NOTES | 19_LENGTH | 1_ | 2_ | 3_ | 4_ | 5_ | 6_ | 7_ | 8_ | 9_ | 10_ | 11_ | 12_ | 13_ | 14_ | 15_ | 16_ | 17_ | 18_ | 19_ | TOTAL | PERFORMANCE |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10-0650/90 | 091284.AA | TYPE A | BA_12 | 2018-04-27 | 808 | BEFORE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 808.0000 |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | NA | NA | 93 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1075.0000 |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | 24 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 22, 75 | NA | NA | NA | NA | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 3 | 358.3333 |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | NA | NA | 88 | 62 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 8 | NA | NA | NA | NA | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 358.3333 |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 66 | NA | NA | NA | NA | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1075.0000 |
10-2019/91 | 094011.AA | TYPE B | CA_13 | 2018-12-03 | 1075 | BEFORE | NA | NA | NA | NA | 94 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 13 | NA | NA | NA | NA | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 2 | 537.5000 |
10-2090/91 | 095598.AA | TYPE A | BA_10 | 2018-12-04 | 797 | BEFORE | NA | NA | NA | 48, 64 A 78, 83, 85, 144 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 159.4000 |
10-2090/91 | 095598.AA | TYPE A | BA_10 | 2018-12-04 | 797 | BEFORE | NA | NA | NA | 159 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 797.0000 |
10-2092/91 | 095598.AA | TYPE A | BA_12 | 2018-12-05 | 784 | BEFORE | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 43 | NA | NA | NA | NA | NA | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 784.0000 |
10-2092/91 | 095598.AA | TYPE A | BA_12 | 2018-12-05 | 784 | BEFORE | NA | NA | NA | 20, 26, 34 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 261.3333 |
num_failures_before = production_failures[, c(7, 28:46)] %>% filter(VERSION == 'BEFORE')
num_failures_before[is.na(num_failures_before)] <- 0
num_failures_before <- num_failures_before[, -1]
num_failures_before <- data.frame(cbind(
FAILURES = as.factor(gsub('_', '', colnames(production_failures[, c(28:46)]))),
FREQUENCY = apply(num_failures_before, 2, sum)))
num_failures_before <- num_failures_before[order(-num_failures_before$FREQUENCY),]
num_failures_before <- num_failures_before %>% mutate(
CUM_FAILURES = cumsum(FREQUENCY),
CUM_PERC = round((CUM_FAILURES / sum(FREQUENCY) * 100), 2)
)
# Plotting the occurrences
ggplot(num_failures_before, aes(x = reorder(FAILURES, CUM_FAILURES))) +
geom_col(aes(y = FREQUENCY), fill = 'lightblue', color = 'darkblue', alpha = 0.7, group = 1) +
ggtitle('FREQUENCY OF FAILURES BEFORE THE CHANGES') +
geom_line(aes(y = CUM_PERC*264/100), group = 2, color = 'darkred') +
geom_point(aes(y = CUM_PERC*264/100), group = 2, shape = 21, color = 'darkred', fill = 'darkred', size = 2) +
geom_text(aes(y = CUM_PERC*264/100, label = paste(CUM_PERC, '%', sep = '')), vjust = -1, size = 3) +
scale_y_continuous(sec.axis = sec_axis(~.*100/264, name = 'Cumulative percentage'), limits = c(0, 264)) +
theme(plot.title = element_text(vjust = 0.5, hjust = 0.5, face = 'bold'))
As shown in the plot, 6 failures represent about 80% of the total of failures BEFORE the changes: 4, 3, 16, 2, 5 and 17.
num_failures_after = production_failures[, c(7, 28:46)] %>% filter(VERSION == 'AFTER')
num_failures_after[is.na(num_failures_after)] <- 0
num_failures_after <- num_failures_after[, -1]
num_failures_after <- data.frame(cbind(
FAILURES = as.factor(gsub('_', '', colnames(production_failures[, c(28:46)]))),
FREQUENCY = apply(num_failures_after, 2, sum)))
num_failures_after <- num_failures_after[order(-num_failures_after$FREQUENCY),]
num_failures_after <- num_failures_after %>% mutate(
CUM_FAILURES = cumsum(FREQUENCY),
CUM_PERC = round((CUM_FAILURES / sum(FREQUENCY) * 100), 2)
)
# Plotting the occurrences
ggplot(num_failures_after, aes(x = reorder(FAILURES, -FREQUENCY))) +
geom_col(aes(y = FREQUENCY), fill = 'lightblue', color = 'darkblue', alpha = 0.7, group = 1) +
ggtitle('FREQUENCY OF FAILURES AFTER THE CHANGES') +
geom_line(aes(y = CUM_PERC*61/100), group = 2, color = 'darkred') +
geom_point(aes(y = CUM_PERC*61/100), group = 2, shape = 21, color = 'darkred', fill = 'darkred', size = 2) +
geom_text(aes(y = CUM_PERC*61/100, label = paste(CUM_PERC, '%', sep = '')), vjust = -1, size = 3) +
scale_y_continuous(sec.axis = sec_axis(~.*100/61, name = 'Cumulative percentage'), limits = c(0, 61)) +
theme(plot.title = element_text(vjust = 0.5, hjust = 0.5, face = 'bold'))
After the changes there are again 6 failures which represent about 80% of the total.
But only the failure 16 from the list of before changes is repeated.
It shows that probably the changes have contributed to the improvements of that failures.
Even though only the mean of performance can’t be enough to make decisions, it’s a simple way to check the evolution between before and after modifications.
ggplot(distinct(production_failures %>%
group_by(VERSION) %>%
mutate(PERFORMANCE = mean(PERFORMANCE)) %>%
select(VERSION, PERFORMANCE),
VERSION, .keep_all = T)) +
geom_col(aes(x = VERSION, y = PERFORMANCE), fill = 'lightblue', color = 'darkblue', alpha = 0.7) +
ggtitle('MEAN OF PERFORMANCE BEFORE AND AFTER MODIFICATIONS') +
theme(plot.title = element_text(vjust = 0.5, hjust = 0.5, face = 'bold'))
As expected, the mean of performance after modifications is higher than before
Function to plot the comparisons of performance between before and after
compare_performances <- function(df, variable){
target <- parse_quosure(variable) #quo
# Listing the occurrences of variable to be compared
list_to_compare <- as.vector(df[, variable])
lapply(list_to_compare, function(x){
before = ggplot(df %>% filter(!!target == x), aes(x = !!target)) +
geom_col(aes(y = BEFORE), fill = 'lightblue', color = 'darkblue', alpha = 0.7) +
ggtitle('PERFORMANCE BEFORE CHANGES') +
geom_text(aes(y = BEFORE, label = round(BEFORE)), vjust = -0.5, size = 3) +
theme(plot.title = element_text(vjust = 0.5, hjust = 0.5, face = 'bold')) +
ylim(0, if_else(max(df$BEFORE) > max(df$AFTER), max(df$BEFORE), max(df$AFTER)))
after = ggplot(df %>% filter(!!target == x), aes(x = !!target)) +
geom_col(aes(y = AFTER), fill = 'lightblue', color = 'darkblue', alpha = 0.7) +
ggtitle('PERFORMANCE AFTER CHANGES') +
geom_text(aes(y = AFTER, label = round(AFTER)), vjust = -0.5, size = 3) +
theme(plot.title = element_text(vjust = 0.5, hjust = 0.5, face = 'bold')) +
ylim(0, if_else(max(df$BEFORE) > max(df$AFTER), max(df$BEFORE), max(df$AFTER)))
grid.arrange(before, after, ncol = 2)
})
}
product_performance <- dcast(distinct(production_failures %>%
group_by(PRODUCT, VERSION) %>%
mutate(PERFORMANCE = mean(PERFORMANCE)) %>%
select(PRODUCT, VERSION, PERFORMANCE),
PRODUCT, VERSION, PERFORMANCE, .keep_all = T),
PRODUCT ~ VERSION)
## Using 'PERFORMANCE' as value column. Use 'value.var' to override
# Replacing NA values with 0
product_performance[is.na(product_performance)] <- 0
# Calculating the evolution of performance for products that was produced both before and after changes
product_performance <- product_performance %>%
mutate(DIF_PERFORMANCE = if_else(BEFORE == 0 | AFTER == 0, 0, AFTER - BEFORE),
STATUS_EVOLUTION = if_else(DIF_PERFORMANCE > 0, 1, -1))
# Plotting the performances
compare_performances(product_performance, 'PRODUCT')
product_performance
Having 0 performance, it means that the production don’t repeat for both versions.
Either it stoped the production after changes or it started the production.
But keeping these products in the dataset is quite important to check the variation of performances.
ggplot(product_performance %>% filter(DIF_PERFORMANCE != 0)) +
geom_col(aes(x = PRODUCT,
y = DIF_PERFORMANCE,
fill = factor(STATUS_EVOLUTION, labels = c('Worse', 'Better'))), alpha = 0.5) +
ggtitle('EVOLUTION OF PERFORMANCE BY PRODUCT') +
labs(fill = 'Situation') +
geom_text(aes(x = PRODUCT, y = DIF_PERFORMANCE, label = round(DIF_PERFORMANCE, 2)), vjust = -0.5, size = 3) +
theme(plot.title = element_text(vjust = 0.5, hjust = 0.5, face = 'bold'),
legend.position = 'bottom',
legend.title = element_text(face = 'bold'))
Between the products with production before and after changes, only TYPE A got worse.
List of products which has gotten worse
worse_products <- as.vector(unlist(product_performance %>% filter(DIF_PERFORMANCE < 0) %>% select(PRODUCT)))
List of products which has improved
best_products <- as.vector(unlist(product_performance %>% filter(DIF_PERFORMANCE > 0) %>% select(PRODUCT)))
machine_performance <- dcast(distinct(production_failures %>%
group_by(MACHINE, VERSION) %>%
mutate(PERFORMANCE = mean(PERFORMANCE)) %>%
select(MACHINE, VERSION, PERFORMANCE),
MACHINE, VERSION, PERFORMANCE, .keep_all = T),
MACHINE ~ VERSION)
## Using 'PERFORMANCE' as value column. Use 'value.var' to override
# Replacing NA values with 0
machine_performance[is.na(machine_performance)] <- 0
# Calculating the evolution of performance for machines that was produced both before and after changes
machine_performance <- machine_performance %>%
mutate(DIF_PERFORMANCE = if_else(BEFORE == 0 | AFTER == 0, 0, AFTER - BEFORE),
STATUS_EVOLUTION = if_else(DIF_PERFORMANCE > 0, 1, -1))
# Plotting the performances
compare_performances(machine_performance, 'MACHINE')
Having 0 of performance, it means that the production wont repeat for both versions.
Either it stoped the production after changes or it started the production.
But keeping these machines in the dataset is quite important to check the variation of performances.
ggplot(machine_performance %>% filter(DIF_PERFORMANCE != 0)) +
geom_col(aes(x = MACHINE,
y = DIF_PERFORMANCE,
fill = factor(STATUS_EVOLUTION, labels = c('Worse', 'Better'))), alpha = 0.5) +
ggtitle('EVOLUTION OF PERFORMANCE BY MACHINE') +
labs(fill = 'Situation') +
geom_text(aes(x = MACHINE, y = DIF_PERFORMANCE, label = round(DIF_PERFORMANCE, 2)), vjust = -0.5, size = 3) +
theme(plot.title = element_text(vjust = 0.5, hjust = 0.5, face = 'bold'),
legend.position = 'bottom',
legend.title = element_text(face = 'bold'))
Between the machines with production before and after changes, three of them got worse: BA_13, BA_14, CA_06 and DT_12.
But for the whole department, the most machines improved.
List of machines which has gotten worse
worse_machines <- as.vector(unlist(machine_performance %>% filter(DIF_PERFORMANCE < 0) %>% select(MACHINE)))
List of machines which has improved
best_machines <- as.vector(unlist(machine_performance %>% filter(DIF_PERFORMANCE > 0) %>% select(MACHINE)))
demand_by_machine_worse_products <- dcast(distinct(production_failures %>%
filter(PRODUCT %in% worse_products) %>%
group_by(PRODUCT, MACHINE, VERSION) %>%
mutate(PRODUCTION = sum(LENGTH)) %>%
select(PRODUCT, MACHINE, VERSION, PRODUCTION),
MACHINE, VERSION, .keep_all = T),
MACHINE + PRODUCT ~ VERSION)
## Using 'PRODUCTION' as value column. Use 'value.var' to override
demand_by_machine_worse_products[is.na(demand_by_machine_worse_products)] <- 0
demand_by_machine_worse_products %>% mutate(DIF_DEMAND = if_else(BEFORE > AFTER, 'REDUCED', 'INCREASED'))
## MACHINE PRODUCT BEFORE AFTER DIF_DEMAND
## 1 BA_01 TYPE A 20290 0 REDUCED
## 2 BA_04 TYPE A 9360 0 REDUCED
## 3 BA_05 TYPE A 6835 0 REDUCED
## 4 BA_06 TYPE A 24036 0 REDUCED
## 5 BA_07 TYPE A 40180 8073 REDUCED
## 6 BA_08 TYPE A 40334 11267 REDUCED
## 7 BA_09 TYPE A 16851 0 REDUCED
## 8 BA_10 TYPE A 25855 0 REDUCED
## 9 BA_12 TYPE A 15908 4800 REDUCED
## 10 BA_15 TYPE A 11300 10462 REDUCED
## 11 DT_11 TYPE A 36457 0 REDUCED
## 12 DT_12 TYPE A 83804 29557 REDUCED
As checked above, all the machines that produced the worse products have reduced the demand.
There are at least two basic conclusions:
1st: The machines have not to do with the descreasement of the mean of performance
2nd: As shorter the products are, as more it’s fails
To test this second one, it’s important to check if the demand by machine of the best products have increased.
demand_by_machine_best_products <- dcast(distinct(production_failures %>%
filter(PRODUCT %in% best_products) %>%
group_by(PRODUCT, MACHINE, VERSION) %>%
mutate(PRODUCTION = sum(LENGTH)) %>%
select(PRODUCT, MACHINE, VERSION, PRODUCTION),
MACHINE, VERSION, .keep_all = T),
MACHINE + PRODUCT ~ VERSION)
## Using 'PRODUCTION' as value column. Use 'value.var' to override
demand_by_machine_best_products[is.na(demand_by_machine_best_products)] <- 0
demand_by_machine_best_products <- demand_by_machine_best_products %>%
mutate(DIF_DEMAND = if_else(BEFORE > AFTER, 'REDUCED', 'INCREASED'))
kable(table(demand_by_machine_best_products$DIF_DEMAND))
Var1 | Freq |
---|---|
INCREASED | 9 |
REDUCED | 12 |
In general, the most cases show that the demand has decreased.
But to assume it’s true, it’s important to measure the difference of length mean.
demand_by_machine_best_products <- dcast(distinct(production_failures %>%
filter(PRODUCT %in% best_products) %>%
group_by(PRODUCT, MACHINE, ID, VERSION) %>%
mutate(PRODUCTION = mean(LENGTH)) %>%
select(PRODUCT, MACHINE, ID, VERSION, PRODUCTION),
MACHINE, VERSION, .keep_all = T),
MACHINE + ID + PRODUCT ~ VERSION)
## Using 'PRODUCTION' as value column. Use 'value.var' to override
kable(demand_by_machine_best_products) %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed'),
font_size = 9)
MACHINE | ID | PRODUCT | BEFORE | AFTER |
---|---|---|---|---|
BA_02 | 11-1078/90 | TYPE B | NA | 1020 |
BA_02 | 11-1078/91 | TYPE B | NA | 1014 |
BA_08 | 11-0966/93 | TYPE B | NA | 513 |
BA_09 | 10-2828/90 | TYPE B | 286 | NA |
BA_09 | 11-0966/90 | TYPE B | NA | 1020 |
BA_09 | 11-0966/91 | TYPE B | NA | 1020 |
BA_09 | 11-0966/92 | TYPE B | NA | 1020 |
BA_09 | 11-1087/90 | TYPE B | NA | 1122 |
BA_09 | 11-1087/91 | TYPE B | NA | 1032 |
BA_10 | 11-0975/90 | TYPE B | 1020 | NA |
BA_10 | 11-0975/91 | TYPE B | NA | 1020 |
BA_10 | 11-0975/92 | TYPE B | 1020 | NA |
BA_10 | 11-0975/93 | TYPE B | 547 | NA |
BA_13 | 11-1018/90 | TYPE B | 1020 | NA |
BA_13 | 11-1018/91 | TYPE B | NA | 1007 |
BA_14 | 11-0192/90 | TYPE C | 1017 | NA |
BA_14 | 11-0192/91 | TYPE C | 1203 | NA |
BA_17 | 11-1088/90 | TYPE C | NA | 1043 |
CA_05 | 10-2828/91 | TYPE B | 519 | NA |
CA_05 | 11-0191/90 | TYPE C | 1020 | NA |
CA_05 | 11-0191/91 | TYPE C | 1031 | NA |
CA_06 | 10-2803/90 | TYPE C | 1010 | NA |
CA_06 | 11-1020/90 | TYPE C | NA | 1020 |
CA_06 | 11-1020/91 | TYPE C | NA | 1137 |
CA_06 | 11-1090/90 | TYPE B | 1122 | NA |
CA_06 | 11-1090/91 | TYPE B | NA | 1077 |
CA_08 | 11-0958/90 | TYPE B | NA | 1020 |
CA_08 | 11-0958/91 | TYPE B | NA | 1006 |
CA_08 | 11-0969/90 | TYPE B | 1020 | NA |
CA_08 | 11-0969/91 | TYPE B | 1020 | NA |
CA_08 | 11-1080/90 | TYPE B | 480 | NA |
CA_08 | 11-1080/91 | TYPE B | 916 | NA |
CA_10 | 11-1017/90 | TYPE B | 1020 | NA |
CA_10 | 11-1017/91 | TYPE B | 989 | NA |
CA_10 | 11-1019/90 | TYPE C | NA | 1020 |
CA_10 | 11-1019/91 | TYPE C | NA | 1175 |
CA_11 | 11-1015/90 | TYPE B | NA | 1020 |
CA_11 | 11-1015/91 | TYPE B | 1025 | NA |
CA_12 | 11-1026/90 | TYPE C | NA | 1020 |
CA_12 | 11-1026/92 | TYPE C | NA | 1195 |
CA_12 | 11-1093/90 | TYPE B | NA | 1122 |
CA_12 | 11-1093/91 | TYPE B | NA | 892 |
CA_13 | 10-2019/91 | TYPE B | 1075 | NA |
CA_15 | 10-2805/90 | TYPE C | 1002 | NA |
DT_11 | 11-0956/90 | TYPE B | NA | 136 |
DT_11 | 11-0956/91 | TYPE B | NA | 884 |
DT_11 | 11-0956/92 | TYPE B | NA | 1020 |
DT_11 | 11-0956/93 | TYPE B | NA | 1526 |
DT_11 | 11-0973/90 | TYPE B | NA | 940 |
DT_11 | 11-0973/91 | TYPE B | NA | 1020 |
DT_11 | 11-0973/92 | TYPE B | NA | 1020 |
DT_11 | 11-0973/93 | TYPE B | NA | 587 |
DT_11 | 11-1089/90 | TYPE C | 1136 | NA |
DT_11 | 11-1095/90 | TYPE C | 825 | NA |
mean(demand_by_machine_best_products$AFTER, na.rm = T)-mean(demand_by_machine_best_products$BEFORE, na.rm = T)
## [1] 62.20337
Even it’s shown that the products have incresead the length by unit, 62 meters aren’t enough to affirm that the length has impact to the number of failures.
To finally check if there are a correlation between the length of products and number of failures, it will be calculated the correlation coefficient.
ggplot(production_failures) +
geom_point(aes(x = LENGTH, y = TOTAL), fill = 'lightblue', color = 'darkblue', alpha = 0.7) +
ggtitle('CORRELATION BETWEEN LENGTH OF PRODUCT AND FAILURES OCCURRENCES') +
labs(subtitle = paste('Correlation coefficient',
round(cor(production_failures$LENGTH,
production_failures$TOTAL,
method = c('pearson', 'kendall', 'spearman')),2))) +
theme(plot.title = element_text(vjust = 0.5, hjust = 0.5, face='bold'),
plot.subtitle = element_text(vjust = 0.5, hjust = 0.5))
With a correlation coefficient close to 0, it’s proved that there aren’t a a strong correlation between the length of the product with the number of failures.
So, at this point, the first suggestion that the machines have not to do with the performance decreasement of the product TYPE A is being considered true, and the second one isn’t comproved.
Since the product can be measured by length, by analysing the number of failures per secction can bring up some insights that could help to understand if the failures occur more at the begin of the proccess, at the middle of the proccess or at the end of the proccess.
Considering that the failures ID has a letter at the end, and this letters represent the secction of the product in a sequential order (A = begin, B = 2nd part, C = 3rd part, …) it may be used to calculate if the products have a tendence of more failures in a specific secction.
failures_by_section <- failures_original
failures_by_section <- failures_by_section %>% mutate(ID = str_extract(ID, '\\w$'))
Checking unique values
unique(failures_by_section$ID)
## [1] "D" "C" "B" "A" "E" "F" "H" "I" "1" "J" "G" "0" "K"
0 and 1 should not be part of this analysis, because it’s due to products that aren’t splitted
failures_by_section <- failures_by_section %>% filter(ID != '0' & ID != '1')
kable(table(failures_by_section$ID))
Var1 | Freq |
---|---|
A | 159 |
B | 133 |
C | 128 |
D | 130 |
E | 31 |
F | 16 |
G | 16 |
H | 23 |
I | 14 |
J | 18 |
K | 5 |
The secctions between A and D has the most number of occurrences. It regards to the most products are splitted into only four secctions.
To guarantee a better annalysis, it’s being considered to discard secctions above D
failures_by_section <- failures_by_section %>% filter(
ID == 'A' | ID == 'B' | ID == 'C' | ID == 'D'
)
failures_by_section <- distinct(
failures_by_section %>%
group_by(ID) %>%
mutate(FAILURES = sum(TOTAL)) %>%
select(ID, FAILURES),
ID, .keep_all = T)
# Plotting the failures by secction
ggplot(failures_by_section) +
geom_col(aes(x = ID, y = FAILURES), fill = 'lightblue', color = 'darkblue', alpha = 0.7) +
ggtitle('FAILURES BY SECCTION') +
geom_text(aes(x = ID, y = FAILURES, label = FAILURES), vjust = -0.5, size = 3) +
theme(plot.title = element_text(vjust = 0.5, hjust = 0.5, face = 'bold'))
As shown in the plot in general the most failures are at the begin of the products.
At this point it will be considered that it important to check if the setup of the machines have to do with this occurrences.
To consider if the performance has improved, it’s important to select machines that have productions before and after the changes.
performance_comparison <- machine_performance %>%
filter(STATUS_EVOLUTION == 1) %>%
select(MACHINE, BEFORE, AFTER)
shapiro.test(performance_comparison$BEFORE)
##
## Shapiro-Wilk normality test
##
## data: performance_comparison$BEFORE
## W = 0.92412, p-value = 0.3926
shapiro.test(performance_comparison$AFTER)
##
## Shapiro-Wilk normality test
##
## data: performance_comparison$AFTER
## W = 0.97018, p-value = 0.8925
Since the both distribution was tested as normal, and the machines are the same, so the hypothesis test that will be reproduced is t test for parametric statistic.
The alternative hypothesis is: (mean of AFTER - mean of BEFORE) > 0
The confidence level is: 0.95
t.test(performance_comparison$AFTER, performance_comparison$BEFORE,
alternative = 'greater',
paired = T)
##
## Paired t-test
##
## data: performance_comparison$AFTER and performance_comparison$BEFORE
## t = 5.3593, df = 9, p-value = 0.0002284
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 132.0073 Inf
## sample estimates:
## mean of the differences
## 200.6317
With a p-value reaching less than 0.05 the alternative hypothesis has been accepted, confirming that thechanges have made the difference in the process.
performance_comparison <- melt(performance_comparison,
id.vars = c('MACHINE'),
variable.name = 'VERSION',
value.name = 'PERFORMANCE',
variable.factor = T)
ggplot(performance_comparison, aes(x = VERSION, y = PERFORMANCE, fill = VERSION)) +
geom_boxplot(alpha = 0.5) +
ggtitle('PERFORMANCE BEFORE AND AFTER CHANGES IN THE PROCESS') +
stat_summary(fun.y = mean, colour = 'red', geom = 'point', shape = 3, size = 2) +
theme(plot.title = element_text(vjus = 0.5, hjust = 0.5, face = 'bold'),
legend.position = 'bottom',
legend.title = element_text(face = 'bold'))
1 - There’s a evidence that probably the changes have contributed to reduce the frequency of the failures 4, 3, 2, 5 and 17.
2 - The failures aren’t centered in specific machines.
3 - The most failures are concentrated at the beginning of the products.
4 - The changes have done difference and contributed to improve the performance.