suppressMessages(require(rms, quietly = TRUE, warn.conflicts = FALSE))
require(splines, quietly = TRUE)
require(plotly, quietly = TRUE, warn.conflicts = FALSE)
require(Hmisc, quietly = TRUE)
require(e1071, quietly = TRUE)
##
## Attaching package: 'e1071'
## The following object is masked from 'package:Hmisc':
##
## impute
require(caret, quietly = TRUE)
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
require(BiodiversityR, quietly = TRUE)
## This is vegan 2.4-3
##
## Attaching package: 'vegan'
## The following object is masked from 'package:caret':
##
## tolerance
## The following object is masked from 'package:rms':
##
## calibrate
## BiodiversityR 2.8-3: Use command BiodiversityRGUI() to launch the Graphical User Interface and to learn about backward compatibility
require(logistf, quietly = TRUE)
require(rmarkdown, quietly = TRUE)
## Warning: package 'rmarkdown' was built under R version 3.4.2
# Threshold constants.
# NOTE(review): the names suggest a missing-value ratio cut-off and a
# correlation cut-off; their consumers are not visible here -- confirm.
NA_THRESHOLD <- 0.03
CORR_THRESHOLD <- 0.7

# Columns excluded from the correlation analysis because they are not relevant
# to it: the identification variables and the dependent variable.
# Warning: this vector is redefined for each model construction!
# NOTE(review): "Table.Name" is listed twice; kept exactly as in the original.
dropToPredict <- c(
  "File.Path", "Project", "Language", "Table.Name", "Name", "Kind",
  "Table.Name", "X..Bugs.Post", "File", "Distinct.count.of.Issue.Key.POST",
  "X..Catch", "X..Throws"
)

# Pull in the two helper scripts first, then restore the saved workspace
# objects from the .RData file.
source("construction_functions.R")
source("analysis_functions.R")
load("0-all_no_missing.RData")
In total, we build many models: one per project and per group of files (i.e. all files, files with catch blocks, files with throws, and files with both). For each of these we first build a base model for reference, and then we include the exception handling features according to the model construction analysis.
# Repack the per-project data into a list with one entry per project, each of
# the form list(project = <project name>, data = <data frame>), in the order
# given by `projects`.
# `projects` and `all_no_missing` are defined outside this section
# (presumably by the load() call above -- confirm).
all_list_omitted_1 <- all_no_missing

# Preallocate one slot per project instead of growing the list with c() on
# every iteration.
all_list_omitted <- vector("list", length(projects))

# seq_along() performs zero iterations when `projects` is empty, whereas
# 1:length(projects) would iterate over c(1, 0).
for (i in seq_along(projects)) {
  print(paste("Project:", projects[i]))
  # Single-bracket indexing is kept deliberately so the conversion (and the
  # resulting column names) is exactly what it was before.
  temp_data <- as.data.frame(all_list_omitted_1[i])
  print(paste("nrow:", nrow(temp_data), "ncol:", ncol(temp_data)))
  # Don't do anything in here, repack only.
  all_list_omitted[[i]] <- list(project = projects[i], data = temp_data)
}
## [1] "Project: hadoop-2.6"
## [1] "nrow: 3662 ncol: 159"
## [1] "Project: hibernate-5.0"
## [1] "nrow: 3450 ncol: 159"
## [1] "Project: umbraco-7.6"
## [1] "nrow: 3083 ncol: 159"
# Pass the repacked project list to modelSelectionAndNormalityAdjustment().
# NOTE(review): the function is not defined in this section -- presumably it
# comes from one of the sourced helper scripts; confirm.
# The recorded output below shows, per project, a "D.F. Budget" line followed
# by skewness and kurtosis values.
modelSelectionAndNormalityAdjustment(all_list_omitted)
## [1] "Project: hadoop-2.6 D.F. Budget: 244"
## [1] "Project: hibernate-5.0 D.F. Budget: 230"
## [1] "Project: umbraco-7.6 D.F. Budget: 205"
## [1] "Project: hadoop-2.6 skewness, 11.2075144591477"
## [1] "Project: hadoop-2.6 kurtosis, 206.311714751217"
## [1] "Project: hibernate-5.0 skewness, 5.65031532192437"
## [1] "Project: hibernate-5.0 kurtosis, 47.2644938004385"
## [1] "Project: umbraco-7.6 skewness, 7.94132944771468"
## [1] "Project: umbraco-7.6 kurtosis, 75.8810831241314"