suppressMessages(require(rms, quietly = TRUE, warn.conflicts = FALSE))
require(splines, quietly = TRUE)
require(plotly, quietly = TRUE, warn.conflicts = FALSE)
require(Hmisc, quietly = TRUE)
require(e1071, quietly = TRUE)
##
## Attaching package: 'e1071'
## The following object is masked from 'package:Hmisc':
##
## impute
require(caret, quietly = TRUE)
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
require(BiodiversityR, quietly = TRUE)
## This is vegan 2.4-3
##
## Attaching package: 'vegan'
## The following object is masked from 'package:caret':
##
## tolerance
## The following object is masked from 'package:rms':
##
## calibrate
## BiodiversityR 2.8-3: Use command BiodiversityRGUI() to launch the Graphical User Interface and to learn about backward compatibility
require(logistf, quietly = TRUE)
require(rmarkdown, quietly = TRUE)
## Warning: package 'rmarkdown' was built under R version 3.4.2
# Global thresholds. The names suggest a missing-value (NA) ratio limit and a
# correlation cutoff; presumably they are read by the helper functions sourced
# below -- TODO confirm against construction_functions.R.
NA_THRESHOLD <- 0.03
CORR_THRESHOLD <- 0.7
# Variables that must not be used as predictors: the identification variables
# and the dependent variable.
# Warning: this vector is redefined before each model construction!
# ("Table.Name" used to be listed twice; the duplicate entry was removed.)
dropToPredict <- c(
  "File.Path", "Project", "Language", "Table.Name", "Name", "Kind",
  "X..Bugs.Post", "File", "Distinct.count.of.Issue.Key.POST",
  "X..Catch", "X..Throws"
)
# Evaluate the project's construction and analysis scripts, then restore the
# objects stored in the .RData file.
# NOTE(review): the objects used later (e.g. projects, all_no_missing,
# catch_names) presumably come from these three files -- confirm.
source("construction_functions.R")
source("analysis_functions.R")
load("0-all_no_missing.RData")
In total, we build many models: one per project and per group of files (i.e., all files, files with catch blocks, files with throws, and files with both). For each of them, we first build a base model for reference, and then we add exception handling features according to the model construction analysis.
# Repack the per-project data into a list with one entry per project, each of
# the form list(project = <project name>, data = <data frame>).
all_list_omitted_1 <- all_no_missing
# Preallocate the result instead of growing it with c() on every iteration.
all_list_omitted <- vector("list", length(projects))
# seq_along() yields an empty sequence when `projects` is empty, whereas
# 1:length(projects) would iterate over c(1, 0).
for (i in seq_along(projects)) {
  print(paste("Project:", projects[i]))
  # Single-bracket subsetting returns a one-element list, which
  # as.data.frame() converts into the project's data frame.
  temp_data <- as.data.frame(all_list_omitted_1[i])
  print(paste("nrow:", nrow(temp_data), "ncol:", ncol(temp_data)))
  # Repack only; no transformation is applied to the data here.
  all_list_omitted[[i]] <- list(project = projects[i], data = temp_data)
}
## [1] "Project: hadoop-2.6"
## [1] "nrow: 3662 ncol: 159"
## [1] "Project: hibernate-5.0"
## [1] "nrow: 3450 ncol: 159"
## [1] "Project: umbraco-7.6"
## [1] "nrow: 3083 ncol: 159"
# Run the model-selection / normality-adjustment helper over the repacked
# project list. Judging by the output that follows, it prints each project's
# degrees-of-freedom budget plus a skewness and a kurtosis value.
# NOTE(review): the return value is not stored -- presumably the call is made
# only for its printed diagnostics; confirm in the helper's definition.
modelSelectionAndNormalityAdjustment(all_list_omitted)
## [1] "Project: hadoop-2.6 D.F. Budget: 244"
## [1] "Project: hibernate-5.0 D.F. Budget: 230"
## [1] "Project: umbraco-7.6 D.F. Budget: 205"
## [1] "Project: hadoop-2.6 skewness, 11.2075144591477"
## [1] "Project: hadoop-2.6 kurtosis, 206.311714751217"
## [1] "Project: hibernate-5.0 skewness, 5.65031532192437"
## [1] "Project: hibernate-5.0 kurtosis, 47.2644938004385"
## [1] "Project: umbraco-7.6 skewness, 7.94132944771468"
## [1] "Project: umbraco-7.6 kurtosis, 75.8810831241314"
## Model 0 - BASE Only
Before looking into models that include catch-block or throws-block data, we also consider models that contain only the base metrics. We aim to understand the difference between BASE-only metrics and BASE + EH metrics.
### Drop variables
# Model 0 (BASE only): for every project, drop all columns listed in
# catch_names, try_names and throws_names, and collect the remaining data
# frames in all_list_omitted_m0 (one element per project, in project order).
all_list_omitted_m0 <- vector("list", length(projects))
# seq_along() is safe for an empty `projects`; 1:length() is not.
for (i in seq_along(projects)) {
  print(paste("Project:", projects[i]))
  temp_data <- all_list_omitted[[i]]$data
  # Remove every exception-handling column in one pass. drop = FALSE keeps a
  # data frame even if only a single column survives the filter.
  temp_data <- temp_data[
    ,
    !(names(temp_data) %in% c(catch_names, try_names, throws_names)),
    drop = FALSE
  ]
  print(nrow(temp_data))
  print(names(temp_data))
  all_list_omitted_m0[[i]] <- temp_data
}
## [1] "Project: hadoop-2.6"
## [1] 3662
## [1] "fileSize"
## [2] "Distinct.count.of.Author.Email.PRE"
## [3] "Distinct.count.of.Commit.Hash.PRE"
## [4] "Distinct.count.of.Issue.Key.POST"
## [5] "Distinct.count.of.Issue.Key.PRE"
## [6] "Churn.PRE"
## [7] "AvgCyclomaticModified"
## [8] "AvgCyclomaticStrict"
## [9] "AvgCyclomatic"
## [10] "AvgEssential"
## [11] "AvgLineBlank"
## [12] "AvgLineCode"
## [13] "AvgLineComment"
## [14] "AvgLine"
## [15] "CountDeclClassMethod"
## [16] "CountDeclClassVariable"
## [17] "CountDeclClass"
## [18] "CountDeclFunction"
## [19] "CountDeclInstanceMethod"
## [20] "CountDeclInstanceVariable"
## [21] "CountDeclMethodDefault"
## [22] "CountDeclMethodPrivate"
## [23] "CountDeclMethodProtected"
## [24] "CountDeclMethodPublic"
## [25] "CountDeclMethod"
## [26] "CountLineBlank"
## [27] "CountLineCodeDecl"
## [28] "CountLineCodeExe"
## [29] "CountLineCode"
## [30] "CountLineComment"
## [31] "CountLine"
## [32] "CountSemicolon"
## [33] "CountStmtDecl"
## [34] "CountStmtExe"
## [35] "CountStmt"
## [36] "File"
## [37] "Kind"
## [38] "MaxCyclomaticModified"
## [39] "MaxCyclomaticStrict"
## [40] "MaxCyclomatic"
## [41] "MaxEssential"
## [42] "MaxNesting"
## [43] "Name"
## [44] "Number.of.Records"
## [45] "RatioCommentToCode"
## [46] "SumCyclomaticModified"
## [47] "SumCyclomaticStrict"
## [48] "SumCyclomatic"
## [49] "SumEssential"
## [50] "Table.Name"
## [1] "Project: hibernate-5.0"
## [1] 3450
## [1] "fileSize"
## [2] "Distinct.count.of.Author.Email.PRE"
## [3] "Distinct.count.of.Commit.Hash.PRE"
## [4] "Distinct.count.of.Issue.Key.POST"
## [5] "Distinct.count.of.Issue.Key.PRE"
## [6] "Churn.PRE"
## [7] "AvgCyclomaticModified"
## [8] "AvgCyclomaticStrict"
## [9] "AvgCyclomatic"
## [10] "AvgEssential"
## [11] "AvgLineBlank"
## [12] "AvgLineCode"
## [13] "AvgLineComment"
## [14] "AvgLine"
## [15] "CountDeclClassMethod"
## [16] "CountDeclClassVariable"
## [17] "CountDeclClass"
## [18] "CountDeclFunction"
## [19] "CountDeclInstanceMethod"
## [20] "CountDeclInstanceVariable"
## [21] "CountDeclMethodDefault"
## [22] "CountDeclMethodPrivate"
## [23] "CountDeclMethodProtected"
## [24] "CountDeclMethodPublic"
## [25] "CountDeclMethod"
## [26] "CountLineBlank"
## [27] "CountLineCodeDecl"
## [28] "CountLineCodeExe"
## [29] "CountLineCode"
## [30] "CountLineComment"
## [31] "CountLine"
## [32] "CountSemicolon"
## [33] "CountStmtDecl"
## [34] "CountStmtExe"
## [35] "CountStmt"
## [36] "File"
## [37] "Kind"
## [38] "MaxCyclomaticModified"
## [39] "MaxCyclomaticStrict"
## [40] "MaxCyclomatic"
## [41] "MaxEssential"
## [42] "MaxNesting"
## [43] "Name"
## [44] "Number.of.Records"
## [45] "RatioCommentToCode"
## [46] "SumCyclomaticModified"
## [47] "SumCyclomaticStrict"
## [48] "SumCyclomatic"
## [49] "SumEssential"
## [50] "Table.Name"
## [1] "Project: umbraco-7.6"
## [1] 3083
## [1] "fileSize"
## [2] "Distinct.count.of.Author.Email.PRE"
## [3] "Distinct.count.of.Commit.Hash.PRE"
## [4] "Distinct.count.of.Issue.Key.POST"
## [5] "Distinct.count.of.Issue.Key.PRE"
## [6] "Churn.PRE"
## [7] "AvgCyclomaticModified"
## [8] "AvgCyclomaticStrict"
## [9] "AvgCyclomatic"
## [10] "AvgEssential"
## [11] "AvgLineBlank"
## [12] "AvgLineCode"
## [13] "AvgLineComment"
## [14] "AvgLine"
## [15] "CountDeclClassMethod"
## [16] "CountDeclClassVariable"
## [17] "CountDeclClass"
## [18] "CountDeclFunction"
## [19] "CountDeclInstanceMethod"
## [20] "CountDeclInstanceVariable"
## [21] "CountDeclMethodDefault"
## [22] "CountDeclMethodPrivate"
## [23] "CountDeclMethodProtected"
## [24] "CountDeclMethodPublic"
## [25] "CountDeclMethod"
## [26] "CountLineBlank"
## [27] "CountLineCodeDecl"
## [28] "CountLineCodeExe"
## [29] "CountLineCode"
## [30] "CountLineComment"
## [31] "CountLine"
## [32] "CountSemicolon"
## [33] "CountStmtDecl"
## [34] "CountStmtExe"
## [35] "CountStmt"
## [36] "File"
## [37] "Kind"
## [38] "MaxCyclomaticModified"
## [39] "MaxCyclomaticStrict"
## [40] "MaxCyclomatic"
## [41] "MaxEssential"
## [42] "MaxNesting"
## [43] "Name"
## [44] "Number.of.Records"
## [45] "RatioCommentToCode"
## [46] "SumCyclomaticModified"
## [47] "SumCyclomaticStrict"
## [48] "SumCyclomatic"
## [49] "SumEssential"
## [50] "Table.Name"
# Redefine the non-predictor columns (identification variables and the
# dependent variable) for Model 0, then run the reduction helper on the
# BASE-only data sets.
dropToPredict <- c(
  "File.Path", "Project", "Language", "Table.Name", "Name", "Kind",
  "X..Bugs.Post", "File", "Distinct.count.of.Issue.Key.POST"
)
all_list_model_m0 <- dataApplyReduction(all_list_omitted_m0)
## [1] "Project: hadoop-2.6"
## [1] "NumberOfMetricsInitial: 45"
## [1] "NumberOfMetricsKept: 15"
## [1] "Distinct.count.of.Issue.Key.PRE + Churn.PRE + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassMethod + CountDeclClassVariable + CountDeclClass + CountDeclInstanceVariable + CountDeclMethodDefault + CountDeclMethodPrivate + CountDeclMethodProtected + CountDeclMethodPublic + CountLineComment + MaxEssential"
## [1] "Project: hibernate-5.0"
## [1] "NumberOfMetricsInitial: 45"
## [1] "NumberOfMetricsKept: 16"
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.PRE + AvgCyclomaticModified + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassMethod + CountDeclClassVariable + CountDeclClass + CountDeclInstanceVariable + CountDeclMethodDefault + CountDeclMethodPrivate + CountDeclMethodProtected + CountDeclMethodPublic + CountLineComment + RatioCommentToCode"
## [1] "Project: umbraco-7.6"
## [1] "NumberOfMetricsInitial: 45"
## [1] "NumberOfMetricsKept: 8"
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.PRE + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClass + CountDeclFunction + RatioCommentToCode"
## [1] "Project: hadoop-2.6"
## [1] "Redudant variables: CountDeclMethodPrivate,␣CountDeclClassMethod,␣CountDeclClass,␣CountDeclInstanceVariable,␣CountLineComment"
## [1] "Distinct.count.of.Issue.Key.POST + Distinct.count.of.Issue.Key.PRE + Churn.PRE + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassVariable + CountDeclMethodDefault + CountDeclMethodProtected + CountDeclMethodPublic + File + Kind + MaxEssential + Name + Table.Name"
## [1] "Project: hibernate-5.0"
## [1] "Redudant variables: "
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.POST + Distinct.count.of.Issue.Key.PRE + AvgCyclomaticModified + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassMethod + CountDeclClassVariable + CountDeclClass + CountDeclInstanceVariable + CountDeclMethodDefault + CountDeclMethodPrivate + CountDeclMethodProtected + CountDeclMethodPublic + CountLineComment + File + Kind + Name + RatioCommentToCode + Table.Name"
## [1] "Project: umbraco-7.6"
## [1] "Redudant variables: "
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.POST + Distinct.count.of.Issue.Key.PRE + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClass + CountDeclFunction + File + Kind + Name + RatioCommentToCode + Table.Name"
## [1] "Project: hadoop-2.6"
## [1] "NumberOfMetricsInitial: 10 Budget: 244 Over Budget: FALSE NumberOfMetricsKept: 10 CorrelationCutoff: 0.7"
## [1] "Project: hibernate-5.0"
## [1] "NumberOfMetricsInitial: 16 Budget: 230 Over Budget: FALSE NumberOfMetricsKept: 16 CorrelationCutoff: 0.7"
## [1] "Project: umbraco-7.6"
## [1] "NumberOfMetricsInitial: 8 Budget: 205 Over Budget: FALSE NumberOfMetricsKept: 8 CorrelationCutoff: 0.7"
# Build the per-project formula list that is passed to the Model 0 logistic
# fits; the helper prints each project's predictor terms.
form_list_bin_m0 <- dataSetupFormulasBinary(all_list_model_m0)
## [1] "Project: hadoop-2.6"
## [1] "Distinct.count.of.Issue.Key.PRE + Churn.PRE + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassVariable + CountDeclMethodDefault + CountDeclMethodProtected + CountDeclMethodPublic + MaxEssential"
## [1] "Project: hibernate-5.0"
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.PRE + AvgCyclomaticModified + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassMethod + CountDeclClassVariable + CountDeclClass + CountDeclInstanceVariable + CountDeclMethodDefault + CountDeclMethodPrivate + CountDeclMethodProtected + CountDeclMethodPublic + CountLineComment + RatioCommentToCode"
## [1] "Project: umbraco-7.6"
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.PRE + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClass + CountDeclFunction + RatioCommentToCode"
# Fit the BASE-only logistic regression models for every project, labelled
# "ALL.BASE".
models_1_BASE <- modelFitLogistic(
  all_list_model_m0,
  form_list_bin_m0,
  "ALL.BASE"
)
## [1] "Project: hadoop-2.6"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 3662 LR chi2 437.88 R2 0.306 C 0.843
## FALSE 3439 d.f. 10 g 1.103 Dxy 0.687
## TRUE 223 Pr(> chi2) <0.0001 gr 3.014 gamma 0.703
## max |deriv| 1e-13 gp 0.076 tau-a 0.079
## Brier 0.045
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -4.3614 0.2832 -15.40 <0.0001
## Distinct.count.of.Issue.Key.PRE 1.9006 0.4227 4.50 <0.0001
## Churn.PRE 0.7777 0.0906 8.58 <0.0001
## AvgEssential -0.9794 0.9775 -1.00 0.3164
## AvgLineBlank 0.7203 0.6229 1.16 0.2476
## AvgLineComment 0.7669 0.5625 1.36 0.1728
## CountDeclClassVariable 0.6099 0.2040 2.99 0.0028
## CountDeclMethodDefault 0.3829 0.1924 1.99 0.0466
## CountDeclMethodProtected -0.0073 0.2461 -0.03 0.9763
## CountDeclMethodPublic -0.0068 0.1907 -0.04 0.9717
## MaxEssential 1.3270 0.4330 3.06 0.0022
##
## [1] "Project: hibernate-5.0"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 3450 LR chi2 381.53 R2 0.216 C 0.788
## FALSE 3096 d.f. 16 g 1.301 Dxy 0.576
## TRUE 354 Pr(> chi2) <0.0001 gr 3.672 gamma 0.579
## max |deriv| 1e-13 gp 0.105 tau-a 0.106
## Brier 0.079
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -3.7664 0.4069 -9.26 <0.0001
## Distinct.count.of.Author.Email.PRE 0.7984 0.4904 1.63 0.1035
## Distinct.count.of.Issue.Key.PRE 1.1755 0.2436 4.83 <0.0001
## AvgCyclomaticModified -1.1593 0.7392 -1.57 0.1168
## AvgEssential 0.0771 0.9183 0.08 0.9331
## AvgLineBlank 1.1003 0.5101 2.16 0.0310
## AvgLineComment 0.1181 0.5153 0.23 0.8187
## CountDeclClassMethod -0.5231 0.2698 -1.94 0.0526
## CountDeclClassVariable -0.5600 0.3033 -1.85 0.0649
## CountDeclClass -0.5608 0.4574 -1.23 0.2201
## CountDeclInstanceVariable -0.5720 0.2058 -2.78 0.0054
## CountDeclMethodDefault 0.0603 0.2993 0.20 0.8402
## CountDeclMethodPrivate 0.8586 0.2708 3.17 0.0015
## CountDeclMethodProtected -0.1600 0.2357 -0.68 0.4973
## CountDeclMethodPublic 1.2950 0.2343 5.53 <0.0001
## CountLineComment 1.0226 0.3158 3.24 0.0012
## RatioCommentToCode -2.8372 0.5818 -4.88 <0.0001
##
## [1] "Project: umbraco-7.6"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 3083 LR chi2 94.37 R2 0.136 C 0.750
## FALSE 2999 d.f. 8 g 1.005 Dxy 0.500
## TRUE 84 Pr(> chi2) <0.0001 gr 2.731 gamma 0.513
## max |deriv| 9e-07 gp 0.028 tau-a 0.027
## Brier 0.025
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -5.4088 0.5125 -10.55 <0.0001
## Distinct.count.of.Author.Email.PRE 3.1202 0.6078 5.13 <0.0001
## Distinct.count.of.Issue.Key.PRE 0.3255 1.0879 0.30 0.7648
## AvgEssential 0.8683 1.2120 0.72 0.4737
## AvgLineBlank 1.5648 0.6844 2.29 0.0222
## AvgLineComment -1.0431 0.8267 -1.26 0.2070
## CountDeclClass -0.4660 1.0231 -0.46 0.6488
## CountDeclFunction 1.3171 0.2756 4.78 <0.0001
## RatioCommentToCode 1.5581 0.9372 1.66 0.0964
##
As we can see in the clusters, all the throws-block data were correlated with each other. Similarly, all catch-block data and try-block data were correlated with each other. In this situation, we can see that exception handling metrics are important, and we will dig further to understand them better.
### Drop variables
# Model 1 (BASE + EH): for every project, drop the exception-handling columns
# except the identification columns and the X..Catch / X..Throws counts, and
# collect the resulting data frames in all_list_omitted_m1.
all_list_omitted_m1 <- vector("list", length(projects))

# The keep/drop vectors do not depend on the project, so they are computed
# once here instead of on every loop iteration.
keepForID <- c("Project", "File.Path")
keepForCatch <- c("X..Catch", keepForID)
keepForTry <- c(keepForID)
keepForThrows <- c("X..Throws", keepForID)
catch_names_drop <- catch_names[!(catch_names %in% keepForCatch)]
try_names_drop <- try_names[!(try_names %in% keepForTry)]
throws_names_drop <- throws_names[!(throws_names %in% keepForThrows)]

# seq_along() is safe for an empty `projects`; 1:length() is not.
for (i in seq_along(projects)) {
  print(paste("Project:", projects[i]))
  temp_data <- all_list_omitted[[i]]$data
  # Remove all columns marked for dropping in one pass. drop = FALSE keeps a
  # data frame even if only a single column survives the filter.
  temp_data <- temp_data[
    ,
    !(names(temp_data) %in%
        c(catch_names_drop, try_names_drop, throws_names_drop)),
    drop = FALSE
  ]
  print(names(temp_data))
  all_list_omitted_m1[[i]] <- temp_data
}
## [1] "Project: hadoop-2.6"
## [1] "File.Path"
## [2] "Project"
## [3] "fileSize"
## [4] "Distinct.count.of.Author.Email.PRE"
## [5] "Distinct.count.of.Commit.Hash.PRE"
## [6] "Distinct.count.of.Issue.Key.POST"
## [7] "Distinct.count.of.Issue.Key.PRE"
## [8] "Churn.PRE"
## [9] "AvgCyclomaticModified"
## [10] "AvgCyclomaticStrict"
## [11] "AvgCyclomatic"
## [12] "AvgEssential"
## [13] "AvgLineBlank"
## [14] "AvgLineCode"
## [15] "AvgLineComment"
## [16] "AvgLine"
## [17] "CountDeclClassMethod"
## [18] "CountDeclClassVariable"
## [19] "CountDeclClass"
## [20] "CountDeclFunction"
## [21] "CountDeclInstanceMethod"
## [22] "CountDeclInstanceVariable"
## [23] "CountDeclMethodDefault"
## [24] "CountDeclMethodPrivate"
## [25] "CountDeclMethodProtected"
## [26] "CountDeclMethodPublic"
## [27] "CountDeclMethod"
## [28] "CountLineBlank"
## [29] "CountLineCodeDecl"
## [30] "CountLineCodeExe"
## [31] "CountLineCode"
## [32] "CountLineComment"
## [33] "CountLine"
## [34] "CountSemicolon"
## [35] "CountStmtDecl"
## [36] "CountStmtExe"
## [37] "CountStmt"
## [38] "File"
## [39] "Kind"
## [40] "MaxCyclomaticModified"
## [41] "MaxCyclomaticStrict"
## [42] "MaxCyclomatic"
## [43] "MaxEssential"
## [44] "MaxNesting"
## [45] "Name"
## [46] "Number.of.Records"
## [47] "RatioCommentToCode"
## [48] "SumCyclomaticModified"
## [49] "SumCyclomaticStrict"
## [50] "SumCyclomatic"
## [51] "SumEssential"
## [52] "Table.Name"
## [53] "X..Throws"
## [54] "X..Catch"
## [1] "Project: hibernate-5.0"
## [1] "File.Path"
## [2] "Project"
## [3] "fileSize"
## [4] "Distinct.count.of.Author.Email.PRE"
## [5] "Distinct.count.of.Commit.Hash.PRE"
## [6] "Distinct.count.of.Issue.Key.POST"
## [7] "Distinct.count.of.Issue.Key.PRE"
## [8] "Churn.PRE"
## [9] "AvgCyclomaticModified"
## [10] "AvgCyclomaticStrict"
## [11] "AvgCyclomatic"
## [12] "AvgEssential"
## [13] "AvgLineBlank"
## [14] "AvgLineCode"
## [15] "AvgLineComment"
## [16] "AvgLine"
## [17] "CountDeclClassMethod"
## [18] "CountDeclClassVariable"
## [19] "CountDeclClass"
## [20] "CountDeclFunction"
## [21] "CountDeclInstanceMethod"
## [22] "CountDeclInstanceVariable"
## [23] "CountDeclMethodDefault"
## [24] "CountDeclMethodPrivate"
## [25] "CountDeclMethodProtected"
## [26] "CountDeclMethodPublic"
## [27] "CountDeclMethod"
## [28] "CountLineBlank"
## [29] "CountLineCodeDecl"
## [30] "CountLineCodeExe"
## [31] "CountLineCode"
## [32] "CountLineComment"
## [33] "CountLine"
## [34] "CountSemicolon"
## [35] "CountStmtDecl"
## [36] "CountStmtExe"
## [37] "CountStmt"
## [38] "File"
## [39] "Kind"
## [40] "MaxCyclomaticModified"
## [41] "MaxCyclomaticStrict"
## [42] "MaxCyclomatic"
## [43] "MaxEssential"
## [44] "MaxNesting"
## [45] "Name"
## [46] "Number.of.Records"
## [47] "RatioCommentToCode"
## [48] "SumCyclomaticModified"
## [49] "SumCyclomaticStrict"
## [50] "SumCyclomatic"
## [51] "SumEssential"
## [52] "Table.Name"
## [53] "X..Throws"
## [54] "X..Catch"
## [1] "Project: umbraco-7.6"
## [1] "File.Path"
## [2] "Project"
## [3] "fileSize"
## [4] "Distinct.count.of.Author.Email.PRE"
## [5] "Distinct.count.of.Commit.Hash.PRE"
## [6] "Distinct.count.of.Issue.Key.POST"
## [7] "Distinct.count.of.Issue.Key.PRE"
## [8] "Churn.PRE"
## [9] "AvgCyclomaticModified"
## [10] "AvgCyclomaticStrict"
## [11] "AvgCyclomatic"
## [12] "AvgEssential"
## [13] "AvgLineBlank"
## [14] "AvgLineCode"
## [15] "AvgLineComment"
## [16] "AvgLine"
## [17] "CountDeclClassMethod"
## [18] "CountDeclClassVariable"
## [19] "CountDeclClass"
## [20] "CountDeclFunction"
## [21] "CountDeclInstanceMethod"
## [22] "CountDeclInstanceVariable"
## [23] "CountDeclMethodDefault"
## [24] "CountDeclMethodPrivate"
## [25] "CountDeclMethodProtected"
## [26] "CountDeclMethodPublic"
## [27] "CountDeclMethod"
## [28] "CountLineBlank"
## [29] "CountLineCodeDecl"
## [30] "CountLineCodeExe"
## [31] "CountLineCode"
## [32] "CountLineComment"
## [33] "CountLine"
## [34] "CountSemicolon"
## [35] "CountStmtDecl"
## [36] "CountStmtExe"
## [37] "CountStmt"
## [38] "File"
## [39] "Kind"
## [40] "MaxCyclomaticModified"
## [41] "MaxCyclomaticStrict"
## [42] "MaxCyclomatic"
## [43] "MaxEssential"
## [44] "MaxNesting"
## [45] "Name"
## [46] "Number.of.Records"
## [47] "RatioCommentToCode"
## [48] "SumCyclomaticModified"
## [49] "SumCyclomaticStrict"
## [50] "SumCyclomatic"
## [51] "SumEssential"
## [52] "Table.Name"
## [53] "X..Throws"
## [54] "X..Catch"
# Redefine the non-predictor columns (identification variables and the
# dependent variable) for Model 1, then run the reduction helper on the
# BASE + EH data sets. X..Catch and X..Throws are not in this list, so they
# remain available as predictors.
dropToPredict <- c(
  "File.Path", "Project", "Language", "Table.Name", "Name", "Kind",
  "X..Bugs.Post", "File", "Distinct.count.of.Issue.Key.POST"
)
all_list_model_m1 <- dataApplyReduction(all_list_omitted_m1)
## [1] "Project: hadoop-2.6"
## [1] "NumberOfMetricsInitial: 47"
## [1] "NumberOfMetricsKept: 18"
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.PRE + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassMethod + CountDeclClassVariable + CountDeclClass + CountDeclInstanceVariable + CountDeclMethodDefault + CountDeclMethodPrivate + CountDeclMethodProtected + CountDeclMethodPublic + CountLineComment + MaxEssential + RatioCommentToCode + X..Throws + X..Catch"
## [1] "Project: hibernate-5.0"
## [1] "NumberOfMetricsInitial: 47"
## [1] "NumberOfMetricsKept: 18"
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.PRE + AvgCyclomaticModified + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassMethod + CountDeclClassVariable + CountDeclClass + CountDeclInstanceVariable + CountDeclMethodDefault + CountDeclMethodPrivate + CountDeclMethodProtected + CountDeclMethodPublic + CountLineComment + RatioCommentToCode + X..Throws + X..Catch"
## [1] "Project: umbraco-7.6"
## [1] "NumberOfMetricsInitial: 47"
## [1] "NumberOfMetricsKept: 9"
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.PRE + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClass + CountDeclFunction + RatioCommentToCode + X..Catch"
## [1] "Project: hadoop-2.6"
## [1] "Redudant variables: CountDeclMethodPrivate,␣CountDeclClassMethod,␣X..Throws,␣CountDeclClass,␣CountDeclInstanceVariable,␣CountDeclMethodProtected,␣CountLineComment"
## [1] "File.Path + Project + Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.POST + Distinct.count.of.Issue.Key.PRE + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassVariable + CountDeclMethodDefault + CountDeclMethodPublic + File + Kind + MaxEssential + Name + RatioCommentToCode + Table.Name + X..Catch"
## [1] "Project: hibernate-5.0"
## [1] "Redudant variables: "
## [1] "File.Path + Project + Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.POST + Distinct.count.of.Issue.Key.PRE + AvgCyclomaticModified + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassMethod + CountDeclClassVariable + CountDeclClass + CountDeclInstanceVariable + CountDeclMethodDefault + CountDeclMethodPrivate + CountDeclMethodProtected + CountDeclMethodPublic + CountLineComment + File + Kind + Name + RatioCommentToCode + Table.Name + X..Throws + X..Catch"
## [1] "Project: umbraco-7.6"
## [1] "Redudant variables: "
## [1] "File.Path + Project + Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.POST + Distinct.count.of.Issue.Key.PRE + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClass + CountDeclFunction + File + Kind + Name + RatioCommentToCode + Table.Name + X..Catch"
## [1] "Project: hadoop-2.6"
## [1] "NumberOfMetricsInitial: 11 Budget: 244 Over Budget: FALSE NumberOfMetricsKept: 11 CorrelationCutoff: 0.7"
## [1] "Project: hibernate-5.0"
## [1] "NumberOfMetricsInitial: 18 Budget: 230 Over Budget: FALSE NumberOfMetricsKept: 18 CorrelationCutoff: 0.7"
## [1] "Project: umbraco-7.6"
## [1] "NumberOfMetricsInitial: 9 Budget: 205 Over Budget: FALSE NumberOfMetricsKept: 9 CorrelationCutoff: 0.7"
# Build the per-project formula list that is passed to the Model 1 logistic
# fits; the helper prints each project's predictor terms.
form_list_bin_m1 <- dataSetupFormulasBinary(all_list_model_m1)
## [1] "Project: hadoop-2.6"
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.PRE + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassVariable + CountDeclMethodDefault + CountDeclMethodPublic + MaxEssential + RatioCommentToCode + X..Catch"
## [1] "Project: hibernate-5.0"
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.PRE + AvgCyclomaticModified + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassMethod + CountDeclClassVariable + CountDeclClass + CountDeclInstanceVariable + CountDeclMethodDefault + CountDeclMethodPrivate + CountDeclMethodProtected + CountDeclMethodPublic + CountLineComment + RatioCommentToCode + X..Throws + X..Catch"
## [1] "Project: umbraco-7.6"
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.PRE + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClass + CountDeclFunction + RatioCommentToCode + X..Catch"
# Fit the BASE + EH logistic regression models for every project, labelled
# "ALL.BSEH".
models_1_BSEH <- modelFitLogistic(
  all_list_model_m1,
  form_list_bin_m1,
  "ALL.BSEH"
)
## [1] "Project: hadoop-2.6"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 3662 LR chi2 482.25 R2 0.335 C 0.855
## FALSE 3439 d.f. 11 g 1.272 Dxy 0.711
## TRUE 223 Pr(> chi2) <0.0001 gr 3.566 gamma 0.718
## max |deriv| 8e-10 gp 0.080 tau-a 0.081
## Brier 0.043
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -3.4030 0.4134 -8.23 <0.0001
## Distinct.count.of.Author.Email.PRE 3.8178 0.4021 9.50 <0.0001
## Distinct.count.of.Issue.Key.PRE 0.4338 0.4910 0.88 0.3769
## AvgEssential -1.4945 1.0469 -1.43 0.1534
## AvgLineBlank 0.4635 0.6410 0.72 0.4696
## AvgLineComment 0.3672 0.5883 0.62 0.5326
## CountDeclClassVariable 0.3420 0.2122 1.61 0.1070
## CountDeclMethodDefault 0.1828 0.1948 0.94 0.3480
## CountDeclMethodPublic -0.3671 0.1982 -1.85 0.0640
## MaxEssential 1.1103 0.4488 2.47 0.0134
## RatioCommentToCode -1.9394 0.6839 -2.84 0.0046
## X..Catch 0.6860 0.2468 2.78 0.0054
##
## [1] "Project: hibernate-5.0"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 3450 LR chi2 394.22 R2 0.223 C 0.791
## FALSE 3096 d.f. 18 g 1.299 Dxy 0.583
## TRUE 354 Pr(> chi2) <0.0001 gr 3.664 gamma 0.586
## max |deriv| 2e-13 gp 0.106 tau-a 0.107
## Brier 0.079
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -3.6083 0.4140 -8.71 <0.0001
## Distinct.count.of.Author.Email.PRE 0.5765 0.4992 1.15 0.2482
## Distinct.count.of.Issue.Key.PRE 1.2353 0.2482 4.98 <0.0001
## AvgCyclomaticModified -1.1501 0.7589 -1.52 0.1296
## AvgEssential 0.2388 0.9173 0.26 0.7946
## AvgLineBlank 1.0465 0.5128 2.04 0.0413
## AvgLineComment 0.1323 0.5219 0.25 0.7999
## CountDeclClassMethod -0.3915 0.2744 -1.43 0.1537
## CountDeclClassVariable -0.5651 0.3079 -1.84 0.0665
## CountDeclClass -0.5269 0.4607 -1.14 0.2527
## CountDeclInstanceVariable -0.5167 0.2092 -2.47 0.0135
## CountDeclMethodDefault -0.0451 0.3042 -0.15 0.8821
## CountDeclMethodPrivate 0.8261 0.2780 2.97 0.0030
## CountDeclMethodProtected -0.3077 0.2433 -1.26 0.2060
## CountDeclMethodPublic 1.1546 0.2401 4.81 <0.0001
## CountLineComment 0.8879 0.3213 2.76 0.0057
## RatioCommentToCode -2.7248 0.5853 -4.66 <0.0001
## X..Throws 0.7217 0.2017 3.58 0.0003
## X..Catch -0.1280 0.2801 -0.46 0.6476
##
## [1] "Project: umbraco-7.6"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 3083 LR chi2 102.71 R2 0.148 C 0.758
## FALSE 2999 d.f. 9 g 0.951 Dxy 0.516
## TRUE 84 Pr(> chi2) <0.0001 gr 2.587 gamma 0.531
## max |deriv| 5e-07 gp 0.028 tau-a 0.027
## Brier 0.024
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -5.3076 0.5079 -10.45 <0.0001
## Distinct.count.of.Author.Email.PRE 3.0602 0.6169 4.96 <0.0001
## Distinct.count.of.Issue.Key.PRE 0.2517 1.1103 0.23 0.8206
## AvgEssential 1.2942 1.1674 1.11 0.2676
## AvgLineBlank 1.3611 0.6901 1.97 0.0486
## AvgLineComment -1.4642 0.8769 -1.67 0.0950
## CountDeclClass -0.4619 1.0443 -0.44 0.6582
## CountDeclFunction 0.9862 0.2991 3.30 0.0010
## RatioCommentToCode 1.5917 0.9281 1.71 0.0864
## X..Catch 1.5400 0.5152 2.99 0.0028
##
In this section, we present the selected statistics for our analysis. As explained in our approach, these correspond to steps MC7, MA1, MA2, MA3, and MA4.
Here we extract the selected statistics and add them as columns to an object that is exported to CSV in the Output section.
# Collect the selected statistics for both model families (BASE and BSEH).
# Each helper receives the fitted models plus the statistics gathered so far
# and its result replaces the accumulator. The former empty-list
# initialisations were dead stores (overwritten immediately by modelStats())
# and have been removed.
# NOTE(review): the call order is kept exactly as before in case the
# validation helper depends on random-number state -- confirm.
model_things_1_BASE <- modelStats(models_1_BASE)
model_things_1_BSEH <- modelStats(models_1_BSEH)
model_things_1_BASE <- modelValidate(models_1_BASE, model_things_1_BASE)
model_things_1_BSEH <- modelValidate(models_1_BSEH, model_things_1_BSEH)
# Prints one Wald-statistics table per project for the BASE models.
model_things_1_BASE <- modelSignificance(models_1_BASE, model_things_1_BASE)
## [1] "project: hadoop-2.6 model: ALL.BASE"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Issue.Key.PRE 20.22 1 <.0001
## Churn.PRE 73.62 1 <.0001
## AvgEssential 1.00 1 0.3164
## AvgLineBlank 1.34 1 0.2476
## AvgLineComment 1.86 1 0.1728
## CountDeclClassVariable 8.94 1 0.0028
## CountDeclMethodDefault 3.96 1 0.0466
## CountDeclMethodProtected 0.00 1 0.9763
## CountDeclMethodPublic 0.00 1 0.9717
## MaxEssential 9.39 1 0.0022
## TOTAL 365.46 10 <.0001
## [1] "project: hibernate-5.0 model: ALL.BASE"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 2.65 1 0.1035
## Distinct.count.of.Issue.Key.PRE 23.29 1 <.0001
## AvgCyclomaticModified 2.46 1 0.1168
## AvgEssential 0.01 1 0.9331
## AvgLineBlank 4.65 1 0.0310
## AvgLineComment 0.05 1 0.8187
## CountDeclClassMethod 3.76 1 0.0526
## CountDeclClassVariable 3.41 1 0.0649
## CountDeclClass 1.50 1 0.2201
## CountDeclInstanceVariable 7.72 1 0.0054
## CountDeclMethodDefault 0.04 1 0.8402
## CountDeclMethodPrivate 10.05 1 0.0015
## CountDeclMethodProtected 0.46 1 0.4973
## CountDeclMethodPublic 30.55 1 <.0001
## CountLineComment 10.48 1 0.0012
## RatioCommentToCode 23.78 1 <.0001
## TOTAL 306.17 16 <.0001
## [1] "project: umbraco-7.6 model: ALL.BASE"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 26.35 1 <.0001
## Distinct.count.of.Issue.Key.PRE 0.09 1 0.7648
## AvgEssential 0.51 1 0.4737
## AvgLineBlank 5.23 1 0.0222
## AvgLineComment 1.59 1 0.2070
## CountDeclClass 0.21 1 0.6488
## CountDeclFunction 22.85 1 <.0001
## RatioCommentToCode 2.76 1 0.0964
## TOTAL 99.58 8 <.0001
# Add the significance results for the BSEH models; the helper prints one
# Wald-statistics table per project.
model_things_1_BSEH <- modelSignificance(
  models_1_BSEH,
  model_things_1_BSEH
)
## [1] "project: hadoop-2.6 model: ALL.BSEH"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 90.17 1 <.0001
## Distinct.count.of.Issue.Key.PRE 0.78 1 0.3769
## AvgEssential 2.04 1 0.1534
## AvgLineBlank 0.52 1 0.4696
## AvgLineComment 0.39 1 0.5326
## CountDeclClassVariable 2.60 1 0.1070
## CountDeclMethodDefault 0.88 1 0.3480
## CountDeclMethodPublic 3.43 1 0.0640
## MaxEssential 6.12 1 0.0134
## RatioCommentToCode 8.04 1 0.0046
## X..Catch 7.72 1 0.0054
## TOTAL 377.62 11 <.0001
## [1] "project: hibernate-5.0 model: ALL.BSEH"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 1.33 1 0.2482
## Distinct.count.of.Issue.Key.PRE 24.77 1 <.0001
## AvgCyclomaticModified 2.30 1 0.1296
## AvgEssential 0.07 1 0.7946
## AvgLineBlank 4.17 1 0.0413
## AvgLineComment 0.06 1 0.7999
## CountDeclClassMethod 2.03 1 0.1537
## CountDeclClassVariable 3.37 1 0.0665
## CountDeclClass 1.31 1 0.2527
## CountDeclInstanceVariable 6.10 1 0.0135
## CountDeclMethodDefault 0.02 1 0.8821
## CountDeclMethodPrivate 8.83 1 0.0030
## CountDeclMethodProtected 1.60 1 0.2060
## CountDeclMethodPublic 23.12 1 <.0001
## CountLineComment 7.64 1 0.0057
## RatioCommentToCode 21.68 1 <.0001
## X..Throws 12.81 1 0.0003
## X..Catch 0.21 1 0.6476
## TOTAL 312.19 18 <.0001
## [1] "project: umbraco-7.6 model: ALL.BSEH"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 24.61 1 <.0001
## Distinct.count.of.Issue.Key.PRE 0.05 1 0.8206
## AvgEssential 1.23 1 0.2676
## AvgLineBlank 3.89 1 0.0486
## AvgLineComment 2.79 1 0.0950
## CountDeclClass 0.20 1 0.6582
## CountDeclFunction 10.87 1 0.0010
## RatioCommentToCode 2.94 1 0.0864
## X..Catch 8.93 1 0.0028
## TOTAL 111.72 9 <.0001
# Run the simplification step on the ALL.BASE models. The helper is defined
# outside this chunk; its console output shows a "Refit" model per project
# followed by predicted values at the mean and at mean + 10%.
model_things_1_BASE <- modelSimplification(
  models_1_BASE,
  model_things_1_BASE
)
## [1] "project: hadoop-2.6 model: ALL.BASE Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 3662 LR chi2 435.76 R2 0.305 C 0.843
## FALSE 3439 d.f. 6 g 1.112 Dxy 0.686
## TRUE 223 Pr(> chi2) <0.0001 gr 3.039 gamma 0.704
## max |deriv| 7e-13 gp 0.076 tau-a 0.078
## Brier 0.045
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -4.5565 0.1819 -25.04 <0.0001
## Distinct.count.of.Issue.Key.PRE 1.9348 0.4198 4.61 <0.0001
## Churn.PRE 0.7776 0.0900 8.64 <0.0001
## AvgLineComment 1.0720 0.4323 2.48 0.0131
## CountDeclClassVariable 0.6407 0.1821 3.52 0.0004
## CountDeclMethodDefault 0.3969 0.1810 2.19 0.0283
## MaxEssential 1.0918 0.3207 3.40 0.0007
##
## [1] "project: hadoop-2.6 model: ALL.BASE Refit - summary"
## [1] "project: hadoop-2.6 model: ALL.BASE Refit - validate"
## [1] "project: hadoop-2.6 model: ALL.BASE Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Issue.Key.PRE 21.24 1 <.0001
## Churn.PRE 74.61 1 <.0001
## AvgLineComment 6.15 1 0.0131
## CountDeclClassVariable 12.39 1 0.0004
## CountDeclMethodDefault 4.81 1 0.0283
## MaxEssential 11.59 1 0.0007
## TOTAL 364.01 6 <.0001
## [1] "Distinct.count.of.Issue.Key.PRE"
## [1] "Churn.PRE"
## [1] "AvgLineComment"
## [1] "CountDeclClassVariable"
## [1] "CountDeclMethodDefault"
## [1] "MaxEssential"
## Distinct.count.of.Issue.Key.PRE Churn.PRE AvgLineComment
## 1 0.1815948 35.92873 0.1876024
## CountDeclClassVariable CountDeclMethodDefault MaxEssential
## 1 3.207264 2.5 2.105953
## [1] "Fixed at Mean: 0.123004383674709"
## [1] "Distinct.count.of.Issue.Key.PRE Coef at Mean + 10%: 0.124393541892917"
## [1] "Churn.PRE Coef at Mean + 10%: 0.126426941265937"
## [1] "AvgLineComment Coef at Mean + 10%: 0.123793698883498"
## [1] "CountDeclClassVariable Coef at Mean + 10%: 0.125226626518196"
## [1] "CountDeclMethodDefault Coef at Mean + 10%: 0.124293128730026"
## [1] "MaxEssential Coef at Mean + 10%: 0.126399646985173"
## [1] "project: hibernate-5.0 model: ALL.BASE Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 3450 LR chi2 370.78 R2 0.211 C 0.784
## FALSE 3096 d.f. 8 g 1.320 Dxy 0.568
## TRUE 354 Pr(> chi2) <0.0001 gr 3.742 gamma 0.572
## max |deriv| 1e-13 gp 0.103 tau-a 0.105
## Brier 0.080
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -4.0272 0.2605 -15.46 <0.0001
## Distinct.count.of.Issue.Key.PRE 1.3039 0.2232 5.84 <0.0001
## CountDeclClassMethod -0.6381 0.2609 -2.45 0.0145
## CountDeclClassVariable -0.6382 0.2939 -2.17 0.0299
## CountDeclInstanceVariable -0.6315 0.2013 -3.14 0.0017
## CountDeclMethodPrivate 0.8407 0.2600 3.23 0.0012
## CountDeclMethodPublic 1.2029 0.1926 6.24 <0.0001
## CountLineComment 1.0839 0.2559 4.24 <0.0001
## RatioCommentToCode -2.7812 0.4801 -5.79 <0.0001
##
## [1] "project: hibernate-5.0 model: ALL.BASE Refit - summary"
## [1] "project: hibernate-5.0 model: ALL.BASE Refit - validate"
## [1] "project: hibernate-5.0 model: ALL.BASE Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Issue.Key.PRE 34.11 1 <.0001
## CountDeclClassMethod 5.98 1 0.0145
## CountDeclClassVariable 4.72 1 0.0299
## CountDeclInstanceVariable 9.85 1 0.0017
## CountDeclMethodPrivate 10.46 1 0.0012
## CountDeclMethodPublic 38.99 1 <.0001
## CountLineComment 17.94 1 <.0001
## RatioCommentToCode 33.56 1 <.0001
## TOTAL 294.73 8 <.0001
## [1] "Distinct.count.of.Issue.Key.PRE"
## [1] "CountDeclClassMethod"
## [1] "CountDeclClassVariable"
## [1] "CountDeclInstanceVariable"
## [1] "CountDeclMethodPrivate"
## [1] "CountDeclMethodPublic"
## [1] "CountLineComment"
## [1] "RatioCommentToCode"
## Distinct.count.of.Issue.Key.PRE CountDeclClassMethod
## 1 0.8608696 0.4921739
## CountDeclClassVariable CountDeclInstanceVariable CountDeclMethodPrivate
## 1 0.5472464 2.086667 0.8017391
## CountDeclMethodPublic CountLineComment RatioCommentToCode
## 1 6.750435 30.19391 1.602136
## [1] "Fixed at Mean: 0.0781686690572031"
## [1] "Distinct.count.of.Issue.Key.PRE Coef at Mean + 10%: 0.0800340031553238"
## [1] "CountDeclClassMethod Coef at Mean + 10%: 0.0775231487255036"
## [1] "CountDeclClassVariable Coef at Mean + 10%: 0.0774773009175703"
## [1] "CountDeclInstanceVariable Coef at Mean + 10%: 0.0768856493217207"
## [1] "CountDeclMethodPrivate Coef at Mean + 10%: 0.0793218101740674"
## [1] "CountDeclMethodPublic Coef at Mean + 10%: 0.0813708002046483"
## [1] "CountLineComment Coef at Mean + 10%: 0.0813606299522386"
## [1] "RatioCommentToCode Coef at Mean + 10%: 0.0731241274231537"
## [1] "project: umbraco-7.6 model: ALL.BASE Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 3083 LR chi2 90.19 R2 0.130 C 0.752
## FALSE 2999 d.f. 3 g 0.937 Dxy 0.504
## TRUE 84 Pr(> chi2) <0.0001 gr 2.552 gamma 0.526
## max |deriv| 4e-08 gp 0.027 tau-a 0.027
## Brier 0.025
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -5.0906 0.2761 -18.44 <0.0001
## Distinct.count.of.Author.Email.PRE 3.2032 0.4964 6.45 <0.0001
## AvgLineBlank 1.0669 0.4850 2.20 0.0278
## CountDeclFunction 1.2719 0.2435 5.22 <0.0001
##
## [1] "project: umbraco-7.6 model: ALL.BASE Refit - summary"
## [1] "project: umbraco-7.6 model: ALL.BASE Refit - validate"
## [1] "project: umbraco-7.6 model: ALL.BASE Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 41.64 1 <.0001
## AvgLineBlank 4.84 1 0.0278
## CountDeclFunction 27.29 1 <.0001
## TOTAL 97.44 3 <.0001
## [1] "Distinct.count.of.Author.Email.PRE"
## [1] "AvgLineBlank"
## [1] "CountDeclFunction"
## Distinct.count.of.Author.Email.PRE AvgLineBlank CountDeclFunction
## 1 0.203049 0.4469672 7.637366
## [1] "Fixed at Mean: 0.0301397678996167"
## [1] "Distinct.count.of.Author.Email.PRE Coef at Mean + 10%: 0.0308278626078932"
## [1] "AvgLineBlank Coef at Mean + 10%: 0.030554569960672"
## [1] "CountDeclFunction Coef at Mean + 10%: 0.0315383464906605"
# Same simplification step as for the base models, applied to the ALL.BSEH
# models; the result replaces the previous statistics object.
model_things_1_BSEH <- modelSimplification(
  models_1_BSEH,
  model_things_1_BSEH
)
## [1] "project: hadoop-2.6 model: ALL.BSEH Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 3662 LR chi2 476.36 R2 0.331 C 0.854
## FALSE 3439 d.f. 6 g 1.296 Dxy 0.708
## TRUE 223 Pr(> chi2) <0.0001 gr 3.655 gamma 0.716
## max |deriv| 1e-09 gp 0.080 tau-a 0.081
## Brier 0.043
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -3.8208 0.3011 -12.69 <0.0001
## Distinct.count.of.Author.Email.PRE 4.1195 0.3136 13.13 <0.0001
## CountDeclClassVariable 0.4366 0.2048 2.13 0.0331
## CountDeclMethodPublic -0.3627 0.1804 -2.01 0.0444
## MaxEssential 0.9128 0.3435 2.66 0.0079
## RatioCommentToCode -1.7704 0.6652 -2.66 0.0078
## X..Catch 0.8016 0.2342 3.42 0.0006
##
## [1] "project: hadoop-2.6 model: ALL.BSEH Refit - summary"
## [1] "project: hadoop-2.6 model: ALL.BSEH Refit - validate"
## [1] "project: hadoop-2.6 model: ALL.BSEH Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 172.53 1 <.0001
## CountDeclClassVariable 4.54 1 0.0331
## CountDeclMethodPublic 4.04 1 0.0444
## MaxEssential 7.06 1 0.0079
## RatioCommentToCode 7.08 1 0.0078
## X..Catch 11.71 1 0.0006
## TOTAL 375.72 6 <.0001
## [1] "Distinct.count.of.Author.Email.PRE"
## [1] "CountDeclClassVariable"
## [1] "CountDeclMethodPublic"
## [1] "MaxEssential"
## [1] "RatioCommentToCode"
## [1] "X..Catch"
## Distinct.count.of.Author.Email.PRE CountDeclClassVariable
## 1 0.5030038 3.207264
## CountDeclMethodPublic MaxEssential RatioCommentToCode X..Catch
## 1 20.03359 2.105953 1.249798 1.610049
## [1] "Fixed at Mean: 0.0415111995740052"
## [1] "Distinct.count.of.Author.Email.PRE Coef at Mean + 10%: 0.0439188000839555"
## [1] "CountDeclClassVariable Coef at Mean + 10%: 0.0420689470587737"
## [1] "CountDeclMethodPublic Coef at Mean + 10%: 0.0409447729221982"
## [1] "MaxEssential Coef at Mean + 10%: 0.0425584030154636"
## [1] "RatioCommentToCode Coef at Mean + 10%: 0.0398884338814381"
## [1] "X..Catch Coef at Mean + 10%: 0.042348247764015"
## [1] "project: hibernate-5.0 model: ALL.BSEH Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 3450 LR chi2 380.16 R2 0.216 C 0.787
## FALSE 3096 d.f. 8 g 1.288 Dxy 0.574
## TRUE 354 Pr(> chi2) <0.0001 gr 3.625 gamma 0.577
## max |deriv| 9e-14 gp 0.104 tau-a 0.106
## Brier 0.079
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -3.7491 0.2627 -14.27 <0.0001
## Distinct.count.of.Issue.Key.PRE 1.2733 0.2245 5.67 <0.0001
## CountDeclClassVariable -0.6819 0.2962 -2.30 0.0213
## CountDeclInstanceVariable -0.5058 0.1964 -2.58 0.0100
## CountDeclMethodPrivate 0.6097 0.2483 2.46 0.0141
## CountDeclMethodPublic 1.0597 0.1952 5.43 <0.0001
## CountLineComment 0.7713 0.2576 2.99 0.0028
## RatioCommentToCode -2.4712 0.4795 -5.15 <0.0001
## X..Throws 0.7479 0.1880 3.98 <0.0001
##
## [1] "project: hibernate-5.0 model: ALL.BSEH Refit - summary"
## [1] "project: hibernate-5.0 model: ALL.BSEH Refit - validate"
## [1] "project: hibernate-5.0 model: ALL.BSEH Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Issue.Key.PRE 32.17 1 <.0001
## CountDeclClassVariable 5.30 1 0.0213
## CountDeclInstanceVariable 6.63 1 0.0100
## CountDeclMethodPrivate 6.03 1 0.0141
## CountDeclMethodPublic 29.48 1 <.0001
## CountLineComment 8.96 1 0.0028
## RatioCommentToCode 26.56 1 <.0001
## X..Throws 15.83 1 0.0001
## TOTAL 300.29 8 <.0001
## [1] "Distinct.count.of.Issue.Key.PRE"
## [1] "CountDeclClassVariable"
## [1] "CountDeclInstanceVariable"
## [1] "CountDeclMethodPrivate"
## [1] "CountDeclMethodPublic"
## [1] "CountLineComment"
## [1] "RatioCommentToCode"
## [1] "X..Throws"
## Distinct.count.of.Issue.Key.PRE CountDeclClassVariable
## 1 0.8608696 0.5472464
## CountDeclInstanceVariable CountDeclMethodPrivate CountDeclMethodPublic
## 1 2.086667 0.8017391 6.750435
## CountLineComment RatioCommentToCode X..Throws
## 1 30.19391 1.602136 0.9147826
## [1] "Fixed at Mean: 0.0872898819679512"
## [1] "Distinct.count.of.Issue.Key.PRE Coef at Mean + 10%: 0.0893029606500913"
## [1] "CountDeclClassVariable Coef at Mean + 10%: 0.0864732499914876"
## [1] "CountDeclInstanceVariable Coef at Mean + 10%: 0.0861519023412411"
## [1] "CountDeclMethodPrivate Coef at Mean + 10%: 0.0882127342132414"
## [1] "CountDeclMethodPublic Coef at Mean + 10%: 0.0904008426227624"
## [1] "CountLineComment Coef at Mean + 10%: 0.0897872626790859"
## [1] "RatioCommentToCode Coef at Mean + 10%: 0.0823145150572586"
## [1] "X..Throws Coef at Mean + 10%: 0.0885050898270227"
## [1] "project: umbraco-7.6 model: ALL.BSEH Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 3083 LR chi2 94.84 R2 0.137 C 0.750
## FALSE 2999 d.f. 3 g 0.790 Dxy 0.501
## TRUE 84 Pr(> chi2) <0.0001 gr 2.203 gamma 0.525
## max |deriv| 5e-09 gp 0.026 tau-a 0.027
## Brier 0.025
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -4.7810 0.2516 -19.01 <0.0001
## Distinct.count.of.Author.Email.PRE 3.2044 0.5009 6.40 <0.0001
## CountDeclFunction 0.9304 0.2542 3.66 0.0003
## X..Catch 1.5387 0.4918 3.13 0.0018
##
## [1] "project: umbraco-7.6 model: ALL.BSEH Refit - summary"
## [1] "project: umbraco-7.6 model: ALL.BSEH Refit - validate"
## [1] "project: umbraco-7.6 model: ALL.BSEH Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 40.93 1 <.0001
## CountDeclFunction 13.39 1 0.0003
## X..Catch 9.79 1 0.0018
## TOTAL 108.60 3 <.0001
## [1] "Distinct.count.of.Author.Email.PRE"
## [1] "CountDeclFunction"
## [1] "X..Catch"
## Distinct.count.of.Author.Email.PRE CountDeclFunction X..Catch
## 1 0.203049 7.637366 0.170613
## [1] "Fixed at Mean: 0.0279969681007204"
## [1] "Distinct.count.of.Author.Email.PRE Coef at Mean + 10%: 0.0286378345266876"
## [1] "CountDeclFunction Coef at Mean + 10%: 0.0289438662470886"
## [1] "X..Catch Coef at Mean + 10%: 0.028261306759101"
Here we take the selected statistics returned by the R functions and write them to CSV files in the “output” folder.
# Export the ALL.BASE statistics to output/base_test_1.csv.
# Step 1: write only the header row. Indexing with [0, ] keeps the column
# names of the first element but no data rows, and append = FALSE overwrites
# any existing file.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
write.table(
  data.frame(model_things_1_BASE[[1]])[0, ],
  file = "output/base_test_1.csv",
  append = FALSE, sep = ",", row.names = FALSE, col.names = TRUE
)
# Step 2: append the rows of every element of model_things_1_BASE without
# repeating the header. The lapply() result (a list of NULLs) is still
# auto-printed, exactly as before.
lapply(model_things_1_BASE, function(x) {
  write.table(
    data.frame(x),
    file = "output/base_test_1.csv",
    append = TRUE, sep = ",", row.names = FALSE, col.names = FALSE
  )
})
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
# Append the ALL.BSEH statistics to the same CSV file, one block of rows per
# element of model_things_1_BSEH and without a header row.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
lapply(model_things_1_BSEH, function(x) {
  write.table(
    data.frame(x),
    file = "output/base_test_1.csv",
    append = TRUE, sep = ",", row.names = FALSE, col.names = FALSE
  )
})
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
We now treat files without catch blocks as missing data and re-run the analysis.
# Build the per-project data sets restricted to files that contain at least
# one catch block: rows with X..Catch == 0 are turned into NA and removed,
# then the throws-related columns are dropped.
all_list_omitted_2 <- all_no_missing
# Preallocate one slot per project instead of growing the list with c().
all_list_omitted <- vector("list", length(projects))
# seq_along() gives an empty sequence when `projects` is empty, whereas
# 1:length(projects) would iterate over c(1, 0).
for (i in seq_along(projects)) {
  print(paste("Project:", projects[i]))
  temp_data <- as.data.frame(all_list_omitted_2[i])
  # Keep the untouched input so it can be dumped if filtering removes all rows.
  temp_data_bkp <- temp_data
  print(paste("nrow:", nrow(temp_data), "ncol:", ncol(temp_data)))
  # Make 0 catch blocks become NA and remove NA's.
  # NOTE(review): na.omit() drops rows with an NA in ANY column; this assumes
  # the input has no other missing values -- confirm against all_no_missing.
  No_Catch <- temp_data$X..Catch == 0
  temp_data$X..Catch[No_Catch] <- NA
  temp_data <- na.omit(temp_data)
  print(paste("nrow:", nrow(temp_data), "ncol:", ncol(temp_data)))
  if (nrow(temp_data) == 0) {
    # No file with catch blocks is left: write the original rows for debugging.
    write.csv(temp_data_bkp, file = "temp_data.csv")
  }
  # Remove throws columns. drop = FALSE keeps a data frame even if a single
  # column remains.
  temp_data <- temp_data[, !(names(temp_data) %in% throws_names), drop = FALSE]
  print(paste("nrow:", nrow(temp_data), "ncol:", ncol(temp_data)))
  all_list_omitted[[i]] <- list(project = projects[i], data = temp_data)
}
## [1] "Project: hadoop-2.6"
## [1] "nrow: 3662 ncol: 159"
## [1] "nrow: 890 ncol: 159"
## [1] "nrow: 890 ncol: 152"
## [1] "Project: hibernate-5.0"
## [1] "nrow: 3450 ncol: 159"
## [1] "nrow: 440 ncol: 159"
## [1] "nrow: 440 ncol: 152"
## [1] "Project: umbraco-7.6"
## [1] "nrow: 3083 ncol: 159"
## [1] "nrow: 230 ncol: 159"
## [1] "nrow: 230 ncol: 152"
# Called for its console output only (the result is not assigned): per project
# it prints the D.F. budget, skewness and kurtosis shown below. The helper is
# defined outside this chunk.
modelSelectionAndNormalityAdjustment(all_list_omitted)
## [1] "Project: hadoop-2.6 D.F. Budget: 59"
## [1] "Project: hibernate-5.0 D.F. Budget: 29"
## [1] "Project: umbraco-7.6 D.F. Budget: 15"
## [1] "Project: hadoop-2.6 skewness, 6.74527749465777"
## [1] "Project: hadoop-2.6 kurtosis, 71.519920764886"
## [1] "Project: hibernate-5.0 skewness, 3.71910197274668"
## [1] "Project: hibernate-5.0 kurtosis, 17.4752211740334"
## [1] "Project: umbraco-7.6 skewness, 4.0532455757886"
## [1] "Project: umbraco-7.6 kurtosis, 20.7020533105116"
## Model 0 - BASE Only ### Drop variables
# Model 0 (BASE only): for every project, remove the catch-, try- and
# throws-related columns so that only the base metrics remain.
# Preallocate one slot per project instead of growing the list with c().
all_list_omitted_m0 <- vector("list", length(projects))

# The drop lists do not depend on the project, so they are computed once
# outside the loop. Identification columns are never dropped.
keepForID <- c("Project", "File.Path")
keepForCatch <- c(keepForID)
keepForTry <- c(keepForID)
keepForThrows <- c(keepForID)
catch_names_drop <- catch_names[!(catch_names %in% keepForCatch)]
try_names_drop <- try_names[!(try_names %in% keepForTry)]
throws_names_drop <- throws_names[!(throws_names %in% keepForThrows)]

# seq_along() gives an empty sequence when `projects` is empty, whereas
# 1:length(projects) would iterate over c(1, 0).
for (i in seq_along(projects)) {
  print(paste("Project:", projects[i]))
  temp_data <- as.data.frame(all_list_omitted[[i]]$data)
  # drop = FALSE keeps a data frame even if a single column remains.
  temp_data <- temp_data[, !(names(temp_data) %in% catch_names_drop), drop = FALSE]
  temp_data <- temp_data[, !(names(temp_data) %in% try_names_drop), drop = FALSE]
  temp_data <- temp_data[, !(names(temp_data) %in% throws_names_drop), drop = FALSE]
  print(names(temp_data))
  all_list_omitted_m0[[i]] <- temp_data
}
## [1] "Project: hadoop-2.6"
## [1] "fileSize"
## [2] "Distinct.count.of.Author.Email.PRE"
## [3] "Distinct.count.of.Commit.Hash.PRE"
## [4] "Distinct.count.of.Issue.Key.POST"
## [5] "Distinct.count.of.Issue.Key.PRE"
## [6] "Churn.PRE"
## [7] "AvgCyclomaticModified"
## [8] "AvgCyclomaticStrict"
## [9] "AvgCyclomatic"
## [10] "AvgEssential"
## [11] "AvgLineBlank"
## [12] "AvgLineCode"
## [13] "AvgLineComment"
## [14] "AvgLine"
## [15] "CountDeclClassMethod"
## [16] "CountDeclClassVariable"
## [17] "CountDeclClass"
## [18] "CountDeclFunction"
## [19] "CountDeclInstanceMethod"
## [20] "CountDeclInstanceVariable"
## [21] "CountDeclMethodDefault"
## [22] "CountDeclMethodPrivate"
## [23] "CountDeclMethodProtected"
## [24] "CountDeclMethodPublic"
## [25] "CountDeclMethod"
## [26] "CountLineBlank"
## [27] "CountLineCodeDecl"
## [28] "CountLineCodeExe"
## [29] "CountLineCode"
## [30] "CountLineComment"
## [31] "CountLine"
## [32] "CountSemicolon"
## [33] "CountStmtDecl"
## [34] "CountStmtExe"
## [35] "CountStmt"
## [36] "File"
## [37] "Kind"
## [38] "MaxCyclomaticModified"
## [39] "MaxCyclomaticStrict"
## [40] "MaxCyclomatic"
## [41] "MaxEssential"
## [42] "MaxNesting"
## [43] "Name"
## [44] "Number.of.Records"
## [45] "RatioCommentToCode"
## [46] "SumCyclomaticModified"
## [47] "SumCyclomaticStrict"
## [48] "SumCyclomatic"
## [49] "SumEssential"
## [50] "Table.Name"
## [1] "Project: hibernate-5.0"
## [1] "fileSize"
## [2] "Distinct.count.of.Author.Email.PRE"
## [3] "Distinct.count.of.Commit.Hash.PRE"
## [4] "Distinct.count.of.Issue.Key.POST"
## [5] "Distinct.count.of.Issue.Key.PRE"
## [6] "Churn.PRE"
## [7] "AvgCyclomaticModified"
## [8] "AvgCyclomaticStrict"
## [9] "AvgCyclomatic"
## [10] "AvgEssential"
## [11] "AvgLineBlank"
## [12] "AvgLineCode"
## [13] "AvgLineComment"
## [14] "AvgLine"
## [15] "CountDeclClassMethod"
## [16] "CountDeclClassVariable"
## [17] "CountDeclClass"
## [18] "CountDeclFunction"
## [19] "CountDeclInstanceMethod"
## [20] "CountDeclInstanceVariable"
## [21] "CountDeclMethodDefault"
## [22] "CountDeclMethodPrivate"
## [23] "CountDeclMethodProtected"
## [24] "CountDeclMethodPublic"
## [25] "CountDeclMethod"
## [26] "CountLineBlank"
## [27] "CountLineCodeDecl"
## [28] "CountLineCodeExe"
## [29] "CountLineCode"
## [30] "CountLineComment"
## [31] "CountLine"
## [32] "CountSemicolon"
## [33] "CountStmtDecl"
## [34] "CountStmtExe"
## [35] "CountStmt"
## [36] "File"
## [37] "Kind"
## [38] "MaxCyclomaticModified"
## [39] "MaxCyclomaticStrict"
## [40] "MaxCyclomatic"
## [41] "MaxEssential"
## [42] "MaxNesting"
## [43] "Name"
## [44] "Number.of.Records"
## [45] "RatioCommentToCode"
## [46] "SumCyclomaticModified"
## [47] "SumCyclomaticStrict"
## [48] "SumCyclomatic"
## [49] "SumEssential"
## [50] "Table.Name"
## [1] "Project: umbraco-7.6"
## [1] "fileSize"
## [2] "Distinct.count.of.Author.Email.PRE"
## [3] "Distinct.count.of.Commit.Hash.PRE"
## [4] "Distinct.count.of.Issue.Key.POST"
## [5] "Distinct.count.of.Issue.Key.PRE"
## [6] "Churn.PRE"
## [7] "AvgCyclomaticModified"
## [8] "AvgCyclomaticStrict"
## [9] "AvgCyclomatic"
## [10] "AvgEssential"
## [11] "AvgLineBlank"
## [12] "AvgLineCode"
## [13] "AvgLineComment"
## [14] "AvgLine"
## [15] "CountDeclClassMethod"
## [16] "CountDeclClassVariable"
## [17] "CountDeclClass"
## [18] "CountDeclFunction"
## [19] "CountDeclInstanceMethod"
## [20] "CountDeclInstanceVariable"
## [21] "CountDeclMethodDefault"
## [22] "CountDeclMethodPrivate"
## [23] "CountDeclMethodProtected"
## [24] "CountDeclMethodPublic"
## [25] "CountDeclMethod"
## [26] "CountLineBlank"
## [27] "CountLineCodeDecl"
## [28] "CountLineCodeExe"
## [29] "CountLineCode"
## [30] "CountLineComment"
## [31] "CountLine"
## [32] "CountSemicolon"
## [33] "CountStmtDecl"
## [34] "CountStmtExe"
## [35] "CountStmt"
## [36] "File"
## [37] "Kind"
## [38] "MaxCyclomaticModified"
## [39] "MaxCyclomaticStrict"
## [40] "MaxCyclomatic"
## [41] "MaxEssential"
## [42] "MaxNesting"
## [43] "Name"
## [44] "Number.of.Records"
## [45] "RatioCommentToCode"
## [46] "SumCyclomaticModified"
## [47] "SumCyclomaticStrict"
## [48] "SumCyclomatic"
## [49] "SumEssential"
## [50] "Table.Name"
# Apply the variable-reduction step to the base-only data sets. The helper is
# defined outside this chunk; its console output reports the metrics kept,
# the redundant variables and the budget check per project.
all_list_model_m0 <- dataApplyReduction(
  all_list_omitted_m0
)
## [1] "Project: hadoop-2.6"
## [1] "NumberOfMetricsInitial: 45"
## [1] "NumberOfMetricsKept: 17"
## [1] "Distinct.count.of.Commit.Hash.PRE + AvgCyclomaticStrict + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassMethod + CountDeclClassVariable + CountDeclClass + CountDeclInstanceVariable + CountDeclMethodDefault + CountDeclMethodPrivate + CountDeclMethodProtected + CountDeclMethodPublic + CountLineComment + MaxEssential + MaxNesting + RatioCommentToCode"
## [1] "Project: hibernate-5.0"
## [1] "NumberOfMetricsInitial: 45"
## [1] "NumberOfMetricsKept: 18"
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.PRE + AvgCyclomaticModified + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassMethod + CountDeclClassVariable + CountDeclClass + CountDeclInstanceVariable + CountDeclMethodDefault + CountDeclMethodPrivate + CountDeclMethodProtected + CountDeclMethodPublic + CountLineComment + MaxEssential + MaxNesting + RatioCommentToCode"
## [1] "Project: umbraco-7.6"
## [1] "NumberOfMetricsInitial: 45"
## [1] "NumberOfMetricsKept: 12"
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.PRE + AvgCyclomaticStrict + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClass + CountLineBlank + MaxCyclomaticModified + MaxEssential + MaxNesting + RatioCommentToCode"
## [1] "Project: hadoop-2.6"
## [1] "Redudant variables: CountDeclMethodPublic,␣CountDeclClassMethod,␣CountDeclMethodPrivate,␣CountDeclInstanceVariable,␣CountDeclClass,␣CountDeclClassVariable,␣CountLineComment"
## [1] "Distinct.count.of.Commit.Hash.PRE + Distinct.count.of.Issue.Key.POST + AvgCyclomaticStrict + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclMethodDefault + CountDeclMethodProtected + File + Kind + MaxEssential + MaxNesting + Name + RatioCommentToCode + Table.Name"
## [1] "Project: hibernate-5.0"
## [1] "Redudant variables: "
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.POST + Distinct.count.of.Issue.Key.PRE + AvgCyclomaticModified + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassMethod + CountDeclClassVariable + CountDeclClass + CountDeclInstanceVariable + CountDeclMethodDefault + CountDeclMethodPrivate + CountDeclMethodProtected + CountDeclMethodPublic + CountLineComment + File + Kind + MaxEssential + MaxNesting + Name + RatioCommentToCode + Table.Name"
## [1] "Project: umbraco-7.6"
## [1] "Redudant variables: "
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.POST + Distinct.count.of.Issue.Key.PRE + AvgCyclomaticStrict + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClass + CountLineBlank + File + Kind + MaxCyclomaticModified + MaxEssential + MaxNesting + Name + RatioCommentToCode + Table.Name"
## [1] "Project: hadoop-2.6"
## [1] "NumberOfMetricsInitial: 10 Budget: 59 Over Budget: FALSE NumberOfMetricsKept: 10 CorrelationCutoff: 0.7"
## [1] "Project: hibernate-5.0"
## [1] "NumberOfMetricsInitial: 18 Budget: 29 Over Budget: FALSE NumberOfMetricsKept: 18 CorrelationCutoff: 0.7"
## [1] "Project: umbraco-7.6"
## [1] "NumberOfMetricsInitial: 12 Budget: 15 Over Budget: FALSE NumberOfMetricsKept: 12 CorrelationCutoff: 0.7"
# Build the formulas for the binary response from the reduced data sets; the
# helper prints the right-hand side of each project's formula.
form_list_bin_m0 <- dataSetupFormulasBinary(
  all_list_model_m0
)
## [1] "Project: hadoop-2.6"
## [1] "Distinct.count.of.Commit.Hash.PRE + AvgCyclomaticStrict + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclMethodDefault + CountDeclMethodProtected + MaxEssential + MaxNesting + RatioCommentToCode"
## [1] "Project: hibernate-5.0"
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.PRE + AvgCyclomaticModified + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClassMethod + CountDeclClassVariable + CountDeclClass + CountDeclInstanceVariable + CountDeclMethodDefault + CountDeclMethodPrivate + CountDeclMethodProtected + CountDeclMethodPublic + CountLineComment + MaxEssential + MaxNesting + RatioCommentToCode"
## [1] "Project: umbraco-7.6"
## [1] "Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.PRE + AvgCyclomaticStrict + AvgEssential + AvgLineBlank + AvgLineComment + CountDeclClass + CountLineBlank + MaxCyclomaticModified + MaxEssential + MaxNesting + RatioCommentToCode"
# Fit one logistic regression per project on the catch-only data, labelled
# "CAT.BASE" (the fitted lrm models are printed below).
models_2_BASE <- modelFitLogistic(
  all_list_model_m0,
  form_list_bin_m0,
  "CAT.BASE"
)
## [1] "Project: hadoop-2.6"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 890 LR chi2 201.97 R2 0.347 C 0.838
## FALSE 747 d.f. 10 g 1.472 Dxy 0.677
## TRUE 143 Pr(> chi2) <0.0001 gr 4.359 gamma 0.679
## max |deriv| 6e-09 gp 0.179 tau-a 0.183
## Brier 0.100
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -3.8209 0.7348 -5.20 <0.0001
## Distinct.count.of.Commit.Hash.PRE 3.0387 0.3237 9.39 <0.0001
## AvgCyclomaticStrict 4.0722 1.2426 3.28 0.0010
## AvgEssential -4.2398 1.6071 -2.64 0.0083
## AvgLineBlank -1.1918 0.7955 -1.50 0.1341
## AvgLineComment 0.2730 0.7050 0.39 0.6986
## CountDeclMethodDefault 0.2415 0.2406 1.00 0.3157
## CountDeclMethodProtected 0.0093 0.2697 0.03 0.9724
## MaxEssential 0.8405 0.5782 1.45 0.1460
## MaxNesting -0.1061 1.0227 -0.10 0.9173
## RatioCommentToCode -2.2086 1.7023 -1.30 0.1945
##
## [1] "Project: hibernate-5.0"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 440 LR chi2 81.26 R2 0.262 C 0.780
## FALSE 347 d.f. 18 g 1.250 Dxy 0.561
## TRUE 93 Pr(> chi2) <0.0001 gr 3.491 gamma 0.562
## max |deriv| 6e-12 gp 0.187 tau-a 0.187
## Brier 0.133
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -3.5085 1.1036 -3.18 0.0015
## Distinct.count.of.Author.Email.PRE 1.8362 0.9499 1.93 0.0532
## Distinct.count.of.Issue.Key.PRE 0.9870 0.5853 1.69 0.0917
## AvgCyclomaticModified -3.1864 1.7111 -1.86 0.0626
## AvgEssential 1.1959 2.0271 0.59 0.5552
## AvgLineBlank 0.8497 0.9166 0.93 0.3539
## AvgLineComment 0.8245 0.9984 0.83 0.4089
## CountDeclClassMethod -0.5618 0.4770 -1.18 0.2389
## CountDeclClassVariable -0.2766 0.5120 -0.54 0.5890
## CountDeclClass -0.1171 0.8290 -0.14 0.8876
## CountDeclInstanceVariable -0.4749 0.4822 -0.99 0.3246
## CountDeclMethodDefault 0.0178 0.7730 0.02 0.9816
## CountDeclMethodPrivate 0.4233 0.5320 0.80 0.4262
## CountDeclMethodProtected -0.3484 0.3928 -0.89 0.3750
## CountDeclMethodPublic 0.5626 0.6941 0.81 0.4177
## CountLineComment 1.3411 1.0419 1.29 0.1980
## MaxEssential 0.2553 0.7863 0.32 0.7455
## MaxNesting 0.6304 1.1964 0.53 0.5983
## RatioCommentToCode -7.1742 4.6800 -1.53 0.1253
##
## [1] "Project: umbraco-7.6"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 230 LR chi2 19.27 R2 0.172 C 0.770
## FALSE 208 d.f. 12 g 1.105 Dxy 0.539
## TRUE 22 Pr(> chi2) 0.0822 gr 3.020 gamma 0.544
## max |deriv| 1e-09 gp 0.090 tau-a 0.094
## Brier 0.078
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -4.3057 1.8728 -2.30 0.0215
## Distinct.count.of.Author.Email.PRE 0.9873 1.4593 0.68 0.4987
## Distinct.count.of.Issue.Key.PRE 2.4547 2.3846 1.03 0.3033
## AvgCyclomaticStrict -0.7456 2.5588 -0.29 0.7708
## AvgEssential -0.5002 3.9258 -0.13 0.8986
## AvgLineBlank 0.8868 1.6796 0.53 0.5975
## AvgLineComment -0.6507 1.6507 -0.39 0.6934
## CountDeclClass -0.4990 1.8050 -0.28 0.7822
## CountLineBlank 1.5155 0.9517 1.59 0.1113
## MaxCyclomaticModified -2.5628 2.0197 -1.27 0.2045
## MaxEssential 2.1070 1.4524 1.45 0.1469
## MaxNesting 1.8571 2.0379 0.91 0.3622
## RatioCommentToCode -4.3581 4.7294 -0.92 0.3568
##
In this section, we present the selected statistics for our analysis. As explained in our approach, these correspond to steps MC7, MA1, MA2, MA3 and MA4.
Here we extract the selected statistics and add them (as columns) to an object that will be exported to CSV in the Output section.
# Collect the statistics for the CAT.BASE models. The object starts as an
# empty list and is replaced or extended by each helper in turn: model
# statistics, validation, then significance (which prints the Wald tables).
model_things_2_BASE <- list()
model_things_2_BASE <- modelStats(models_2_BASE)
model_things_2_BASE <- modelValidate(
  models_2_BASE,
  model_things_2_BASE
)
model_things_2_BASE <- modelSignificance(
  models_2_BASE,
  model_things_2_BASE
)
## [1] "project: hadoop-2.6 model: CAT.BASE"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Commit.Hash.PRE 88.14 1 <.0001
## AvgCyclomaticStrict 10.74 1 0.0010
## AvgEssential 6.96 1 0.0083
## AvgLineBlank 2.24 1 0.1341
## AvgLineComment 0.15 1 0.6986
## CountDeclMethodDefault 1.01 1 0.3157
## CountDeclMethodProtected 0.00 1 0.9724
## MaxEssential 2.11 1 0.1460
## MaxNesting 0.01 1 0.9173
## RatioCommentToCode 1.68 1 0.1945
## TOTAL 146.76 10 <.0001
## [1] "project: hibernate-5.0 model: CAT.BASE"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 3.74 1 0.0532
## Distinct.count.of.Issue.Key.PRE 2.84 1 0.0917
## AvgCyclomaticModified 3.47 1 0.0626
## AvgEssential 0.35 1 0.5552
## AvgLineBlank 0.86 1 0.3539
## AvgLineComment 0.68 1 0.4089
## CountDeclClassMethod 1.39 1 0.2389
## CountDeclClassVariable 0.29 1 0.5890
## CountDeclClass 0.02 1 0.8876
## CountDeclInstanceVariable 0.97 1 0.3246
## CountDeclMethodDefault 0.00 1 0.9816
## CountDeclMethodPrivate 0.63 1 0.4262
## CountDeclMethodProtected 0.79 1 0.3750
## CountDeclMethodPublic 0.66 1 0.4177
## CountLineComment 1.66 1 0.1980
## MaxEssential 0.11 1 0.7455
## MaxNesting 0.28 1 0.5983
## RatioCommentToCode 2.35 1 0.1253
## TOTAL 63.90 18 <.0001
## [1] "project: umbraco-7.6 model: CAT.BASE"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 0.46 1 0.4987
## Distinct.count.of.Issue.Key.PRE 1.06 1 0.3033
## AvgCyclomaticStrict 0.08 1 0.7708
## AvgEssential 0.02 1 0.8986
## AvgLineBlank 0.28 1 0.5975
## AvgLineComment 0.16 1 0.6934
## CountDeclClass 0.08 1 0.7822
## CountLineBlank 2.54 1 0.1113
## MaxCyclomaticModified 1.61 1 0.2045
## MaxEssential 2.10 1 0.1469
## MaxNesting 0.83 1 0.3622
## RatioCommentToCode 0.85 1 0.3568
## TOTAL 17.11 12 0.1455
# Run the simplification step on the CAT.BASE models; the helper prints a
# "Refit" model per project and the result replaces the statistics object.
model_things_2_BASE <- modelSimplification(
  models_2_BASE,
  model_things_2_BASE
)
## [1] "project: hadoop-2.6 model: CAT.BASE Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 890 LR chi2 196.89 R2 0.339 C 0.834
## FALSE 747 d.f. 4 g 1.412 Dxy 0.669
## TRUE 143 Pr(> chi2) <0.0001 gr 4.105 gamma 0.676
## max |deriv| 6e-10 gp 0.176 tau-a 0.181
## Brier 0.101
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -3.8552 0.5049 -7.64 <0.0001
## Distinct.count.of.Commit.Hash.PRE 3.1135 0.3097 10.05 <0.0001
## AvgCyclomaticStrict 3.2955 0.9393 3.51 0.0005
## AvgEssential -4.5507 1.5179 -3.00 0.0027
## MaxEssential 1.2191 0.4702 2.59 0.0095
##
## [1] "project: hadoop-2.6 model: CAT.BASE Refit - summary"
## [1] "project: hadoop-2.6 model: CAT.BASE Refit - validate"
## [1] "project: hadoop-2.6 model: CAT.BASE Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Commit.Hash.PRE 101.09 1 <.0001
## AvgCyclomaticStrict 12.31 1 0.0005
## AvgEssential 8.99 1 0.0027
## MaxEssential 6.72 1 0.0095
## TOTAL 145.32 4 <.0001
## [1] "Distinct.count.of.Commit.Hash.PRE"
## [1] "AvgCyclomaticStrict"
## [1] "AvgEssential"
## [1] "MaxEssential"
## Distinct.count.of.Commit.Hash.PRE AvgCyclomaticStrict AvgEssential
## 1 1.695506 2.660674 1.296629
## MaxEssential
## 1 3.969663
## [1] "Fixed at Mean: 0.189754400454327"
## [1] "Distinct.count.of.Commit.Hash.PRE Coef at Mean + 10%: 0.202761812337136"
## [1] "AvgCyclomaticStrict Coef at Mean + 10%: 0.205675857475548"
## [1] "AvgEssential Coef at Mean + 10%: 0.173624906372303"
## [1] "MaxEssential Coef at Mean + 10%: 0.196088895383609"
## [1] "project: hibernate-5.0 model: CAT.BASE Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 440 LR chi2 69.04 R2 0.226 C 0.763
## FALSE 347 d.f. 3 g 1.138 Dxy 0.526
## TRUE 93 Pr(> chi2) <0.0001 gr 3.122 gamma 0.528
## max |deriv| 3e-13 gp 0.172 tau-a 0.176
## Brier 0.137
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -4.2571 0.5728 -7.43 <0.0001
## Distinct.count.of.Author.Email.PRE 2.1756 0.8454 2.57 0.0101
## CountLineComment 1.8173 0.4474 4.06 <0.0001
## RatioCommentToCode -9.7887 2.5889 -3.78 0.0002
##
## [1] "project: hibernate-5.0 model: CAT.BASE Refit - summary"
## [1] "project: hibernate-5.0 model: CAT.BASE Refit - validate"
## [1] "project: hibernate-5.0 model: CAT.BASE Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 6.62 1 0.0101
## CountLineComment 16.50 1 <.0001
## RatioCommentToCode 14.30 1 0.0002
## TOTAL 54.39 3 <.0001
## [1] "Distinct.count.of.Author.Email.PRE"
## [1] "CountLineComment"
## [1] "RatioCommentToCode"
## Distinct.count.of.Author.Email.PRE CountLineComment RatioCommentToCode
## 1 2.129545 46.95455 0.27575
## [1] "Fixed at Mean: 0.238682329326376"
## [1] "Distinct.count.of.Author.Email.PRE Coef at Mean + 10%: 0.250167924092426"
## [1] "CountLineComment Coef at Mean + 10%: 0.252335980398188"
## [1] "RatioCommentToCode Coef at Mean + 10%: 0.222557134553471"
## [1] "project: umbraco-7.6 model: CAT.BASE Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 230 LR chi2 11.11 R2 0.101 C 0.696
## FALSE 208 d.f. 1 g 0.895 Dxy 0.391
## TRUE 22 Pr(> chi2) 0.0009 gr 2.448 gamma 0.396
## max |deriv| 5e-07 gp 0.072 tau-a 0.068
## Brier 0.080
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -5.2746 1.0511 -5.02 <0.0001
## CountLineBlank 1.8580 0.5873 3.16 0.0016
##
## [1] "project: umbraco-7.6 model: CAT.BASE Refit - summary"
## [1] "project: umbraco-7.6 model: CAT.BASE Refit - validate"
## [1] "project: umbraco-7.6 model: CAT.BASE Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## CountLineBlank 10.01 1 0.0016
## TOTAL 10.01 1 0.0016
## [1] "CountLineBlank"
## CountLineBlank
## 1 49.35652
## [1] "Fixed at Mean: 0.107922559017229"
## [1] "CountLineBlank Coef at Mean + 10%: 0.115404374892222"
# Seed the per-model data list from the per-project data list.
all_list_omitted_m2 <- all_list_omitted
To build new models that extend the base model, we remove every base metric that was insignificant in the corresponding base model. This is reasonable because the set of metrics can be adjusted based on expertise: the previous step told us which base metrics are significant, so the remaining base metrics can be dropped. We loop over each base model built previously; each model can have a different set of metrics, so the data for each extension has to be constructed separately.
# Build the data sets used to extend each base model.
#
# For every entry of models_2_BASE whose fit succeeded and whose bookkeeping
# entry in model_things_2_BASE is not flagged "over_budget", take the project's
# data from all_list_omitted, drop the try and throws metric columns, drop the
# base metric columns that are not listed as significant for that base model,
# and append list(name, project, data, sig) to all_list_omitted_m2.
#
# base_names, try_names, throws_names, findProjectData and findModel are
# defined outside this chunk (presumably in the sourced helper files).
all_list_omitted_m2 <- vector("list", 0)
# seq_along() yields an empty sequence for an empty model list, whereas
# 1:length(x) would iterate over c(1, 0).
for (i in seq_along(models_2_BASE)) {
  name <- models_2_BASE[[i]]$name
  project <- models_2_BASE[[i]]$project
  fit <- models_2_BASE[[i]]$fit
  print(paste("Project:", project, "Name:", name))

  # A failed fit is stored as a "try-error" object; inherits() tests for it
  # without creating a variable that shadows base::class.
  if (inherits(fit, "try-error")) {
    print(paste("!-ERROR-! - model construction had issues."))
    next
  }

  temp_data_index <- findProjectData(all_list_omitted, project)
  temp_data <- as.data.frame(all_list_omitted[[temp_data_index]]$data)

  # Keep catch metrics, remove try and throws, adjust base.
  keepForID <- c("Project", "File.Path")
  keepForBase <- c("Distinct.count.of.Issue.Key.POST", keepForID)
  keepForTry <- c(keepForID)
  keepForThrows <- c(keepForID)
  base_names_drop <- base_names[!(base_names %in% keepForBase)]
  try_names_drop <- try_names[!(try_names %in% keepForTry)]
  throws_names_drop <- throws_names[!(throws_names %in% keepForThrows)]

  # drop = FALSE keeps temp_data a data frame even if a single column survives;
  # otherwise names(temp_data) would be NULL in the following steps.
  temp_data <- temp_data[, !(names(temp_data) %in% try_names_drop), drop = FALSE]
  temp_data <- temp_data[, !(names(temp_data) %in% throws_names_drop), drop = FALSE]

  # Remove insignificant metrics according to related base model.
  temp_sig_index <- findModel(model_things_2_BASE, name, project)

  # Only move forward with the models that are under budget.
  if (model_things_2_BASE[[temp_sig_index]][["over_budget"]]) {
    print(paste("!-ERROR-! - model over budget."))
    next
  }

  # The significant metrics are stored as one ", "-separated string (NA when
  # there are none). A missing (NULL / zero-length) entry is treated like NA
  # instead of aborting the loop.
  temp_significant <- model_things_2_BASE[[temp_sig_index]][["signifcant_r"]]
  if (length(temp_significant) > 0 && !anyNA(temp_significant)) {
    # The significant metrics from the base model as a vector of char.
    temp_significant_list <- unlist(strsplit(temp_significant, ", "))
  } else {
    temp_significant_list <- character(0)
  }

  # The insignificant metrics: all from the base model minus the significant ones.
  base_names_insignificant <-
    base_names_drop[!(base_names_drop %in% temp_significant_list)]

  # The clean list of metrics for modeling: all metrics minus the base
  # insignificant ones.
  temp_data <- temp_data[
    , !(names(temp_data) %in% base_names_insignificant),
    drop = FALSE
  ]
  print(names(temp_data))

  all_list_omitted_m2 <- c(
    all_list_omitted_m2,
    list(list(
      name = name,
      project = project,
      data = temp_data,
      sig = temp_significant_list
    ))
  )
}
## [1] "Project: hadoop-2.6 Name: CAT.BASE"
## [1] "Distinct.count.of.Commit.Hash.PRE"
## [2] "Distinct.count.of.Issue.Key.POST"
## [3] "AvgCyclomaticStrict"
## [4] "AvgEssential"
## [5] "MaxEssential"
## [6] "X..Catch"
## [7] "Avg.Catch.LOC"
## [8] "Avg.Catch.SLOC"
## [9] "Avg..AP.Catch.and.do.nothing"
## [10] "Avg..AP.Catch.and.Return.null"
## [11] "Avg..AP.Destructive.Wrapping"
## [12] "Avg..AP.Dummy.Handler"
## [13] "Avg..AP.Generic.Catch"
## [14] "Avg..AP.Ignoring.Interrupted.Exception"
## [15] "Avg..AP.Incomplete.implementation"
## [16] "Avg..AP.Log.and.Return.null"
## [17] "Avg..AP.Log.and.Throw"
## [18] "Avg..AP.Multi.line.log.messages"
## [19] "Avg..AP.Nested.try.block"
## [20] "Avg..AP.Overcatch.and.Abort"
## [21] "Avg..AP.Overcatch"
## [22] "Avg..AP.Relying.on.getCause.."
## [23] "Avg..AP.Throw.within.finally"
## [24] "Avg..AP.Unhandled.exceptions"
## [25] "Avg..AP.Unreachable.Catch.Handler"
## [26] "Avg..Potentially.Recoverable"
## [27] "Avg..Potentially.UnRecoverable"
## [28] "Avg..Recoverability.Not.Relevant"
## [29] "AP.Catch.and.do.nothing"
## [30] "AP.Catch.and.Return.null"
## [31] "Sum.of.AP.Destructive.Wrapping"
## [32] "Sum.of.AP.Dummy.Handler"
## [33] "Sum.of.AP.Generic.Catch"
## [34] "AP.Ignoring.Interrupted.Exception"
## [35] "AP.Incomplete.implementation"
## [36] "Sum.of.AP.Log.and.Return.null"
## [37] "AP.Log.and.Throw"
## [38] "AP.Multi.line.log.messages"
## [39] "AP.Nested.try.block"
## [40] "Sum.of.AP.Overcatch.and.Abort"
## [41] "Sum.of.AP.Overcatch"
## [42] "AP.Relying.on.getCause.."
## [43] "AP.Throw.within.finally"
## [44] "Sum.of.AP.Unhandled.exceptions"
## [45] "Sum.of.AP.Unreachable.Catch.Handler"
## [46] "Catch.LOC"
## [47] "Catch.SLOC"
## [48] "Potentially.Recoverable"
## [49] "Potentially.UnRecoverable"
## [50] "Recoverability.Not.Relevant"
## [1] "Project: hibernate-5.0 Name: CAT.BASE"
## [1] "Distinct.count.of.Author.Email.PRE"
## [2] "Distinct.count.of.Issue.Key.POST"
## [3] "CountLineComment"
## [4] "RatioCommentToCode"
## [5] "X..Catch"
## [6] "Avg.Catch.LOC"
## [7] "Avg.Catch.SLOC"
## [8] "Avg..AP.Catch.and.do.nothing"
## [9] "Avg..AP.Catch.and.Return.null"
## [10] "Avg..AP.Destructive.Wrapping"
## [11] "Avg..AP.Dummy.Handler"
## [12] "Avg..AP.Generic.Catch"
## [13] "Avg..AP.Ignoring.Interrupted.Exception"
## [14] "Avg..AP.Incomplete.implementation"
## [15] "Avg..AP.Log.and.Return.null"
## [16] "Avg..AP.Log.and.Throw"
## [17] "Avg..AP.Multi.line.log.messages"
## [18] "Avg..AP.Nested.try.block"
## [19] "Avg..AP.Overcatch.and.Abort"
## [20] "Avg..AP.Overcatch"
## [21] "Avg..AP.Relying.on.getCause.."
## [22] "Avg..AP.Throw.within.finally"
## [23] "Avg..AP.Unhandled.exceptions"
## [24] "Avg..AP.Unreachable.Catch.Handler"
## [25] "Avg..Potentially.Recoverable"
## [26] "Avg..Potentially.UnRecoverable"
## [27] "Avg..Recoverability.Not.Relevant"
## [28] "AP.Catch.and.do.nothing"
## [29] "AP.Catch.and.Return.null"
## [30] "Sum.of.AP.Destructive.Wrapping"
## [31] "Sum.of.AP.Dummy.Handler"
## [32] "Sum.of.AP.Generic.Catch"
## [33] "AP.Ignoring.Interrupted.Exception"
## [34] "AP.Incomplete.implementation"
## [35] "Sum.of.AP.Log.and.Return.null"
## [36] "AP.Log.and.Throw"
## [37] "AP.Multi.line.log.messages"
## [38] "AP.Nested.try.block"
## [39] "Sum.of.AP.Overcatch.and.Abort"
## [40] "Sum.of.AP.Overcatch"
## [41] "AP.Relying.on.getCause.."
## [42] "AP.Throw.within.finally"
## [43] "Sum.of.AP.Unhandled.exceptions"
## [44] "Sum.of.AP.Unreachable.Catch.Handler"
## [45] "Catch.LOC"
## [46] "Catch.SLOC"
## [47] "Potentially.Recoverable"
## [48] "Potentially.UnRecoverable"
## [49] "Recoverability.Not.Relevant"
## [1] "Project: umbraco-7.6 Name: CAT.BASE"
## [1] "Distinct.count.of.Issue.Key.POST"
## [2] "CountLineBlank"
## [3] "X..Catch"
## [4] "Avg.Catch.LOC"
## [5] "Avg.Catch.SLOC"
## [6] "Avg..AP.Catch.and.do.nothing"
## [7] "Avg..AP.Catch.and.Return.null"
## [8] "Avg..AP.Destructive.Wrapping"
## [9] "Avg..AP.Dummy.Handler"
## [10] "Avg..AP.Generic.Catch"
## [11] "Avg..AP.Ignoring.Interrupted.Exception"
## [12] "Avg..AP.Incomplete.implementation"
## [13] "Avg..AP.Log.and.Return.null"
## [14] "Avg..AP.Log.and.Throw"
## [15] "Avg..AP.Multi.line.log.messages"
## [16] "Avg..AP.Nested.try.block"
## [17] "Avg..AP.Overcatch.and.Abort"
## [18] "Avg..AP.Overcatch"
## [19] "Avg..AP.Relying.on.getCause.."
## [20] "Avg..AP.Throw.within.finally"
## [21] "Avg..AP.Unhandled.exceptions"
## [22] "Avg..AP.Unreachable.Catch.Handler"
## [23] "Avg..Potentially.Recoverable"
## [24] "Avg..Potentially.UnRecoverable"
## [25] "Avg..Recoverability.Not.Relevant"
## [26] "AP.Catch.and.do.nothing"
## [27] "AP.Catch.and.Return.null"
## [28] "Sum.of.AP.Destructive.Wrapping"
## [29] "Sum.of.AP.Dummy.Handler"
## [30] "Sum.of.AP.Generic.Catch"
## [31] "AP.Ignoring.Interrupted.Exception"
## [32] "AP.Incomplete.implementation"
## [33] "Sum.of.AP.Log.and.Return.null"
## [34] "AP.Log.and.Throw"
## [35] "AP.Multi.line.log.messages"
## [36] "AP.Nested.try.block"
## [37] "Sum.of.AP.Overcatch.and.Abort"
## [38] "Sum.of.AP.Overcatch"
## [39] "AP.Relying.on.getCause.."
## [40] "AP.Throw.within.finally"
## [41] "Sum.of.AP.Unhandled.exceptions"
## [42] "Sum.of.AP.Unreachable.Catch.Handler"
## [43] "Catch.LOC"
## [44] "Catch.SLOC"
## [45] "Potentially.Recoverable"
## [46] "Potentially.UnRecoverable"
## [47] "Recoverability.Not.Relevant"
# Redefine the columns excluded from the predictor set for this model
# construction. Unlike the definition at the top of the file, "X..Catch" and
# "X..Throws" are not listed here.
# NOTE(review): dropToPredict is not passed explicitly below, so the helper
# presumably reads it as a global - confirm in the sourced helper files.
dropToPredict <- c(
  "File.Path", "Project", "Language", "Table.Name", "Name", "Kind",
  "X..Bugs.Post", "File", "Distinct.count.of.Issue.Key.POST"
)
# Apply the metric-reduction steps to every per-model data set, tagged "BSAP".
all_list_model_m2 <- dataApplyReductionByModel(all_list_omitted_m2, "BSAP")
## [1] "Project: hadoop-2.6 Name: CAT.BASE"
## [1] "NumberOfMetricsInitial: 49"
## [1] "NumberOfMetricsKept: 23"
## [1] "Distinct.count.of.Commit.Hash.PRE + AvgCyclomaticStrict + AvgEssential + MaxEssential + Avg.Catch.LOC + Avg..AP.Catch.and.do.nothing + Avg..AP.Catch.and.Return.null + Avg..AP.Destructive.Wrapping + Avg..AP.Dummy.Handler + Avg..AP.Generic.Catch + Avg..AP.Ignoring.Interrupted.Exception + Avg..AP.Multi.line.log.messages + Avg..AP.Nested.try.block + Avg..AP.Throw.within.finally + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.UnRecoverable + Avg..Recoverability.Not.Relevant + AP.Incomplete.implementation + AP.Log.and.Throw + Sum.of.AP.Overcatch.and.Abort + AP.Relying.on.getCause.. + Potentially.Recoverable"
## [1] "Distinct.count.of.Commit.Hash.PRE"
## [2] "Distinct.count.of.Issue.Key.POST"
## [3] "AvgCyclomaticStrict"
## [4] "AvgEssential"
## [5] "MaxEssential"
## [6] "Avg.Catch.LOC"
## [7] "Avg..AP.Catch.and.do.nothing"
## [8] "Avg..AP.Catch.and.Return.null"
## [9] "Avg..AP.Destructive.Wrapping"
## [10] "Avg..AP.Dummy.Handler"
## [11] "Avg..AP.Generic.Catch"
## [12] "Avg..AP.Ignoring.Interrupted.Exception"
## [13] "Avg..AP.Multi.line.log.messages"
## [14] "Avg..AP.Nested.try.block"
## [15] "Avg..AP.Throw.within.finally"
## [16] "Avg..AP.Unhandled.exceptions"
## [17] "Avg..AP.Unreachable.Catch.Handler"
## [18] "Avg..Potentially.UnRecoverable"
## [19] "Avg..Recoverability.Not.Relevant"
## [20] "AP.Incomplete.implementation"
## [21] "AP.Log.and.Throw"
## [22] "Sum.of.AP.Overcatch.and.Abort"
## [23] "AP.Relying.on.getCause.."
## [24] "Potentially.Recoverable"
## [1] "Project: hibernate-5.0 Name: CAT.BASE"
## [1] "NumberOfMetricsInitial: 48"
## [1] "NumberOfMetricsKept: 19"
## [1] "Distinct.count.of.Author.Email.PRE + CountLineComment + RatioCommentToCode + Avg.Catch.LOC + Avg.Catch.SLOC + Avg..AP.Catch.and.do.nothing + Avg..AP.Catch.and.Return.null + Avg..AP.Generic.Catch + Avg..AP.Nested.try.block + Avg..AP.Throw.within.finally + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.Recoverable + Avg..Recoverability.Not.Relevant + Sum.of.AP.Destructive.Wrapping + Sum.of.AP.Dummy.Handler + Sum.of.AP.Log.and.Return.null + Catch.SLOC + Potentially.Recoverable"
## [1] "Distinct.count.of.Author.Email.PRE"
## [2] "Distinct.count.of.Issue.Key.POST"
## [3] "CountLineComment"
## [4] "RatioCommentToCode"
## [5] "Avg.Catch.LOC"
## [6] "Avg.Catch.SLOC"
## [7] "Avg..AP.Catch.and.do.nothing"
## [8] "Avg..AP.Catch.and.Return.null"
## [9] "Avg..AP.Generic.Catch"
## [10] "Avg..AP.Nested.try.block"
## [11] "Avg..AP.Throw.within.finally"
## [12] "Avg..AP.Unhandled.exceptions"
## [13] "Avg..AP.Unreachable.Catch.Handler"
## [14] "Avg..Potentially.Recoverable"
## [15] "Avg..Recoverability.Not.Relevant"
## [16] "Sum.of.AP.Destructive.Wrapping"
## [17] "Sum.of.AP.Dummy.Handler"
## [18] "Sum.of.AP.Log.and.Return.null"
## [19] "Catch.SLOC"
## [20] "Potentially.Recoverable"
## [1] "Project: umbraco-7.6 Name: CAT.BASE"
## [1] "NumberOfMetricsInitial: 46"
## [1] "NumberOfMetricsKept: 16"
## [1] "CountLineBlank + Avg.Catch.LOC + Avg..AP.Catch.and.Return.null + Avg..AP.Destructive.Wrapping + Avg..AP.Dummy.Handler + Avg..AP.Log.and.Return.null + Avg..AP.Overcatch + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.UnRecoverable + AP.Catch.and.do.nothing + Sum.of.AP.Generic.Catch + AP.Multi.line.log.messages + AP.Nested.try.block + Sum.of.AP.Overcatch + AP.Relying.on.getCause.."
## [1] "Distinct.count.of.Issue.Key.POST"
## [2] "CountLineBlank"
## [3] "Avg.Catch.LOC"
## [4] "Avg..AP.Catch.and.Return.null"
## [5] "Avg..AP.Destructive.Wrapping"
## [6] "Avg..AP.Dummy.Handler"
## [7] "Avg..AP.Log.and.Return.null"
## [8] "Avg..AP.Overcatch"
## [9] "Avg..AP.Unhandled.exceptions"
## [10] "Avg..AP.Unreachable.Catch.Handler"
## [11] "Avg..Potentially.UnRecoverable"
## [12] "AP.Catch.and.do.nothing"
## [13] "Sum.of.AP.Generic.Catch"
## [14] "AP.Multi.line.log.messages"
## [15] "AP.Nested.try.block"
## [16] "Sum.of.AP.Overcatch"
## [17] "AP.Relying.on.getCause.."
## [1] "Project: hadoop-2.6 Name: CAT.BASE"
## [1] "Redudant variables: "
## [1] "Final variables: Distinct.count.of.Commit.Hash.PRE + Distinct.count.of.Issue.Key.POST + AvgCyclomaticStrict + AvgEssential + MaxEssential + Avg.Catch.LOC + Avg..AP.Catch.and.do.nothing + Avg..AP.Catch.and.Return.null + Avg..AP.Destructive.Wrapping + Avg..AP.Dummy.Handler + Avg..AP.Generic.Catch + Avg..AP.Ignoring.Interrupted.Exception + Avg..AP.Multi.line.log.messages + Avg..AP.Nested.try.block + Avg..AP.Throw.within.finally + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.UnRecoverable + Avg..Recoverability.Not.Relevant + AP.Incomplete.implementation + AP.Log.and.Throw + Sum.of.AP.Overcatch.and.Abort + AP.Relying.on.getCause.. + Potentially.Recoverable"
## [1] "NumberOfMetricsInitial: 24"
## [1] "NumberOfMetricsKept: 24"
## [1] "Project: hibernate-5.0 Name: CAT.BASE"
## [1] "Redudant variables: "
## [1] "Final variables: Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.POST + CountLineComment + RatioCommentToCode + Avg.Catch.LOC + Avg.Catch.SLOC + Avg..AP.Catch.and.do.nothing + Avg..AP.Catch.and.Return.null + Avg..AP.Generic.Catch + Avg..AP.Nested.try.block + Avg..AP.Throw.within.finally + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.Recoverable + Avg..Recoverability.Not.Relevant + Sum.of.AP.Destructive.Wrapping + Sum.of.AP.Dummy.Handler + Sum.of.AP.Log.and.Return.null + Catch.SLOC + Potentially.Recoverable"
## [1] "NumberOfMetricsInitial: 20"
## [1] "NumberOfMetricsKept: 20"
## [1] "Project: umbraco-7.6 Name: CAT.BASE"
## [1] "Redudant variables: "
## [1] "Final variables: Distinct.count.of.Issue.Key.POST + CountLineBlank + Avg.Catch.LOC + Avg..AP.Catch.and.Return.null + Avg..AP.Destructive.Wrapping + Avg..AP.Dummy.Handler + Avg..AP.Log.and.Return.null + Avg..AP.Overcatch + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.UnRecoverable + AP.Catch.and.do.nothing + Sum.of.AP.Generic.Catch + AP.Multi.line.log.messages + AP.Nested.try.block + Sum.of.AP.Overcatch + AP.Relying.on.getCause.."
## [1] "NumberOfMetricsInitial: 17"
## [1] "NumberOfMetricsKept: 17"
## [1] "------------------------------------Project: hadoop-2.6 Name: CAT.BASE"
## [1] "NumberOfMetricsInitial: 23 Budget: 59 Over Budget: FALSE NumberOfMetricsKept: 23 CorrelationCutoff: 0.7"
## [1] "NumberOfMetricsInitial: 23 Budget: 59 Over Budget: FALSE NumberOfMetricsKept: 23 CorrelationCutoff: 0.7"
## [1] "------------------------------------Project: hibernate-5.0 Name: CAT.BASE"
## [1] "NumberOfMetricsInitial: 19 Budget: 29 Over Budget: FALSE NumberOfMetricsKept: 19 CorrelationCutoff: 0.7"
## [1] "NumberOfMetricsInitial: 19 Budget: 29 Over Budget: FALSE NumberOfMetricsKept: 19 CorrelationCutoff: 0.7"
## [1] "------------------------------------Project: umbraco-7.6 Name: CAT.BASE"
## [1] "CountLineBlank + Avg.Catch.LOC + Avg..AP.Catch.and.Return.null + Avg..AP.Destructive.Wrapping + Avg..AP.Dummy.Handler + Avg..AP.Log.and.Return.null + Avg..AP.Overcatch + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.UnRecoverable + AP.Catch.and.do.nothing + Sum.of.AP.Generic.Catch + AP.Multi.line.log.messages + AP.Nested.try.block + AP.Relying.on.getCause.."
## [1] "NumberOfMetricsInitial: 16 Budget: 15 Over Budget: TRUE NumberOfMetricsKept: 15 CorrelationCutoff: 0.671"
## [1] "NumberOfMetricsInitial: 16 Budget: 15 Over Budget: TRUE NumberOfMetricsKept: 15 CorrelationCutoff: 0.671"
# Set up the binary-response model formulas, one per reduced data set.
# dataSetupFormulasBinaryByModel is defined outside this chunk.
form_list_bin_m2 <- dataSetupFormulasBinaryByModel(all_list_model_m2)
## [1] "Project: hadoop-2.6 Name: CAT.BASE"
## [1] "Distinct.count.of.Commit.Hash.PRE + AvgCyclomaticStrict + AvgEssential + MaxEssential + Avg.Catch.LOC + Avg..AP.Catch.and.do.nothing + Avg..AP.Catch.and.Return.null + Avg..AP.Destructive.Wrapping + Avg..AP.Dummy.Handler + Avg..AP.Generic.Catch + Avg..AP.Ignoring.Interrupted.Exception + Avg..AP.Multi.line.log.messages + Avg..AP.Nested.try.block + Avg..AP.Throw.within.finally + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.UnRecoverable + Avg..Recoverability.Not.Relevant + AP.Incomplete.implementation + AP.Log.and.Throw + Sum.of.AP.Overcatch.and.Abort + AP.Relying.on.getCause.. + Potentially.Recoverable"
## [1] "Project: hibernate-5.0 Name: CAT.BASE"
## [1] "Distinct.count.of.Author.Email.PRE + CountLineComment + RatioCommentToCode + Avg.Catch.LOC + Avg.Catch.SLOC + Avg..AP.Catch.and.do.nothing + Avg..AP.Catch.and.Return.null + Avg..AP.Generic.Catch + Avg..AP.Nested.try.block + Avg..AP.Throw.within.finally + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.Recoverable + Avg..Recoverability.Not.Relevant + Sum.of.AP.Destructive.Wrapping + Sum.of.AP.Dummy.Handler + Sum.of.AP.Log.and.Return.null + Catch.SLOC + Potentially.Recoverable"
## [1] "Project: umbraco-7.6 Name: CAT.BASE"
## [1] "CountLineBlank + Avg.Catch.LOC + Avg..AP.Catch.and.Return.null + Avg..AP.Destructive.Wrapping + Avg..AP.Dummy.Handler + Avg..AP.Log.and.Return.null + Avg..AP.Overcatch + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.UnRecoverable + AP.Catch.and.do.nothing + Sum.of.AP.Generic.Catch + AP.Multi.line.log.messages + AP.Nested.try.block + AP.Relying.on.getCause.."
# Fit the extended models from the reduced data and their formulas, labelled
# "CAT.BSAP". modelFitLogisticByModel is defined outside this chunk; the
# output below shows one logistic regression (lrm) fit per project.
models_2_BSAP <- modelFitLogisticByModel(
  all_list_model_m2,
  form_list_bin_m2,
  "CAT.BSAP"
)
## [1] "Project: hadoop-2.6 Name: CAT.BASE"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 890 LR chi2 233.46 R2 0.394 C 0.862
## FALSE 747 d.f. 23 g 1.730 Dxy 0.724
## TRUE 143 Pr(> chi2) <0.0001 gr 5.640 gamma 0.726
## max |deriv| 2e-06 gp 0.191 tau-a 0.196
## Brier 0.095
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -4.8011 1.0842 -4.43 <0.0001
## Distinct.count.of.Commit.Hash.PRE 2.9187 0.3478 8.39 <0.0001
## AvgCyclomaticStrict 2.9866 1.0155 2.94 0.0033
## AvgEssential -3.9923 1.6472 -2.42 0.0154
## MaxEssential 0.6824 0.5346 1.28 0.2018
## Avg.Catch.LOC 1.3236 1.1710 1.13 0.2584
## Avg..AP.Catch.and.do.nothing 0.5908 2.2376 0.26 0.7917
## Avg..AP.Catch.and.Return.null -2.4554 2.1356 -1.15 0.2502
## Avg..AP.Destructive.Wrapping -0.9398 1.1800 -0.80 0.4258
## Avg..AP.Dummy.Handler 0.9190 1.4537 0.63 0.5273
## Avg..AP.Generic.Catch -1.9456 1.4520 -1.34 0.1803
## Avg..AP.Ignoring.Interrupted.Exception 5.1785 1.5936 3.25 0.0012
## Avg..AP.Multi.line.log.messages 0.8636 3.0204 0.29 0.7749
## Avg..AP.Nested.try.block 2.0894 2.3797 0.88 0.3799
## Avg..AP.Throw.within.finally -3.5422 2.3058 -1.54 0.1245
## Avg..AP.Unhandled.exceptions -1.1283 1.1293 -1.00 0.3177
## Avg..AP.Unreachable.Catch.Handler 0.2953 1.9069 0.15 0.8769
## Avg..Potentially.UnRecoverable 0.5821 1.8214 0.32 0.7493
## Avg..Recoverability.Not.Relevant -4.4751 3.2719 -1.37 0.1714
## AP.Incomplete.implementation 2.6999 4.1987 0.64 0.5202
## AP.Log.and.Throw 4.1366 2.0714 2.00 0.0458
## Sum.of.AP.Overcatch.and.Abort 0.1437 2.1522 0.07 0.9468
## AP.Relying.on.getCause.. -0.6957 1.3245 -0.53 0.5994
## Potentially.Recoverable 0.8492 0.3661 2.32 0.0204
##
## [1] "Project: hibernate-5.0 Name: CAT.BASE"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 440 LR chi2 91.81 R2 0.293 C 0.798
## FALSE 347 d.f. 19 g 1.414 Dxy 0.596
## TRUE 93 Pr(> chi2) <0.0001 gr 4.113 gamma 0.598
## max |deriv| 8e-10 gp 0.199 tau-a 0.199
## Brier 0.128
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -4.8631 1.2596 -3.86 0.0001
## Distinct.count.of.Author.Email.PRE 2.8461 0.9505 2.99 0.0028
## CountLineComment 1.7835 0.5056 3.53 0.0004
## RatioCommentToCode -9.6397 2.9651 -3.25 0.0011
## Avg.Catch.LOC -0.9464 1.4915 -0.63 0.5257
## Avg.Catch.SLOC 3.9438 1.9831 1.99 0.0467
## Avg..AP.Catch.and.do.nothing 0.8616 2.5484 0.34 0.7353
## Avg..AP.Catch.and.Return.null 4.5145 2.6002 1.74 0.0825
## Avg..AP.Generic.Catch -0.6409 1.4690 -0.44 0.6626
## Avg..AP.Nested.try.block -0.2740 2.9307 -0.09 0.9255
## Avg..AP.Throw.within.finally 4.0430 2.6656 1.52 0.1293
## Avg..AP.Unhandled.exceptions 1.5102 1.2390 1.22 0.2229
## Avg..AP.Unreachable.Catch.Handler 0.5841 1.9556 0.30 0.7652
## Avg..Potentially.Recoverable -0.6864 2.6134 -0.26 0.7928
## Avg..Recoverability.Not.Relevant -0.9000 2.7225 -0.33 0.7410
## Sum.of.AP.Destructive.Wrapping 0.8648 0.6441 1.34 0.1793
## Sum.of.AP.Dummy.Handler 3.0046 1.0077 2.98 0.0029
## Sum.of.AP.Log.and.Return.null -0.0299 2.4376 -0.01 0.9902
## Catch.SLOC -1.0628 0.9990 -1.06 0.2874
## Potentially.Recoverable -0.5009 1.2268 -0.41 0.6831
##
## [1] "Project: umbraco-7.6 Name: CAT.BASE"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 230 LR chi2 43.16 R2 0.366 C 0.872
## FALSE 208 d.f. 15 g 5.042 Dxy 0.744
## TRUE 22 Pr(> chi2) 0.0001 gr 154.833 gamma 0.747
## max |deriv| 0.003 gp 0.129 tau-a 0.129
## Brier 0.066
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -5.4210 1.7433 -3.11 0.0019
## CountLineBlank 1.3541 0.7092 1.91 0.0562
## Avg.Catch.LOC -2.0201 2.4529 -0.82 0.4102
## Avg..AP.Catch.and.Return.null 1.4990 3.9958 0.38 0.7076
## Avg..AP.Destructive.Wrapping -89.7540 349.8585 -0.26 0.7975
## Avg..AP.Dummy.Handler 6.3450 2.5707 2.47 0.0136
## Avg..AP.Log.and.Return.null -16.9573 19.2147 -0.88 0.3775
## Avg..AP.Overcatch -1.0034 2.9495 -0.34 0.7337
## Avg..AP.Unhandled.exceptions -11.7078 6.3336 -1.85 0.0645
## Avg..AP.Unreachable.Catch.Handler -7.8174 4.3969 -1.78 0.0754
## Avg..Potentially.UnRecoverable 10.1290 6.9465 1.46 0.1448
## AP.Catch.and.do.nothing -0.4214 1.9185 -0.22 0.8261
## Sum.of.AP.Generic.Catch 5.7515 1.8439 3.12 0.0018
## AP.Multi.line.log.messages -30.0528 283.2997 -0.11 0.9155
## AP.Nested.try.block 0.1366 3.6782 0.04 0.9704
## AP.Relying.on.getCause.. -18.4489 234.3618 -0.08 0.9373
##
In this section, we present the selected statistics for our analysis. As explained in our approach, these correspond to steps MC7, MA1, MA2, MA3 and MA4.
Here we extract the selected statistics and add them (as columns) to an object that will be exported to CSV in the Output section.
# Collect the per-model statistics for the CAT.BSAP models, then add the
# validation results to the same bookkeeping object. modelStats and
# modelValidate are defined outside this chunk.
model_things_2_BSAP <- list()
model_things_2_BSAP <- modelStats(models_2_BSAP)
model_things_2_BSAP <- modelValidate(
  models_2_BSAP,
  model_things_2_BSAP
)
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
##
## Divergence or singularity in 57 samples
## singular information matrix in lrm.fit (rank= 15 ). Offending variable(s):
## AP.Relying.on.getCause..
## singular information matrix in lrm.fit (rank= 15 ). Offending variable(s):
## AP.Nested.try.block
## singular information matrix in lrm.fit (rank= 15 ). Offending variable(s):
## AP.Multi.line.log.messages
## singular information matrix in lrm.fit (rank= 15 ). Offending variable(s):
## AP.Multi.line.log.messages
## singular information matrix in lrm.fit (rank= 15 ). Offending variable(s):
## AP.Multi.line.log.messages
## singular information matrix in lrm.fit (rank= 15 ). Offending variable(s):
## AP.Multi.line.log.messages
## singular information matrix in lrm.fit (rank= 15 ). Offending variable(s):
## AP.Multi.line.log.messages
##
## Divergence or singularity in 19 samples
# Report the significance of each predictor for every BSAP model (the call
# prints a Wald-statistics table per project) and keep the returned
# bookkeeping object for the later steps.
model_things_2_BSAP <- modelSignificance(
  models_2_BSAP,
  model_things_2_BSAP
)
## [1] "project: hadoop-2.6 model: CAT.BSAP"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Commit.Hash.PRE 70.44 1 <.0001
## AvgCyclomaticStrict 8.65 1 0.0033
## AvgEssential 5.87 1 0.0154
## MaxEssential 1.63 1 0.2018
## Avg.Catch.LOC 1.28 1 0.2584
## Avg..AP.Catch.and.do.nothing 0.07 1 0.7917
## Avg..AP.Catch.and.Return.null 1.32 1 0.2502
## Avg..AP.Destructive.Wrapping 0.63 1 0.4258
## Avg..AP.Dummy.Handler 0.40 1 0.5273
## Avg..AP.Generic.Catch 1.80 1 0.1803
## Avg..AP.Ignoring.Interrupted.Exception 10.56 1 0.0012
## Avg..AP.Multi.line.log.messages 0.08 1 0.7749
## Avg..AP.Nested.try.block 0.77 1 0.3799
## Avg..AP.Throw.within.finally 2.36 1 0.1245
## Avg..AP.Unhandled.exceptions 1.00 1 0.3177
## Avg..AP.Unreachable.Catch.Handler 0.02 1 0.8769
## Avg..Potentially.UnRecoverable 0.10 1 0.7493
## Avg..Recoverability.Not.Relevant 1.87 1 0.1714
## AP.Incomplete.implementation 0.41 1 0.5202
## AP.Log.and.Throw 3.99 1 0.0458
## Sum.of.AP.Overcatch.and.Abort 0.00 1 0.9468
## AP.Relying.on.getCause.. 0.28 1 0.5994
## Potentially.Recoverable 5.38 1 0.0204
## TOTAL 149.23 23 <.0001
## [1] "project: hibernate-5.0 model: CAT.BSAP"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 8.97 1 0.0028
## CountLineComment 12.44 1 0.0004
## RatioCommentToCode 10.57 1 0.0011
## Avg.Catch.LOC 0.40 1 0.5257
## Avg.Catch.SLOC 3.95 1 0.0467
## Avg..AP.Catch.and.do.nothing 0.11 1 0.7353
## Avg..AP.Catch.and.Return.null 3.01 1 0.0825
## Avg..AP.Generic.Catch 0.19 1 0.6626
## Avg..AP.Nested.try.block 0.01 1 0.9255
## Avg..AP.Throw.within.finally 2.30 1 0.1293
## Avg..AP.Unhandled.exceptions 1.49 1 0.2229
## Avg..AP.Unreachable.Catch.Handler 0.09 1 0.7652
## Avg..Potentially.Recoverable 0.07 1 0.7928
## Avg..Recoverability.Not.Relevant 0.11 1 0.7410
## Sum.of.AP.Destructive.Wrapping 1.80 1 0.1793
## Sum.of.AP.Dummy.Handler 8.89 1 0.0029
## Sum.of.AP.Log.and.Return.null 0.00 1 0.9902
## Catch.SLOC 1.13 1 0.2874
## Potentially.Recoverable 0.17 1 0.6831
## TOTAL 66.79 19 <.0001
## [1] "project: umbraco-7.6 model: CAT.BSAP"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## CountLineBlank 3.65 1 0.0562
## Avg.Catch.LOC 0.68 1 0.4102
## Avg..AP.Catch.and.Return.null 0.14 1 0.7076
## Avg..AP.Destructive.Wrapping 0.07 1 0.7975
## Avg..AP.Dummy.Handler 6.09 1 0.0136
## Avg..AP.Log.and.Return.null 0.78 1 0.3775
## Avg..AP.Overcatch 0.12 1 0.7337
## Avg..AP.Unhandled.exceptions 3.42 1 0.0645
## Avg..AP.Unreachable.Catch.Handler 3.16 1 0.0754
## Avg..Potentially.UnRecoverable 2.13 1 0.1448
## AP.Catch.and.do.nothing 0.05 1 0.8261
## Sum.of.AP.Generic.Catch 9.73 1 0.0018
## AP.Multi.line.log.messages 0.01 1 0.9155
## AP.Nested.try.block 0.00 1 0.9704
## AP.Relying.on.getCause.. 0.01 1 0.9373
## TOTAL 21.98 15 0.1082
# Simplify every BSAP model: per the printed log, each model is refitted
# ("Refit") with a reduced predictor set and its anova table and
# "Fixed at Mean" / "Mean + 10%" values are reported.
model_things_2_BSAP <- modelSimplification(
  models_2_BSAP,
  model_things_2_BSAP
)
## [1] "project: hadoop-2.6 model: CAT.BSAP Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 890 LR chi2 215.21 R2 0.367 C 0.848
## FALSE 747 d.f. 6 g 1.530 Dxy 0.696
## TRUE 143 Pr(> chi2) <0.0001 gr 4.616 gamma 0.700
## max |deriv| 3e-09 gp 0.184 tau-a 0.188
## Brier 0.098
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -4.2845 0.5392 -7.95 <0.0001
## Distinct.count.of.Commit.Hash.PRE 3.2525 0.3124 10.41 <0.0001
## AvgCyclomaticStrict 3.1582 0.9624 3.28 0.0010
## AvgEssential -2.9884 1.3791 -2.17 0.0302
## Avg..AP.Ignoring.Interrupted.Exception 5.1063 1.4278 3.58 0.0003
## AP.Log.and.Throw 4.1034 1.9466 2.11 0.0350
## Potentially.Recoverable 0.7444 0.2817 2.64 0.0082
##
## [1] "project: hadoop-2.6 model: CAT.BSAP Refit - summary"
## [1] "project: hadoop-2.6 model: CAT.BSAP Refit - validate"
## [1] "project: hadoop-2.6 model: CAT.BSAP Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Commit.Hash.PRE 108.40 1 <.0001
## AvgCyclomaticStrict 10.77 1 0.0010
## AvgEssential 4.70 1 0.0302
## Avg..AP.Ignoring.Interrupted.Exception 12.79 1 0.0003
## AP.Log.and.Throw 4.44 1 0.0350
## Potentially.Recoverable 6.98 1 0.0082
## TOTAL 147.46 6 <.0001
## [1] "Distinct.count.of.Commit.Hash.PRE"
## [1] "AvgCyclomaticStrict"
## [1] "AvgEssential"
## [1] "Avg..AP.Ignoring.Interrupted.Exception"
## [1] "AP.Log.and.Throw"
## [1] "Potentially.Recoverable"
## Distinct.count.of.Commit.Hash.PRE AvgCyclomaticStrict AvgEssential
## 1 1.695506 2.660674 1.296629
## Avg..AP.Ignoring.Interrupted.Exception AP.Log.and.Throw
## 1 0.08380526 0.02022472
## Potentially.Recoverable
## 1 6.004494
## [1] "Fixed at Mean: 0.207569898618515"
## [1] "Distinct.count.of.Commit.Hash.PRE Coef at Mean + 10%: 0.222100399673733"
## [1] "AvgCyclomaticStrict Coef at Mean + 10%: 0.223844488271616"
## [1] "AvgEssential Coef at Mean + 10%: 0.196089668996072"
## [1] "Avg..AP.Ignoring.Interrupted.Exception Coef at Mean + 10%: 0.210393662256916"
## [1] "AP.Log.and.Throw Coef at Mean + 10%: 0.208151005992509"
## [1] "Potentially.Recoverable Coef at Mean + 10%: 0.211977383118221"
## [1] "project: hibernate-5.0 model: CAT.BSAP Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 440 LR chi2 79.74 R2 0.258 C 0.786
## FALSE 347 d.f. 5 g 1.272 Dxy 0.571
## TRUE 93 Pr(> chi2) <0.0001 gr 3.569 gamma 0.573
## max |deriv| 8e-12 gp 0.185 tau-a 0.191
## Brier 0.134
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -5.0342 0.6565 -7.67 <0.0001
## Distinct.count.of.Author.Email.PRE 2.4206 0.8657 2.80 0.0052
## CountLineComment 1.6859 0.4563 3.69 0.0002
## RatioCommentToCode -9.3723 2.6388 -3.55 0.0004
## Avg.Catch.SLOC 2.1113 0.8268 2.55 0.0107
## Sum.of.AP.Dummy.Handler 1.9714 0.9105 2.17 0.0304
##
## [1] "project: hibernate-5.0 model: CAT.BSAP Refit - summary"
## [1] "project: hibernate-5.0 model: CAT.BSAP Refit - validate"
## [1] "project: hibernate-5.0 model: CAT.BSAP Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 7.82 1 0.0052
## CountLineComment 13.65 1 0.0002
## RatioCommentToCode 12.61 1 0.0004
## Avg.Catch.SLOC 6.52 1 0.0107
## Sum.of.AP.Dummy.Handler 4.69 1 0.0304
## TOTAL 60.95 5 <.0001
## [1] "Distinct.count.of.Author.Email.PRE"
## [1] "CountLineComment"
## [1] "RatioCommentToCode"
## [1] "Avg.Catch.SLOC"
## [1] "Sum.of.AP.Dummy.Handler"
## Distinct.count.of.Author.Email.PRE CountLineComment RatioCommentToCode
## 1 2.129545 46.95455 0.27575
## Avg.Catch.SLOC Sum.of.AP.Dummy.Handler
## 1 1.286742 0.1409091
## [1] "Fixed at Mean: 0.24582152754868"
## [1] "Distinct.count.of.Author.Email.PRE Coef at Mean + 10%: 0.258875914390769"
## [1] "CountLineComment Coef at Mean + 10%: 0.25872098632262"
## [1] "RatioCommentToCode Coef at Mean + 10%: 0.230043577539418"
## [1] "Avg.Catch.SLOC Coef at Mean + 10%: 0.255245915466901"
## [1] "Sum.of.AP.Dummy.Handler Coef at Mean + 10%: 0.24777507698998"
## [1] "project: umbraco-7.6 model: CAT.BSAP Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 230 LR chi2 22.86 R2 0.202 C 0.794
## FALSE 208 d.f. 2 g 1.228 Dxy 0.589
## TRUE 22 Pr(> chi2) <0.0001 gr 3.413 gamma 0.645
## max |deriv| 8e-11 gp 0.097 tau-a 0.102
## Brier 0.078
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -4.5430 0.6758 -6.72 <0.0001
## Avg..AP.Dummy.Handler 5.3086 1.9281 2.75 0.0059
## Sum.of.AP.Generic.Catch 4.0066 1.0406 3.85 0.0001
##
## [1] "project: umbraco-7.6 model: CAT.BSAP Refit - summary"
## [1] "project: umbraco-7.6 model: CAT.BSAP Refit - validate"
## [1] "project: umbraco-7.6 model: CAT.BSAP Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Avg..AP.Dummy.Handler 7.58 1 0.0059
## Sum.of.AP.Generic.Catch 14.82 1 0.0001
## TOTAL 18.68 2 0.0001
## [1] "Avg..AP.Dummy.Handler"
## [1] "Sum.of.AP.Generic.Catch"
## Avg..AP.Dummy.Handler Sum.of.AP.Generic.Catch
## 1 0.160471 1.852174
## [1] "Fixed at Mean: 0.0850053202474649"
## [1] "Avg..AP.Dummy.Handler Coef at Mean + 10%: 0.0875005781572317"
## [1] "Sum.of.AP.Generic.Catch Coef at Mean + 10%: 0.0939165463703944"
# Start from the per-project data sets.
# NOTE(review): this value is replaced by an empty list right before the
# construction loop below, so this copy looks redundant - confirm.
all_list_omitted_m3 <- all_list_omitted
To build new models that extend the base model, we remove all base metrics that were insignificant in the corresponding base model. This is reasonable because the set of metrics can be adjusted based on prior knowledge: the previous step told us which base metrics are significant, so the remaining base metrics can be dropped. We need to loop through each previously built model, because each model can have a different set of metrics and, therefore, each extension has to be constructed separately.
# Build, for every usable base model, the data set from which its extension
# models are constructed. Each kept entry is a list with the model name, the
# project, the pruned data frame and the significant base metrics.
#
# Relies on objects defined elsewhere in the file / sourced scripts:
#   models_2_BASE, model_things_2_BASE, all_list_omitted,
#   base_names, catch_names, throws_names, findProjectData(), findModel().
all_list_omitted_m3 <- vector("list", 0)
# seq_along() instead of 1:length(): an empty models_2_BASE must yield zero
# iterations rather than the sequence c(1, 0).
for (i in seq_along(models_2_BASE)) {
  name <- models_2_BASE[[i]]$name
  project <- models_2_BASE[[i]]$project
  fit <- models_2_BASE[[i]]$fit
  print(paste("Project:", project, "Name:", name))

  # Skip models whose fit is an error object.
  if (inherits(fit, "try-error")) {
    print("!-ERROR-! - model construction had issues.")
    next
  }

  temp_data_index <- findProjectData(all_list_omitted, project)
  temp_data <- as.data.frame(all_list_omitted[[temp_data_index]]$data)

  # Columns that must survive the pruning below: the identifiers, plus the
  # response variable for the base group.
  keepForID <- c("Project", "File.Path")
  keepForBase <- c("Distinct.count.of.Issue.Key.POST", keepForID)
  keepForCatch <- c(keepForID)
  keepForThrows <- c(keepForID)
  base_names_drop <- base_names[!(base_names %in% keepForBase)]
  catch_names_drop <- catch_names[!(catch_names %in% keepForCatch)]
  throws_names_drop <- throws_names[!(throws_names %in% keepForThrows)]

  # Drop every column listed in catch_names / throws_names (identifiers
  # excepted). drop = FALSE keeps a data frame even if one column remains.
  # NOTE(review): the earlier comment here read "Keep catch metrics, remove
  # try and throws", which does not match this code - confirm which metric
  # groups are meant to be removed.
  temp_data <- temp_data[, !(names(temp_data) %in% catch_names_drop),
                         drop = FALSE]
  temp_data <- temp_data[, !(names(temp_data) %in% throws_names_drop),
                         drop = FALSE]

  # Look up the bookkeeping entry of the related base model.
  temp_sig_index <- findModel(model_things_2_BASE, name, project)

  # Only move forward with the models that are under budget.
  if (model_things_2_BASE[[temp_sig_index]][["over_budget"]]) {
    print("!-ERROR-! - model over budget.")
    next
  }

  # NOTE(review): the key is spelled "signifcant_r"; presumably it matches
  # the name used where the entry is stored - keep both in sync.
  temp_significant <- model_things_2_BASE[[temp_sig_index]][["signifcant_r"]]
  if (length(temp_significant) == 0 || all(is.na(temp_significant))) {
    # No significant base metric recorded (missing or NA entry).
    temp_significant_list <- character(0)
  } else {
    # The significant metrics from the base model as a vector of char
    # (stored as a single ", "-separated string).
    temp_significant_list <- unlist(strsplit(temp_significant, ", "))
  }

  # The insignificant metrics: all from the base model minus the
  # significant ones.
  base_names_insignificant <-
    base_names_drop[!(base_names_drop %in% temp_significant_list)]

  # The clean list of metrics for modeling: all metrics minus the base
  # insignificant ones.
  temp_data <- temp_data[, !(names(temp_data) %in% base_names_insignificant),
                         drop = FALSE]
  print(names(temp_data))

  all_list_omitted_m3 <- c(
    all_list_omitted_m3,
    list(list(
      name = name,
      project = project,
      data = temp_data,
      sig = temp_significant_list
    ))
  )
}
## [1] "Project: hadoop-2.6 Name: CAT.BASE"
## [1] "Distinct.count.of.Commit.Hash.PRE"
## [2] "Distinct.count.of.Issue.Key.POST"
## [3] "AvgCyclomaticStrict"
## [4] "AvgEssential"
## [5] "MaxEssential"
## [6] "X..Try.in.Conditional"
## [7] "X..Try.in.Declaration"
## [8] "X..Try.in.EH.Feature"
## [9] "X..Try.in.Loop"
## [10] "X..Try.in.Other"
## [11] "X..Try"
## [12] "X..Try.in.Conditional.1"
## [13] "X..Try.in.Declaration.1"
## [14] "X..Try.in.EH.Feature.1"
## [15] "X..Try.in.Loop.1"
## [16] "X..Try.in.Other.1"
## [17] "Avg.Max.Depth"
## [18] "Avg.Try.LOC"
## [19] "Avg.Try.SLOC"
## [20] "X..Handled.with.Abort"
## [21] "X..Handled.with.Continue"
## [22] "X..Handled.with.Default"
## [23] "X..Handled.with.Empty"
## [24] "X..Handled.with.Log"
## [25] "X..Handled.with.Method"
## [26] "X..Handled.with.Nested.Try"
## [27] "X..Handled.with.Return"
## [28] "X..Handled.with.Throw.with.New"
## [29] "X..Handled.with.Throw.without.New"
## [30] "X..Handled.with.Throw.Wrap"
## [31] "X..Handled.with.ToDo"
## [32] "X..Possible.Exceptions"
## [33] "X..Propagated.and.Potentially.Recoverable"
## [34] "X..Propagated"
## [35] "X..Doc.in.Comment"
## [36] "X..External.Doc"
## [37] "X..Handled.with.Abort.1"
## [38] "X..Handled.with.Continue.1"
## [39] "X..Handled.with.Default.1"
## [40] "X..Handled.with.Empty.1"
## [41] "X..Handled.with.Log.1"
## [42] "X..Handled.with.Method.1"
## [43] "X..Handled.with.Nested.Try.1"
## [44] "X..Handled.with.Return.1"
## [45] "X..Handled.with.Throw.with.New.1"
## [46] "X..Handled.with.Throw.without.New.1"
## [47] "X..Handled.with.Throw.Wrap.1"
## [48] "X..Handled.with.ToDo.1"
## [49] "X..Method.Declaration"
## [50] "X..Propagated.and.Potentially.Recoverable.1"
## [51] "X..Propagated.1"
## [52] "X..Specific"
## [53] "X..Subsumption"
## [54] "X..Throw.Statement"
## [55] "Avg...Declaring.Methods"
## [56] "Avg...Invoked.Method"
## [57] "Avg.Depth"
## [58] "Avg.Exc.Type.Prevalence"
## [59] "Num.Distinct.Methods"
## [60] "Sum.of.Num.Distinct.Methods"
## [61] "Try.LOC"
## [62] "Try.SLOC"
## [1] "Project: hibernate-5.0 Name: CAT.BASE"
## [1] "Distinct.count.of.Author.Email.PRE"
## [2] "Distinct.count.of.Issue.Key.POST"
## [3] "CountLineComment"
## [4] "RatioCommentToCode"
## [5] "X..Try.in.Conditional"
## [6] "X..Try.in.Declaration"
## [7] "X..Try.in.EH.Feature"
## [8] "X..Try.in.Loop"
## [9] "X..Try.in.Other"
## [10] "X..Try"
## [11] "X..Try.in.Conditional.1"
## [12] "X..Try.in.Declaration.1"
## [13] "X..Try.in.EH.Feature.1"
## [14] "X..Try.in.Loop.1"
## [15] "X..Try.in.Other.1"
## [16] "Avg.Max.Depth"
## [17] "Avg.Try.LOC"
## [18] "Avg.Try.SLOC"
## [19] "X..Handled.with.Abort"
## [20] "X..Handled.with.Continue"
## [21] "X..Handled.with.Default"
## [22] "X..Handled.with.Empty"
## [23] "X..Handled.with.Log"
## [24] "X..Handled.with.Method"
## [25] "X..Handled.with.Nested.Try"
## [26] "X..Handled.with.Return"
## [27] "X..Handled.with.Throw.with.New"
## [28] "X..Handled.with.Throw.without.New"
## [29] "X..Handled.with.Throw.Wrap"
## [30] "X..Handled.with.ToDo"
## [31] "X..Possible.Exceptions"
## [32] "X..Propagated.and.Potentially.Recoverable"
## [33] "X..Propagated"
## [34] "X..Doc.in.Comment"
## [35] "X..External.Doc"
## [36] "X..Handled.with.Abort.1"
## [37] "X..Handled.with.Continue.1"
## [38] "X..Handled.with.Default.1"
## [39] "X..Handled.with.Empty.1"
## [40] "X..Handled.with.Log.1"
## [41] "X..Handled.with.Method.1"
## [42] "X..Handled.with.Nested.Try.1"
## [43] "X..Handled.with.Return.1"
## [44] "X..Handled.with.Throw.with.New.1"
## [45] "X..Handled.with.Throw.without.New.1"
## [46] "X..Handled.with.Throw.Wrap.1"
## [47] "X..Handled.with.ToDo.1"
## [48] "X..Method.Declaration"
## [49] "X..Propagated.and.Potentially.Recoverable.1"
## [50] "X..Propagated.1"
## [51] "X..Specific"
## [52] "X..Subsumption"
## [53] "X..Throw.Statement"
## [54] "Avg...Declaring.Methods"
## [55] "Avg...Invoked.Method"
## [56] "Avg.Depth"
## [57] "Avg.Exc.Type.Prevalence"
## [58] "Num.Distinct.Methods"
## [59] "Sum.of.Num.Distinct.Methods"
## [60] "Try.LOC"
## [61] "Try.SLOC"
## [1] "Project: umbraco-7.6 Name: CAT.BASE"
## [1] "Distinct.count.of.Issue.Key.POST"
## [2] "CountLineBlank"
## [3] "X..Try.in.Conditional"
## [4] "X..Try.in.Declaration"
## [5] "X..Try.in.EH.Feature"
## [6] "X..Try.in.Loop"
## [7] "X..Try.in.Other"
## [8] "X..Try"
## [9] "X..Try.in.Conditional.1"
## [10] "X..Try.in.Declaration.1"
## [11] "X..Try.in.EH.Feature.1"
## [12] "X..Try.in.Loop.1"
## [13] "X..Try.in.Other.1"
## [14] "Avg.Max.Depth"
## [15] "Avg.Try.LOC"
## [16] "Avg.Try.SLOC"
## [17] "X..Handled.with.Abort"
## [18] "X..Handled.with.Continue"
## [19] "X..Handled.with.Default"
## [20] "X..Handled.with.Empty"
## [21] "X..Handled.with.Log"
## [22] "X..Handled.with.Method"
## [23] "X..Handled.with.Nested.Try"
## [24] "X..Handled.with.Return"
## [25] "X..Handled.with.Throw.with.New"
## [26] "X..Handled.with.Throw.without.New"
## [27] "X..Handled.with.Throw.Wrap"
## [28] "X..Handled.with.ToDo"
## [29] "X..Possible.Exceptions"
## [30] "X..Propagated.and.Potentially.Recoverable"
## [31] "X..Propagated"
## [32] "X..Doc.in.Comment"
## [33] "X..External.Doc"
## [34] "X..Handled.with.Abort.1"
## [35] "X..Handled.with.Continue.1"
## [36] "X..Handled.with.Default.1"
## [37] "X..Handled.with.Empty.1"
## [38] "X..Handled.with.Log.1"
## [39] "X..Handled.with.Method.1"
## [40] "X..Handled.with.Nested.Try.1"
## [41] "X..Handled.with.Return.1"
## [42] "X..Handled.with.Throw.with.New.1"
## [43] "X..Handled.with.Throw.without.New.1"
## [44] "X..Handled.with.Throw.Wrap.1"
## [45] "X..Handled.with.ToDo.1"
## [46] "X..Method.Declaration"
## [47] "X..Propagated.and.Potentially.Recoverable.1"
## [48] "X..Propagated.1"
## [49] "X..Specific"
## [50] "X..Subsumption"
## [51] "X..Throw.Statement"
## [52] "Avg...Declaring.Methods"
## [53] "Avg...Invoked.Method"
## [54] "Avg.Depth"
## [55] "Avg.Exc.Type.Prevalence"
## [56] "Num.Distinct.Methods"
## [57] "Sum.of.Num.Distinct.Methods"
## [58] "Try.LOC"
## [59] "Try.SLOC"
# Identification variables and the dependent variable, which are not used as
# predictors. This vector is redefined for each model construction; unlike
# the definition at the top of the file, "X..Catch" and "X..Throws" are not
# listed here.
# NOTE(review): presumably read as a global by the construction helpers -
# confirm in construction_functions.R.
dropToPredict <- c(
  "File.Path",
  "Project",
  "Language",
  "Table.Name",
  "Name",
  "Kind",
  "X..Bugs.Post",
  "File",
  "Distinct.count.of.Issue.Key.POST"
)
# Reduce the metrics of each per-model data set. "BSFC" is passed as the
# second argument; judging from the printed log, the call reports the metrics
# kept, redundant variables, budget and correlation cutoff per project.
all_list_model_m3 <- dataApplyReductionByModel(
  all_list_omitted_m3,
  "BSFC"
)
## [1] "Project: hadoop-2.6 Name: CAT.BASE"
## [1] "NumberOfMetricsInitial: 49"
## [1] "NumberOfMetricsKept: 23"
## [1] "Distinct.count.of.Commit.Hash.PRE + AvgCyclomaticStrict + AvgEssential + MaxEssential + Avg.Catch.LOC + Avg..AP.Catch.and.do.nothing + Avg..AP.Catch.and.Return.null + Avg..AP.Destructive.Wrapping + Avg..AP.Dummy.Handler + Avg..AP.Generic.Catch + Avg..AP.Ignoring.Interrupted.Exception + Avg..AP.Multi.line.log.messages + Avg..AP.Nested.try.block + Avg..AP.Throw.within.finally + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.UnRecoverable + Avg..Recoverability.Not.Relevant + AP.Incomplete.implementation + AP.Log.and.Throw + Sum.of.AP.Overcatch.and.Abort + AP.Relying.on.getCause.. + Potentially.Recoverable"
## [1] "Distinct.count.of.Commit.Hash.PRE"
## [2] "Distinct.count.of.Issue.Key.POST"
## [3] "AvgCyclomaticStrict"
## [4] "AvgEssential"
## [5] "MaxEssential"
## [6] "Avg.Catch.LOC"
## [7] "Avg..AP.Catch.and.do.nothing"
## [8] "Avg..AP.Catch.and.Return.null"
## [9] "Avg..AP.Destructive.Wrapping"
## [10] "Avg..AP.Dummy.Handler"
## [11] "Avg..AP.Generic.Catch"
## [12] "Avg..AP.Ignoring.Interrupted.Exception"
## [13] "Avg..AP.Multi.line.log.messages"
## [14] "Avg..AP.Nested.try.block"
## [15] "Avg..AP.Throw.within.finally"
## [16] "Avg..AP.Unhandled.exceptions"
## [17] "Avg..AP.Unreachable.Catch.Handler"
## [18] "Avg..Potentially.UnRecoverable"
## [19] "Avg..Recoverability.Not.Relevant"
## [20] "AP.Incomplete.implementation"
## [21] "AP.Log.and.Throw"
## [22] "Sum.of.AP.Overcatch.and.Abort"
## [23] "AP.Relying.on.getCause.."
## [24] "Potentially.Recoverable"
## [1] "Project: hibernate-5.0 Name: CAT.BASE"
## [1] "NumberOfMetricsInitial: 48"
## [1] "NumberOfMetricsKept: 19"
## [1] "Distinct.count.of.Author.Email.PRE + CountLineComment + RatioCommentToCode + Avg.Catch.LOC + Avg.Catch.SLOC + Avg..AP.Catch.and.do.nothing + Avg..AP.Catch.and.Return.null + Avg..AP.Generic.Catch + Avg..AP.Nested.try.block + Avg..AP.Throw.within.finally + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.Recoverable + Avg..Recoverability.Not.Relevant + Sum.of.AP.Destructive.Wrapping + Sum.of.AP.Dummy.Handler + Sum.of.AP.Log.and.Return.null + Catch.SLOC + Potentially.Recoverable"
## [1] "Distinct.count.of.Author.Email.PRE"
## [2] "Distinct.count.of.Issue.Key.POST"
## [3] "CountLineComment"
## [4] "RatioCommentToCode"
## [5] "Avg.Catch.LOC"
## [6] "Avg.Catch.SLOC"
## [7] "Avg..AP.Catch.and.do.nothing"
## [8] "Avg..AP.Catch.and.Return.null"
## [9] "Avg..AP.Generic.Catch"
## [10] "Avg..AP.Nested.try.block"
## [11] "Avg..AP.Throw.within.finally"
## [12] "Avg..AP.Unhandled.exceptions"
## [13] "Avg..AP.Unreachable.Catch.Handler"
## [14] "Avg..Potentially.Recoverable"
## [15] "Avg..Recoverability.Not.Relevant"
## [16] "Sum.of.AP.Destructive.Wrapping"
## [17] "Sum.of.AP.Dummy.Handler"
## [18] "Sum.of.AP.Log.and.Return.null"
## [19] "Catch.SLOC"
## [20] "Potentially.Recoverable"
## [1] "Project: umbraco-7.6 Name: CAT.BASE"
## [1] "NumberOfMetricsInitial: 46"
## [1] "NumberOfMetricsKept: 16"
## [1] "CountLineBlank + Avg.Catch.LOC + Avg..AP.Catch.and.Return.null + Avg..AP.Destructive.Wrapping + Avg..AP.Dummy.Handler + Avg..AP.Log.and.Return.null + Avg..AP.Overcatch + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.UnRecoverable + AP.Catch.and.do.nothing + Sum.of.AP.Generic.Catch + AP.Multi.line.log.messages + AP.Nested.try.block + Sum.of.AP.Overcatch + AP.Relying.on.getCause.."
## [1] "Distinct.count.of.Issue.Key.POST"
## [2] "CountLineBlank"
## [3] "Avg.Catch.LOC"
## [4] "Avg..AP.Catch.and.Return.null"
## [5] "Avg..AP.Destructive.Wrapping"
## [6] "Avg..AP.Dummy.Handler"
## [7] "Avg..AP.Log.and.Return.null"
## [8] "Avg..AP.Overcatch"
## [9] "Avg..AP.Unhandled.exceptions"
## [10] "Avg..AP.Unreachable.Catch.Handler"
## [11] "Avg..Potentially.UnRecoverable"
## [12] "AP.Catch.and.do.nothing"
## [13] "Sum.of.AP.Generic.Catch"
## [14] "AP.Multi.line.log.messages"
## [15] "AP.Nested.try.block"
## [16] "Sum.of.AP.Overcatch"
## [17] "AP.Relying.on.getCause.."
## [1] "Project: hadoop-2.6 Name: CAT.BASE"
## [1] "Redudant variables: "
## [1] "Final variables: Distinct.count.of.Commit.Hash.PRE + Distinct.count.of.Issue.Key.POST + AvgCyclomaticStrict + AvgEssential + MaxEssential + Avg.Catch.LOC + Avg..AP.Catch.and.do.nothing + Avg..AP.Catch.and.Return.null + Avg..AP.Destructive.Wrapping + Avg..AP.Dummy.Handler + Avg..AP.Generic.Catch + Avg..AP.Ignoring.Interrupted.Exception + Avg..AP.Multi.line.log.messages + Avg..AP.Nested.try.block + Avg..AP.Throw.within.finally + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.UnRecoverable + Avg..Recoverability.Not.Relevant + AP.Incomplete.implementation + AP.Log.and.Throw + Sum.of.AP.Overcatch.and.Abort + AP.Relying.on.getCause.. + Potentially.Recoverable"
## [1] "NumberOfMetricsInitial: 24"
## [1] "NumberOfMetricsKept: 24"
## [1] "Project: hibernate-5.0 Name: CAT.BASE"
## [1] "Redudant variables: "
## [1] "Final variables: Distinct.count.of.Author.Email.PRE + Distinct.count.of.Issue.Key.POST + CountLineComment + RatioCommentToCode + Avg.Catch.LOC + Avg.Catch.SLOC + Avg..AP.Catch.and.do.nothing + Avg..AP.Catch.and.Return.null + Avg..AP.Generic.Catch + Avg..AP.Nested.try.block + Avg..AP.Throw.within.finally + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.Recoverable + Avg..Recoverability.Not.Relevant + Sum.of.AP.Destructive.Wrapping + Sum.of.AP.Dummy.Handler + Sum.of.AP.Log.and.Return.null + Catch.SLOC + Potentially.Recoverable"
## [1] "NumberOfMetricsInitial: 20"
## [1] "NumberOfMetricsKept: 20"
## [1] "Project: umbraco-7.6 Name: CAT.BASE"
## [1] "Redudant variables: "
## [1] "Final variables: Distinct.count.of.Issue.Key.POST + CountLineBlank + Avg.Catch.LOC + Avg..AP.Catch.and.Return.null + Avg..AP.Destructive.Wrapping + Avg..AP.Dummy.Handler + Avg..AP.Log.and.Return.null + Avg..AP.Overcatch + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.UnRecoverable + AP.Catch.and.do.nothing + Sum.of.AP.Generic.Catch + AP.Multi.line.log.messages + AP.Nested.try.block + Sum.of.AP.Overcatch + AP.Relying.on.getCause.."
## [1] "NumberOfMetricsInitial: 17"
## [1] "NumberOfMetricsKept: 17"
## [1] "------------------------------------Project: hadoop-2.6 Name: CAT.BASE"
## [1] "NumberOfMetricsInitial: 23 Budget: 59 Over Budget: FALSE NumberOfMetricsKept: 23 CorrelationCutoff: 0.7"
## [1] "NumberOfMetricsInitial: 23 Budget: 59 Over Budget: FALSE NumberOfMetricsKept: 23 CorrelationCutoff: 0.7"
## [1] "------------------------------------Project: hibernate-5.0 Name: CAT.BASE"
## [1] "NumberOfMetricsInitial: 19 Budget: 29 Over Budget: FALSE NumberOfMetricsKept: 19 CorrelationCutoff: 0.7"
## [1] "NumberOfMetricsInitial: 19 Budget: 29 Over Budget: FALSE NumberOfMetricsKept: 19 CorrelationCutoff: 0.7"
## [1] "------------------------------------Project: umbraco-7.6 Name: CAT.BASE"
## [1] "CountLineBlank + Avg.Catch.LOC + Avg..AP.Catch.and.Return.null + Avg..AP.Destructive.Wrapping + Avg..AP.Dummy.Handler + Avg..AP.Log.and.Return.null + Avg..AP.Overcatch + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.UnRecoverable + AP.Catch.and.do.nothing + Sum.of.AP.Generic.Catch + AP.Multi.line.log.messages + AP.Nested.try.block + AP.Relying.on.getCause.."
## [1] "NumberOfMetricsInitial: 16 Budget: 15 Over Budget: TRUE NumberOfMetricsKept: 15 CorrelationCutoff: 0.671"
## [1] "NumberOfMetricsInitial: 16 Budget: 15 Over Budget: TRUE NumberOfMetricsKept: 15 CorrelationCutoff: 0.671"
# Set up the binary-response model formulas, one per project/model; the call
# prints the predictor terms of each formula.
form_list_bin_m3 <- dataSetupFormulasBinaryByModel(
  all_list_model_m3
)
## [1] "Project: hadoop-2.6 Name: CAT.BASE"
## [1] "Distinct.count.of.Commit.Hash.PRE + AvgCyclomaticStrict + AvgEssential + MaxEssential + Avg.Catch.LOC + Avg..AP.Catch.and.do.nothing + Avg..AP.Catch.and.Return.null + Avg..AP.Destructive.Wrapping + Avg..AP.Dummy.Handler + Avg..AP.Generic.Catch + Avg..AP.Ignoring.Interrupted.Exception + Avg..AP.Multi.line.log.messages + Avg..AP.Nested.try.block + Avg..AP.Throw.within.finally + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.UnRecoverable + Avg..Recoverability.Not.Relevant + AP.Incomplete.implementation + AP.Log.and.Throw + Sum.of.AP.Overcatch.and.Abort + AP.Relying.on.getCause.. + Potentially.Recoverable"
## [1] "Project: hibernate-5.0 Name: CAT.BASE"
## [1] "Distinct.count.of.Author.Email.PRE + CountLineComment + RatioCommentToCode + Avg.Catch.LOC + Avg.Catch.SLOC + Avg..AP.Catch.and.do.nothing + Avg..AP.Catch.and.Return.null + Avg..AP.Generic.Catch + Avg..AP.Nested.try.block + Avg..AP.Throw.within.finally + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.Recoverable + Avg..Recoverability.Not.Relevant + Sum.of.AP.Destructive.Wrapping + Sum.of.AP.Dummy.Handler + Sum.of.AP.Log.and.Return.null + Catch.SLOC + Potentially.Recoverable"
## [1] "Project: umbraco-7.6 Name: CAT.BASE"
## [1] "CountLineBlank + Avg.Catch.LOC + Avg..AP.Catch.and.Return.null + Avg..AP.Destructive.Wrapping + Avg..AP.Dummy.Handler + Avg..AP.Log.and.Return.null + Avg..AP.Overcatch + Avg..AP.Unhandled.exceptions + Avg..AP.Unreachable.Catch.Handler + Avg..Potentially.UnRecoverable + AP.Catch.and.do.nothing + Sum.of.AP.Generic.Catch + AP.Multi.line.log.messages + AP.Nested.try.block + AP.Relying.on.getCause.."
# Fit one logistic regression per project from the reduced data and the
# formulas built above (the printed calls show lrm fits).
# "CAT.BSFC" is presumably the label given to the resulting models - confirm.
models_2_BSFC <- modelFitLogisticByModel(
  all_list_model_m3,
  form_list_bin_m3,
  "CAT.BSFC"
)
## [1] "Project: hadoop-2.6 Name: CAT.BASE"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 890 LR chi2 233.46 R2 0.394 C 0.862
## FALSE 747 d.f. 23 g 1.730 Dxy 0.724
## TRUE 143 Pr(> chi2) <0.0001 gr 5.640 gamma 0.726
## max |deriv| 2e-06 gp 0.191 tau-a 0.196
## Brier 0.095
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -4.8011 1.0842 -4.43 <0.0001
## Distinct.count.of.Commit.Hash.PRE 2.9187 0.3478 8.39 <0.0001
## AvgCyclomaticStrict 2.9866 1.0155 2.94 0.0033
## AvgEssential -3.9923 1.6472 -2.42 0.0154
## MaxEssential 0.6824 0.5346 1.28 0.2018
## Avg.Catch.LOC 1.3236 1.1710 1.13 0.2584
## Avg..AP.Catch.and.do.nothing 0.5908 2.2376 0.26 0.7917
## Avg..AP.Catch.and.Return.null -2.4554 2.1356 -1.15 0.2502
## Avg..AP.Destructive.Wrapping -0.9398 1.1800 -0.80 0.4258
## Avg..AP.Dummy.Handler 0.9190 1.4537 0.63 0.5273
## Avg..AP.Generic.Catch -1.9456 1.4520 -1.34 0.1803
## Avg..AP.Ignoring.Interrupted.Exception 5.1785 1.5936 3.25 0.0012
## Avg..AP.Multi.line.log.messages 0.8636 3.0204 0.29 0.7749
## Avg..AP.Nested.try.block 2.0894 2.3797 0.88 0.3799
## Avg..AP.Throw.within.finally -3.5422 2.3058 -1.54 0.1245
## Avg..AP.Unhandled.exceptions -1.1283 1.1293 -1.00 0.3177
## Avg..AP.Unreachable.Catch.Handler 0.2953 1.9069 0.15 0.8769
## Avg..Potentially.UnRecoverable 0.5821 1.8214 0.32 0.7493
## Avg..Recoverability.Not.Relevant -4.4751 3.2719 -1.37 0.1714
## AP.Incomplete.implementation 2.6999 4.1987 0.64 0.5202
## AP.Log.and.Throw 4.1366 2.0714 2.00 0.0458
## Sum.of.AP.Overcatch.and.Abort 0.1437 2.1522 0.07 0.9468
## AP.Relying.on.getCause.. -0.6957 1.3245 -0.53 0.5994
## Potentially.Recoverable 0.8492 0.3661 2.32 0.0204
##
## [1] "Project: hibernate-5.0 Name: CAT.BASE"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 440 LR chi2 91.81 R2 0.293 C 0.798
## FALSE 347 d.f. 19 g 1.414 Dxy 0.596
## TRUE 93 Pr(> chi2) <0.0001 gr 4.113 gamma 0.598
## max |deriv| 8e-10 gp 0.199 tau-a 0.199
## Brier 0.128
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -4.8631 1.2596 -3.86 0.0001
## Distinct.count.of.Author.Email.PRE 2.8461 0.9505 2.99 0.0028
## CountLineComment 1.7835 0.5056 3.53 0.0004
## RatioCommentToCode -9.6397 2.9651 -3.25 0.0011
## Avg.Catch.LOC -0.9464 1.4915 -0.63 0.5257
## Avg.Catch.SLOC 3.9438 1.9831 1.99 0.0467
## Avg..AP.Catch.and.do.nothing 0.8616 2.5484 0.34 0.7353
## Avg..AP.Catch.and.Return.null 4.5145 2.6002 1.74 0.0825
## Avg..AP.Generic.Catch -0.6409 1.4690 -0.44 0.6626
## Avg..AP.Nested.try.block -0.2740 2.9307 -0.09 0.9255
## Avg..AP.Throw.within.finally 4.0430 2.6656 1.52 0.1293
## Avg..AP.Unhandled.exceptions 1.5102 1.2390 1.22 0.2229
## Avg..AP.Unreachable.Catch.Handler 0.5841 1.9556 0.30 0.7652
## Avg..Potentially.Recoverable -0.6864 2.6134 -0.26 0.7928
## Avg..Recoverability.Not.Relevant -0.9000 2.7225 -0.33 0.7410
## Sum.of.AP.Destructive.Wrapping 0.8648 0.6441 1.34 0.1793
## Sum.of.AP.Dummy.Handler 3.0046 1.0077 2.98 0.0029
## Sum.of.AP.Log.and.Return.null -0.0299 2.4376 -0.01 0.9902
## Catch.SLOC -1.0628 0.9990 -1.06 0.2874
## Potentially.Recoverable -0.5009 1.2268 -0.41 0.6831
##
## [1] "Project: umbraco-7.6 Name: CAT.BASE"
## Logistic Regression Model
##
## lrm(formula = as.formula(form_bin), data = temp_data_log, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 230 LR chi2 43.16 R2 0.366 C 0.872
## FALSE 208 d.f. 15 g 5.042 Dxy 0.744
## TRUE 22 Pr(> chi2) 0.0001 gr 154.833 gamma 0.747
## max |deriv| 0.003 gp 0.129 tau-a 0.129
## Brier 0.066
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -5.4210 1.7433 -3.11 0.0019
## CountLineBlank 1.3541 0.7092 1.91 0.0562
## Avg.Catch.LOC -2.0201 2.4529 -0.82 0.4102
## Avg..AP.Catch.and.Return.null 1.4990 3.9958 0.38 0.7076
## Avg..AP.Destructive.Wrapping -89.7540 349.8585 -0.26 0.7975
## Avg..AP.Dummy.Handler 6.3450 2.5707 2.47 0.0136
## Avg..AP.Log.and.Return.null -16.9573 19.2147 -0.88 0.3775
## Avg..AP.Overcatch -1.0034 2.9495 -0.34 0.7337
## Avg..AP.Unhandled.exceptions -11.7078 6.3336 -1.85 0.0645
## Avg..AP.Unreachable.Catch.Handler -7.8174 4.3969 -1.78 0.0754
## Avg..Potentially.UnRecoverable 10.1290 6.9465 1.46 0.1448
## AP.Catch.and.do.nothing -0.4214 1.9185 -0.22 0.8261
## Sum.of.AP.Generic.Catch 5.7515 1.8439 3.12 0.0018
## AP.Multi.line.log.messages -30.0528 283.2997 -0.11 0.9155
## AP.Nested.try.block 0.1366 3.6782 0.04 0.9704
## AP.Relying.on.getCause.. -18.4489 234.3618 -0.08 0.9373
##
In this section, we present the selected statistics for our analysis. As explained in our approach, these correspond to the steps MC7, MA1, MA2, MA3 and MA4.
Here we extract the selected statistics and add them (as columns) to an object that is exported to CSV in the Output section.
# Collect the selected statistics for the CAT.BSFC models.
# modelStats() returns the statistics object directly, so no empty-list
# placeholder is created first (it was overwritten before ever being read).
model_things_2_BSFC <- modelStats(models_2_BSFC)
# modelValidate() receives the statistics gathered so far and its result
# replaces them (presumably the same list with validation results added --
# confirm in construction_functions.R / analysis_functions.R).
model_things_2_BSFC <- modelValidate(models_2_BSFC, model_things_2_BSFC)
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
## singular information matrix in lrm.fit (rank= 23 ). Offending variable(s):
## AP.Incomplete.implementation
##
## Divergence or singularity in 40 samples
## singular information matrix in lrm.fit (rank= 15 ). Offending variable(s):
## AP.Multi.line.log.messages
## singular information matrix in lrm.fit (rank= 15 ). Offending variable(s):
## AP.Nested.try.block
## singular information matrix in lrm.fit (rank= 15 ). Offending variable(s):
## AP.Relying.on.getCause..
## singular information matrix in lrm.fit (rank= 15 ). Offending variable(s):
## AP.Multi.line.log.messages
## singular information matrix in lrm.fit (rank= 15 ). Offending variable(s):
## AP.Multi.line.log.messages
##
## Divergence or singularity in 19 samples
# Significance step for the CAT.BSFC models: prints one Wald statistics table
# per project and keeps the returned statistics object.
model_things_2_BSFC <- modelSignificance(models_2_BSFC, model_things_2_BSFC)
## [1] "project: hadoop-2.6 model: CAT.BSFC"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Commit.Hash.PRE 70.44 1 <.0001
## AvgCyclomaticStrict 8.65 1 0.0033
## AvgEssential 5.87 1 0.0154
## MaxEssential 1.63 1 0.2018
## Avg.Catch.LOC 1.28 1 0.2584
## Avg..AP.Catch.and.do.nothing 0.07 1 0.7917
## Avg..AP.Catch.and.Return.null 1.32 1 0.2502
## Avg..AP.Destructive.Wrapping 0.63 1 0.4258
## Avg..AP.Dummy.Handler 0.40 1 0.5273
## Avg..AP.Generic.Catch 1.80 1 0.1803
## Avg..AP.Ignoring.Interrupted.Exception 10.56 1 0.0012
## Avg..AP.Multi.line.log.messages 0.08 1 0.7749
## Avg..AP.Nested.try.block 0.77 1 0.3799
## Avg..AP.Throw.within.finally 2.36 1 0.1245
## Avg..AP.Unhandled.exceptions 1.00 1 0.3177
## Avg..AP.Unreachable.Catch.Handler 0.02 1 0.8769
## Avg..Potentially.UnRecoverable 0.10 1 0.7493
## Avg..Recoverability.Not.Relevant 1.87 1 0.1714
## AP.Incomplete.implementation 0.41 1 0.5202
## AP.Log.and.Throw 3.99 1 0.0458
## Sum.of.AP.Overcatch.and.Abort 0.00 1 0.9468
## AP.Relying.on.getCause.. 0.28 1 0.5994
## Potentially.Recoverable 5.38 1 0.0204
## TOTAL 149.23 23 <.0001
## [1] "project: hibernate-5.0 model: CAT.BSFC"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 8.97 1 0.0028
## CountLineComment 12.44 1 0.0004
## RatioCommentToCode 10.57 1 0.0011
## Avg.Catch.LOC 0.40 1 0.5257
## Avg.Catch.SLOC 3.95 1 0.0467
## Avg..AP.Catch.and.do.nothing 0.11 1 0.7353
## Avg..AP.Catch.and.Return.null 3.01 1 0.0825
## Avg..AP.Generic.Catch 0.19 1 0.6626
## Avg..AP.Nested.try.block 0.01 1 0.9255
## Avg..AP.Throw.within.finally 2.30 1 0.1293
## Avg..AP.Unhandled.exceptions 1.49 1 0.2229
## Avg..AP.Unreachable.Catch.Handler 0.09 1 0.7652
## Avg..Potentially.Recoverable 0.07 1 0.7928
## Avg..Recoverability.Not.Relevant 0.11 1 0.7410
## Sum.of.AP.Destructive.Wrapping 1.80 1 0.1793
## Sum.of.AP.Dummy.Handler 8.89 1 0.0029
## Sum.of.AP.Log.and.Return.null 0.00 1 0.9902
## Catch.SLOC 1.13 1 0.2874
## Potentially.Recoverable 0.17 1 0.6831
## TOTAL 66.79 19 <.0001
## [1] "project: umbraco-7.6 model: CAT.BSFC"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## CountLineBlank 3.65 1 0.0562
## Avg.Catch.LOC 0.68 1 0.4102
## Avg..AP.Catch.and.Return.null 0.14 1 0.7076
## Avg..AP.Destructive.Wrapping 0.07 1 0.7975
## Avg..AP.Dummy.Handler 6.09 1 0.0136
## Avg..AP.Log.and.Return.null 0.78 1 0.3775
## Avg..AP.Overcatch 0.12 1 0.7337
## Avg..AP.Unhandled.exceptions 3.42 1 0.0645
## Avg..AP.Unreachable.Catch.Handler 3.16 1 0.0754
## Avg..Potentially.UnRecoverable 2.13 1 0.1448
## AP.Catch.and.do.nothing 0.05 1 0.8261
## Sum.of.AP.Generic.Catch 9.73 1 0.0018
## AP.Multi.line.log.messages 0.01 1 0.9155
## AP.Nested.try.block 0.00 1 0.9704
## AP.Relying.on.getCause.. 0.01 1 0.9373
## TOTAL 21.98 15 0.1082
# Simplification step for the CAT.BSFC models: prints, per project, the
# refitted model and its Wald statistics, and keeps the returned statistics
# object.
model_things_2_BSFC <- modelSimplification(models_2_BSFC, model_things_2_BSFC)
## [1] "project: hadoop-2.6 model: CAT.BSFC Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 890 LR chi2 215.21 R2 0.367 C 0.848
## FALSE 747 d.f. 6 g 1.530 Dxy 0.696
## TRUE 143 Pr(> chi2) <0.0001 gr 4.616 gamma 0.700
## max |deriv| 3e-09 gp 0.184 tau-a 0.188
## Brier 0.098
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -4.2845 0.5392 -7.95 <0.0001
## Distinct.count.of.Commit.Hash.PRE 3.2525 0.3124 10.41 <0.0001
## AvgCyclomaticStrict 3.1582 0.9624 3.28 0.0010
## AvgEssential -2.9884 1.3791 -2.17 0.0302
## Avg..AP.Ignoring.Interrupted.Exception 5.1063 1.4278 3.58 0.0003
## AP.Log.and.Throw 4.1034 1.9466 2.11 0.0350
## Potentially.Recoverable 0.7444 0.2817 2.64 0.0082
##
## [1] "project: hadoop-2.6 model: CAT.BSFC Refit - summary"
## [1] "project: hadoop-2.6 model: CAT.BSFC Refit - validate"
## [1] "project: hadoop-2.6 model: CAT.BSFC Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Commit.Hash.PRE 108.40 1 <.0001
## AvgCyclomaticStrict 10.77 1 0.0010
## AvgEssential 4.70 1 0.0302
## Avg..AP.Ignoring.Interrupted.Exception 12.79 1 0.0003
## AP.Log.and.Throw 4.44 1 0.0350
## Potentially.Recoverable 6.98 1 0.0082
## TOTAL 147.46 6 <.0001
## [1] "Distinct.count.of.Commit.Hash.PRE"
## [1] "AvgCyclomaticStrict"
## [1] "AvgEssential"
## [1] "Avg..AP.Ignoring.Interrupted.Exception"
## [1] "AP.Log.and.Throw"
## [1] "Potentially.Recoverable"
## Distinct.count.of.Commit.Hash.PRE AvgCyclomaticStrict AvgEssential
## 1 1.695506 2.660674 1.296629
## Avg..AP.Ignoring.Interrupted.Exception AP.Log.and.Throw
## 1 0.08380526 0.02022472
## Potentially.Recoverable
## 1 6.004494
## [1] "Fixed at Mean: 0.207569898618515"
## [1] "Distinct.count.of.Commit.Hash.PRE Coef at Mean + 10%: 0.222100399673733"
## [1] "AvgCyclomaticStrict Coef at Mean + 10%: 0.223844488271616"
## [1] "AvgEssential Coef at Mean + 10%: 0.196089668996072"
## [1] "Avg..AP.Ignoring.Interrupted.Exception Coef at Mean + 10%: 0.210393662256916"
## [1] "AP.Log.and.Throw Coef at Mean + 10%: 0.208151005992509"
## [1] "Potentially.Recoverable Coef at Mean + 10%: 0.211977383118221"
## [1] "project: hibernate-5.0 model: CAT.BSFC Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 440 LR chi2 79.74 R2 0.258 C 0.786
## FALSE 347 d.f. 5 g 1.272 Dxy 0.571
## TRUE 93 Pr(> chi2) <0.0001 gr 3.569 gamma 0.573
## max |deriv| 8e-12 gp 0.185 tau-a 0.191
## Brier 0.134
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -5.0342 0.6565 -7.67 <0.0001
## Distinct.count.of.Author.Email.PRE 2.4206 0.8657 2.80 0.0052
## CountLineComment 1.6859 0.4563 3.69 0.0002
## RatioCommentToCode -9.3723 2.6388 -3.55 0.0004
## Avg.Catch.SLOC 2.1113 0.8268 2.55 0.0107
## Sum.of.AP.Dummy.Handler 1.9714 0.9105 2.17 0.0304
##
## [1] "project: hibernate-5.0 model: CAT.BSFC Refit - summary"
## [1] "project: hibernate-5.0 model: CAT.BSFC Refit - validate"
## [1] "project: hibernate-5.0 model: CAT.BSFC Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Distinct.count.of.Author.Email.PRE 7.82 1 0.0052
## CountLineComment 13.65 1 0.0002
## RatioCommentToCode 12.61 1 0.0004
## Avg.Catch.SLOC 6.52 1 0.0107
## Sum.of.AP.Dummy.Handler 4.69 1 0.0304
## TOTAL 60.95 5 <.0001
## [1] "Distinct.count.of.Author.Email.PRE"
## [1] "CountLineComment"
## [1] "RatioCommentToCode"
## [1] "Avg.Catch.SLOC"
## [1] "Sum.of.AP.Dummy.Handler"
## Distinct.count.of.Author.Email.PRE CountLineComment RatioCommentToCode
## 1 2.129545 46.95455 0.27575
## Avg.Catch.SLOC Sum.of.AP.Dummy.Handler
## 1 1.286742 0.1409091
## [1] "Fixed at Mean: 0.24582152754868"
## [1] "Distinct.count.of.Author.Email.PRE Coef at Mean + 10%: 0.258875914390769"
## [1] "CountLineComment Coef at Mean + 10%: 0.25872098632262"
## [1] "RatioCommentToCode Coef at Mean + 10%: 0.230043577539418"
## [1] "Avg.Catch.SLOC Coef at Mean + 10%: 0.255245915466901"
## [1] "Sum.of.AP.Dummy.Handler Coef at Mean + 10%: 0.24777507698998"
## [1] "project: umbraco-7.6 model: CAT.BSFC Refit"
## Logistic Regression Model
##
## lrm(formula = as.formula(final_form), data = temp_data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 230 LR chi2 22.86 R2 0.202 C 0.794
## FALSE 208 d.f. 2 g 1.228 Dxy 0.589
## TRUE 22 Pr(> chi2) <0.0001 gr 3.413 gamma 0.645
## max |deriv| 8e-11 gp 0.097 tau-a 0.102
## Brier 0.078
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -4.5430 0.6758 -6.72 <0.0001
## Avg..AP.Dummy.Handler 5.3086 1.9281 2.75 0.0059
## Sum.of.AP.Generic.Catch 4.0066 1.0406 3.85 0.0001
##
## [1] "project: umbraco-7.6 model: CAT.BSFC Refit - summary"
## [1] "project: umbraco-7.6 model: CAT.BSFC Refit - validate"
## [1] "project: umbraco-7.6 model: CAT.BSFC Refit - anova"
## Wald Statistics Response: Distinct.count.of.Issue.Key.POST > 0
##
## Factor Chi-Square d.f. P
## Avg..AP.Dummy.Handler 7.58 1 0.0059
## Sum.of.AP.Generic.Catch 14.82 1 0.0001
## TOTAL 18.68 2 0.0001
## [1] "Avg..AP.Dummy.Handler"
## [1] "Sum.of.AP.Generic.Catch"
## Avg..AP.Dummy.Handler Sum.of.AP.Generic.Catch
## 1 0.160471 1.852174
## [1] "Fixed at Mean: 0.0850053202474649"
## [1] "Avg..AP.Dummy.Handler Coef at Mean + 10%: 0.0875005781572317"
## [1] "Sum.of.AP.Generic.Catch Coef at Mean + 10%: 0.0939165463703944"
Here we take the selected statistics returned by the R functions and write them to the CSV files in the folder “output”.
# Export the selected statistics of every model family to one CSV file.
# The file is (re)created with a header row taken from the first CAT.BASE
# entry; the entries of the BASE, BSAP and BSFC result lists are then appended
# in that order without repeating the header.
# Everything runs inside local() so the helper variables do not leak into the
# global environment, and the result is invisible because only the file
# side effect matters (the previous top-level lapply() calls auto-printed
# lists of NULL).
invisible(local({
  csv_path <- file.path("output", "base_test_2.csv")

  # write.table() does not create missing directories.
  if (!dir.exists(dirname(csv_path))) {
    dir.create(dirname(csv_path), recursive = TRUE)
  }

  # Header only: a zero-row slice keeps the column names and writes no data.
  # drop = FALSE keeps a data frame (and its column name) even when there is
  # a single column.
  write.table(
    data.frame(model_things_2_BASE[[1]])[0, , drop = FALSE],
    csv_path,
    append = FALSE, sep = ",", row.names = FALSE, col.names = TRUE
  )

  all_model_things <- list(
    model_things_2_BASE,
    model_things_2_BSAP,
    model_things_2_BSFC
  )
  for (model_things in all_model_things) {
    for (model_stats in model_things) {
      write.table(
        data.frame(model_stats),
        csv_path,
        append = TRUE, sep = ",", row.names = FALSE, col.names = FALSE
      )
    }
  }
}))