final version of benchmark design
juliambr committed Apr 6, 2019
1 parent 9b137a0 commit af9a3fc
Showing 3 changed files with 69 additions and 51 deletions.
47 changes: 18 additions & 29 deletions benchmark/helpers.R
@@ -206,9 +206,9 @@ collectBenchmarkResults = function(path, experiments, tab) {
toreduce = ijoin(tab, experiments[[experiment]], by = names(experiments[[experiment]]))
toreduce = ijoin(toreduce, findDone(), by = "job.id")

dir = as.numeric(sapply(list.files("registry/results/"), function(x) strsplit(x, ".rds")[[1]][1]))
dir = data.frame(job.id = dir)
toreduce = ijoin(toreduce, dir)
# dir = as.numeric(sapply(list.files("registry/results/"), function(x) strsplit(x, ".rds")[[1]][1]))
# dir = data.frame(job.id = dir)
# toreduce = ijoin(toreduce, dir)

res = reduceResultsDataTable(toreduce, function(x) collectResult(x$result))
res = ijoin(tab, res, by = "job.id")
@@ -453,15 +453,15 @@ plotPerformanceHout = function(res, plotspath) {
}


plotRanks = function(res, plotspath, logscale = FALSE, metric = "naive.hout.domHV", limits = c(0.37, 1)) {
plotRanks = function(res, plotspath, logscale = FALSE, metric = "naive.hout.domHV", limits = c(0.37, 1), height = 10, width = 7) {

# --- naive.hout.domHV
df = extractFromSummary(res, c("evals", metric))
df = df[evals < 4000, ]
df$gen = (df$evals - 80) / 15
df = df[, replication := 1:length(job.id), by = c("learner", "variant", "problem", "gen")]
df = renameAndRevalue(df)
names(df)[20] = "metric"
names(df)[17] = "metric"

# --- calculate ranks within learner, problem and replication ---
dfr = df[, rank_variant := rank(- metric), by = c("learner", "problem", "evals", "replication")]
@@ -490,7 +490,7 @@ plotRanks = function(res, plotspath, logscale = FALSE, metric = "naive.hout.domH
p = p + guides(lty = guide_legend(order = 1), colour = guide_legend(order = 2))
p = p + xlab("Evaluations")

ggsave(file.path(plotspath, paste(metric, "ranks.pdf", sep = "_")), p, width = 9, height = 6, device = "pdf")
ggsave(file.path(plotspath, paste(gsub("\\.", "", metric), "ranks.pdf", sep = "_")), p, width = 9, height = 6, device = "pdf")

p = ggplot()
p = p + geom_line(data = res_ovr_pl, aes(x = evals, y = value, lty = algorithm, colour = variant), size = 0.6)
@@ -502,7 +502,7 @@ plotRanks = function(res, plotspath, logscale = FALSE, metric = "naive.hout.domH
p = p + guides(lty = guide_legend(order = 1), colour = guide_legend(order = 2))
p = p + xlab("Evaluations")

ggsave(file.path(plotspath, paste(metric, "ranks_perLearner.pdf", sep = "_")), p, width = 7, height = 10)
ggsave(file.path(plotspath, paste(gsub("\\.", "", metric), "ranks_perLearner.pdf", sep = "_")), p, width = 7, height = 10)
}
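A minimal sketch, not part of this commit, of the grouped-ranking pattern used in plotRanks above: variants are ranked within each learner/problem/evals/replication cell via data.table's := with by, and the metric name is stripped of dots before it enters the output file name (the change in this hunk). The toy columns and values below are assumptions.

library(data.table)

# toy data: two variants, one learner/problem/budget, two replications
df = data.table(
  learner = "SVM", problem = "sonar", evals = 80,
  replication = rep(1:2, times = 2),
  variant = rep(c("O", "OIHFiFmS"), each = 2),
  metric = c(0.61, 0.58, 0.66, 0.63)
)

# rank variants within each cell; larger hypervolume gets rank 1
df[, rank_variant := rank(-metric), by = c("learner", "problem", "evals", "replication")]

# strip dots from the metric name before building the plot file name
metric = "naive.hout.domHV"
paste(gsub("\\.", "", metric), "ranks.pdf", sep = "_")   # "naivehoutdomHV_ranks.pdf"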

# --- this is not a general function
@@ -577,7 +577,7 @@ plotHeatmap = function(populations, plotspath) {



calculateSummaryOfMethods = function(res, maxevals = 4000L) {
calculateSummaryOfMethods = function(path, res, maxevals = 4000L) {

# structure of the table
# problem | RS (double budget) | RSI (double budget) | RSIF (double budget) | NSGA-II | MOSMAFS
@@ -609,11 +609,11 @@ calculateSummaryOfMethods = function(res, maxevals = 4000L) {
dfc = dfc[order(dfc$p), ]
dfc$dummycol = NA
dfcc = dfc[, c("problem", "NSGA-II", "+UI", "+UI+FI", "+UI+FI+FM", "+UI+FI+FM (s.a.)", "+UI+HP", "+UI+FI+HP+FM (s.a.)", "dummycol", "RS", "RSI", "RSIF")]
print(xtable::xtable(dfcc, type = "latex", include.rownames=FALSE), file = paste("latex_temp/houtdomHV", "_singlebudget/", lrn, "complete", "_", maxevals, ".tex", sep = ""))
print(xtable::xtable(dfcc, type = "latex", include.rownames=FALSE), file = paste("latex_temp/houtdomHV/", lrn, "complete", "_", maxevals, ".tex", sep = ""))

dfcc = dfc[, c("problem", "n", "p", "NSGA-II", "+UI+FI+HP+FM (s.a.)", "RS", "RSI", "RSIF")]

print(xtable::xtable(dfcc, type = "latex", include.rownames=FALSE), file = paste("latex_temp/houtdomHV", "_singlebudget/", lrn, "_", maxevals, ".tex", sep = ""))
print(xtable::xtable(dfcc, type = "latex", include.rownames=FALSE), file = paste(path, "/houtdomHV/", lrn, "_", maxevals, ".tex", sep = ""))



@@ -626,8 +626,6 @@ calculateSummaryOfMethods = function(res, maxevals = 4000L) {

# print(xtable(dfc, type = "latex", include.rownames=FALSE), file = paste("latex_temp/houtdomHV", "_", method, "/", lrn, ".tex", sep = ""))
}


}
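A minimal sketch, not part of this commit, of the xtable export used throughout calculateSummaryOfMethods above; the data frame, directory, and file name are placeholders, and include.rownames is passed to print.xtable rather than to xtable itself.

library(xtable)

dfcc = data.frame(problem = c("sonar", "wdbc"),
                  `NSGA-II` = c(0.71, 0.93),
                  RS = c(0.65, 0.90),
                  check.names = FALSE)

dir.create("latex_temp/houtdomHV", recursive = TRUE, showWarnings = FALSE)

# write the LaTeX table body to a .tex file for inclusion in the paper
print(xtable(dfcc), type = "latex", include.rownames = FALSE,
      file = "latex_temp/houtdomHV/sonar_wdbc_example.tex")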

g_legend<-function(a.gplot){
@@ -636,7 +634,7 @@ g_legend<-function(a.gplot){
legend <- tmp$grobs[[leg]]
return(legend)}

calculateEvalsToRandomsearch = function(res) {
calculateEvalsToRandomsearch = function(res, path) {

# naive.hout.domHV
df = extractFromSummary(res, c("evals", "naive.hout.domHV"))
@@ -651,6 +649,10 @@ calculateEvalsToRandomsearch = function(res) {
res2 = res2[algorithm == "mosmafs", ]
res2 = res2[, replication := 1:length(job.id), by = c("learner", "variant", "problem")]

res2$RS.beat = 0
res2$RSI.beat = 0
res2$RSIF.beat = 0

for (repl in 1:10) {
for (prob in unique(dfm$problem)) {
for (lrn in unique(dfm$learner)) {
@@ -668,23 +670,10 @@ calculateEvalsToRandomsearch = function(res) {
}
}
}
path = "latex_temp"

saveRDS(res2, file.path(path, "beat_randomsearch_complete.rds"))

# --- imputation
res3 = res2
res3[is.na(res3$RS.beat), ]$RS.beat = 8000L
res3[is.na(res3$RSI.beat), ]$RSI.beat = 8000L
res3[is.na(res3$RSIF.beat), ]$RSIF.beat = 8000L

res3 = res3[, .(RS.beat = mean(RS.beat, na.rm = TRUE),
RS.sd = sd(RS.beat, na.rm = TRUE) / sqrt(360),
RSI.beat = mean(RSI.beat, na.rm = TRUE),
RSI.sd = sd(RSI.beat, na.rm = TRUE) / sqrt(360),
RSIF.beat = mean(RSIF.beat, na.rm = TRUE),
RSIF.sd = sd(RSIF.beat, na.rm = TRUE) / sqrt(360),
test = length(RS.beat)), by = c("variant")]

res3 = res2[, .(RS.beat = mean(RS.beat, na.rm = TRUE),
RS.sd = sd(RS.beat, na.rm = TRUE) / sqrt(360),
RS.nas = mean(is.na(RS.beat)) * 100,
@@ -707,7 +696,7 @@ calculateEvalsToRandomsearch = function(res) {
c("O" = "NSGA-II", "OI" = "+UI", "OIFi" = "+UI+FI", "OIFiFm" = "+UI+FI+FM",
"OIFiFmS" = "+UI+FI+FM (s.a.)", "OIH" = "+UI+HP", "OIHFiFmS" = "+UI+FI+HP+FM (s.a.)"))

names(res3) = c(" ", "RS", "RS.sd", "NAs.1", "RS+UI", "RSI.sd", "NAs.2", "RS+UI+IF", "RSUIIF.sd", "NAs.3", "test")
names(res3) = c(" ", "RS", "RS.sd", "NC.1", "RS+UI", "RSI.sd", "NC.2", "RS+UI+IF", "RSUIIF.sd", "NC.3", "test")

print(xtable::xtable(res3[, c(" ", "RS", "NAs.1", "RS+UI", "NAs.2", "RS+UI+IF", "NAs.3")], type = "latex", include.rownames=FALSE), file = paste("latex_temp/beatRS_with_nas_average_after.tex", sep = ""))
print(xtable::xtable(res3[, c(" ", "RS", "NC.1", "RS+UI", "NC.2", "RS+UI+IF", "NC.3")], type = "latex", include.rownames=FALSE), file = paste("latex_temp/beatRS_with_nas_average_after.tex", sep = ""))
}
17 changes: 11 additions & 6 deletions benchmark/reduce.R
@@ -1,19 +1,24 @@
library(batchtools)
library(dplyr)
library(mlr)
library(mlrCPO)

source("helpers.R")
source("probdesign.R")

# load registry
reg = loadRegistry("registry")
reg = loadRegistry("registry22", writeable = FALSE)
tab = summarizeExperiments(by = c("job.id", "algorithm",
"problem", "learner", "maxeval", "filter", "initialization",
"lambda", "mu", "parent.sel", "chw.bitflip", "adaptive.filter.weights",
"filter.during.run", "surrogate", "MBMOmethod", "propose.points"))
tab = tab[maxeval %in% c(4000), ]
tab = rbind(tab[lambda != 4L, ], tab[is.na(lambda), ])
"filter.during.run"))
tab = tab[learner == "xgboost", ]
# tab = tab[maxeval %in% c(4000), ]
# tab = rbind(tab[lambda != 4L, ], tab[is.na(lambda), ])
done = ijoin(tab, findDone())

path = "results_raw"
path = "results_raw_xgboost"
dir.create(path)

problems = c("wdbc", "ionosphere", "sonar", "hill-valley", "clean1",
"tecator", "USPS", "madeline", "lsvt", "madelon", "isolet", "cnae-9")
@@ -32,7 +37,7 @@ experiments = list(
)

collectBenchmarkResults(path, experiments, tab)
collectParetofront(path, experiments = experiments[c("O", "OIHFiFmS", "RS", "RSI", "RSIF")], tab, problems, learners = c("SVM", "kknn"))
collectParetofront(path, experiments = experiments[c("O", "OIHFiFmS", "RS", "RSI", "RSIF")], tab, problems, learners = c("xgboost"))



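A minimal sketch, not part of this commit, of the batchtools collection pattern that reduce.R follows: load an existing registry read-only, summarize the experimental design, keep only finished jobs, and reduce the stored results into a data.table. The registry path, grouping columns, and the domHV field of the stored result are assumptions.

library(batchtools)

reg = loadRegistry("registry22", writeable = FALSE)   # read-only access

# one row per job with the selected design columns
tab = summarizeExperiments(by = c("job.id", "algorithm", "problem", "learner"))

# restrict to jobs that have finished
done = ijoin(tab, findDone(), by = "job.id")

# extract one value from each stored result and join it back to the design
res = reduceResultsDataTable(done, function(x) list(domHV = x$domHV))
res = ijoin(tab, res, by = "job.id")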
56 changes: 40 additions & 16 deletions benchmark/viz.R
@@ -22,48 +22,72 @@ experiments = list(
RSIF = data.table(algorithm = "randomsearch", initialization = "unif", filter = "custom")
)

# no madelon
# no madeline
problems = data.table(problem = c(
"wdbc", "ionosphere", "sonar", "hill-valley", "clean1",
"tecator", "USPS", "madeline", "lsvt", "madelon", "isolet", "cnae-9"),
n = c(569, 351, 208, 1212, 476, 240, 1424, 3140, 126, 2600, 600, 240),
p = c(30, 34, 60, 100, 168, 124, 256, 259, 310, 500, 617, 856))

problems = problems[- which(problem %in% c("madeline", "madelon"))]

# savepath
plotspath = "results_plots"
latex_path = "latex_temp"

# --- read the data ---
datapath = "results_raw"
plotspath = "results_plots/plots_final"
learners = c("SVM", "kknn")
reslist = lapply(names(experiments), function(x) readRDS(file.path(datapath, x, "result.rds")))
res = do.call("rbind", reslist)
res = res[problem %in% problems$problem, ]
res = res[problem %in% problems$problem & learner %in% learners, ]
res$learner = factor(res$learner, levels = learners)
res$surrogate = NULL
res$MBMOmethod = NULL
res$propose.points = NULL

datapath = "results_raw_xgboost"
learners = c("xgboost")
reslist = lapply(names(experiments), function(x) readRDS(file.path(datapath, x, "result.rds")))
res_xgb = do.call("rbind", reslist)
res_xgb = res_xgb[problem %in% problems$problem & learner %in% learners, ]
res_xgb$learner = factor(res_xgb$learner, levels = learners)
res_xgb$job.id = res_xgb$job.id + 100000L

poplist = lapply(names(experiments), function(x) readRDS(file.path(datapath, x, "population.rds")))
pops = do.call("rbind", poplist)
pops = pops[problem %in% problems$problem, ]
res = rbind(res, res_xgb)

# --- opt.paths per task per learner ---
plotRanks(res, plotspath, metric = "eval.domHV", limits = c(0.2, 1))
plotRanks(res, plotspath, metric = "naive.hout.domHV", limits = c(0.2, 1))
calculateEvalsToRandomsearch(res)
calculateSummaryOfMethods(res, maxevals = 4000L) {
calculateSummaryOfMethods(res, maxevals = 2000L) {
plotRanks(res, plotspath, metric = "eval.domHV", limits = c(0.2, 1))#, height = 8, width = 7)
plotRanks(res, plotspath, metric = "naive.hout.domHV", limits = c(0.2, 1))#, height = 8, width = 7)
calculateEvalsToRandomsearch(res, path = latex_path)
calculateSummaryOfMethods(res, maxevals = 4000L)
calculateSummaryOfMethods(res, maxevals = 2000L)


for (prob in problems$problem) {
parfrnt = readRDS(paste("results_raw/pareto_examples/paretofront.rds", sep = ""))
parfrnt$surrogate = NULL
parfrnt$MBMOmethod = NULL
parfrnt$propose.points = NULL
parfrnt_xgb = readRDS(paste("results_raw_xgboost/pareto_examples/paretofront.rds", sep = ""))
parfrnt = rbind(parfrnt, parfrnt_xgb)

parfrnt = readRDS(paste("results_raw/pareto_examples/paretofront.rds", sep = ""))
for (prob in problems$problem) {

allparetos = parfrnt[problem == prob, ]
allparetos = allparetos[expname %in% c("O", "OIHFiFmS", "RS", "RSI", "RSIF"), ]

allparetos$expname = revalue(allparetos$expname,
c("O" = "NSGAII", "OI" = "NSGAII+UI", "OIFi" = "NSGAII+UI+FI", "OIFiFm" = "NSGAII+UI+FI+FM",
"OIFiFmS" = "NSGAII+UI+FI+FM (s.a.)", "OIH" = "NSGAII+UI+HP", "OIHFiFmS" = "NSGAII+UI+FI+HP+FM(s.a.)",
"OIFiFmS" = "NSGAII+UI+FI+FM(s.a.)", "OIH" = "NSGAII+UI+HP", "OIHFiFmS" = "NSGAII+UI+FI+HP+FM(s.a.)",
"RS" = "RS", "RSI" = "RS+UI", "RSIF" = "RS+UI+IF"))
allparetos$expname = factor(allparetos$expname, levels = c("RS", "RS+UI", "RS+UI+IF", "NSGAII", "NSGAII+UI+FI+HP+FM(s.a.)"))
allparetos$learner = factor(allparetos$learner, levels = c("SVM", "kknn"))
allparetos$learner = factor(allparetos$learner, levels = c("SVM", "kknn", "xgboost"))

p = ggplot(allparetos, aes(x = mmce, y = featfrac, group = instance))
p = p + geom_polygon(data = allparetos, fill = "grey", alpha = 0.05)
p = p + geom_line(data = allparetos, colour = "grey", alpha = 0.6)
p = p + geom_point(data = allparetos[point == TRUE], aes(color = expname), alpha = 0.4)
p = p + geom_point(data = allparetos[point == TRUE], color = "#386cb0", alpha = 0.4)
p = p + scale_colour_Publication() + theme_Publication() + scale_fill_Publication()
# p = p + ylab("Value") + labs(colour = "Variant", lty = "Algorithm")
p = p + labs(colour = "", fill = "")
Expand All @@ -78,5 +102,5 @@ for (prob in problems$problem) {
# legend.position = "right", legend.direction = "vertical", legend.box = "vertical")
# p = p + guides(colour = guide_legend(override.aes = list(size = 4, alpha = 0.6)))

ggsave(file.path(plotspath, "plots_final", "front", paste("all_variants", "_", prob, ".pdf", sep = "")), p, width = 9, height = 12, device = "pdf")
ggsave(file.path(plotspath, "front", paste("all_variants", "_", prob, ".pdf", sep = "")), p, width = 9, height = 12, device = "pdf")
}
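A minimal sketch, not part of this commit, of two patterns from viz.R above: offsetting job.id before rbinding a second batch of results so the identifiers from the two registries cannot collide, and mapping terse experiment codes to readable labels with plyr::revalue plus an explicit factor level order for plotting. The toy tables are assumptions.

library(data.table)
library(plyr)

res     = data.table(job.id = 1:3, learner = "SVM",     expname = c("O", "OIHFiFmS", "RS"))
res_xgb = data.table(job.id = 1:3, learner = "xgboost", expname = c("O", "OIHFiFmS", "RS"))

# shift the ids of the second registry so they stay unique after rbind
res_xgb$job.id = res_xgb$job.id + 100000L
res = rbind(res, res_xgb)

# readable labels and a fixed ordering for the legend
res$expname = revalue(res$expname,
  c("O" = "NSGAII", "OIHFiFmS" = "NSGAII+UI+FI+HP+FM(s.a.)"))
res$expname = factor(res$expname,
  levels = c("RS", "NSGAII", "NSGAII+UI+FI+HP+FM(s.a.)"))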
