final version of benchmark design
juliambr committed Apr 6, 2019
1 parent 9b137a0 commit af9a3fc
Showing 3 changed files with 69 additions and 51 deletions.
47 changes: 18 additions & 29 deletions benchmark/helpers.R
@@ -206,9 +206,9 @@ collectBenchmarkResults = function(path, experiments, tab) {
toreduce = ijoin(tab, experiments[[experiment]], by = names(experiments[[experiment]]))
toreduce = ijoin(toreduce, findDone(), by = "job.id")

dir = as.numeric(sapply(list.files("registry/results/"), function(x) strsplit(x, ".rds")[[1]][1]))
dir = data.frame(job.id = dir)
toreduce = ijoin(toreduce, dir)
# dir = as.numeric(sapply(list.files("registry/results/"), function(x) strsplit(x, ".rds")[[1]][1]))
# dir = data.frame(job.id = dir)
# toreduce = ijoin(toreduce, dir)

res = reduceResultsDataTable(toreduce, function(x) collectResult(x$result))
res = ijoin(tab, res, by = "job.id")
@@ -453,15 +453,15 @@ plotPerformanceHout = function(res, plotspath) {
}


plotRanks = function(res, plotspath, logscale = FALSE, metric = "naive.hout.domHV", limits = c(0.37, 1)) {
plotRanks = function(res, plotspath, logscale = FALSE, metric = "naive.hout.domHV", limits = c(0.37, 1), height = 10, width = 7) {

# --- naive.hout.domHV
df = extractFromSummary(res, c("evals", metric))
df = df[evals < 4000, ]
df$gen = (df$evals - 80) / 15
df = df[, replication := 1:length(job.id), by = c("learner", "variant", "problem", "gen")]
df = renameAndRevalue(df)
names(df)[20] = "metric"
names(df)[17] = "metric"

# --- calculate ranks within learner, problem and replication ---
dfr = df[, rank_variant := rank(- metric), by = c("learner", "problem", "evals", "replication")]
@@ -490,7 +490,7 @@ plotRanks = function(res, plotspath, logscale = FALSE, metric = "naive.hout.domH
p = p + guides(lty = guide_legend(order = 1), colour = guide_legend(order = 2))
p = p + xlab("Evaluations")

ggsave(file.path(plotspath, paste(metric, "ranks.pdf", sep = "_")), p, width = 9, height = 6, device = "pdf")
ggsave(file.path(plotspath, paste(gsub("\\.", "", metric), "ranks.pdf", sep = "_")), p, width = 9, height = 6, device = "pdf")

p = ggplot()
p = p + geom_line(data = res_ovr_pl, aes(x = evals, y = value, lty = algorithm, colour = variant), size = 0.6)
@@ -502,7 +502,7 @@ plotRanks = function(res, plotspath, logscale = FALSE, metric = "naive.hout.domH
p = p + guides(lty = guide_legend(order = 1), colour = guide_legend(order = 2))
p = p + xlab("Evaluations")

ggsave(file.path(plotspath, paste(metric, "ranks_perLearner.pdf", sep = "_")), p, width = 7, height = 10)
ggsave(file.path(plotspath, paste(gsub("\\.", "", metric), "ranks_perLearner.pdf", sep = "_")), p, width = 7, height = 10)
}
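A minimal sketch, not part of this commit, of the grouped-ranking pattern used in plotRanks above: variants are ranked within each learner/problem/evals/replication cell via data.table's := with by, and the metric name is stripped of dots before it enters the output file name (the change in this hunk). The toy columns and values below are assumptions.

library(data.table)

# toy data: two variants, one learner/problem/budget, two replications
df = data.table(
  learner = "SVM", problem = "sonar", evals = 80,
  replication = rep(1:2, times = 2),
  variant = rep(c("O", "OIHFiFmS"), each = 2),
  metric = c(0.61, 0.58, 0.66, 0.63)
)

# rank variants within each cell; larger hypervolume gets rank 1
df[, rank_variant := rank(-metric), by = c("learner", "problem", "evals", "replication")]

# strip dots from the metric name before building the plot file name
metric = "naive.hout.domHV"
paste(gsub("\\.", "", metric), "ranks.pdf", sep = "_")   # "naivehoutdomHV_ranks.pdf"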

# --- this is not a general function
@@ -577,7 +577,7 @@ plotHeatmap = function(populations, plotspath) {



calculateSummaryOfMethods = function(res, maxevals = 4000L) {
calculateSummaryOfMethods = function(path, res, maxevals = 4000L) {

# structure of the table
# problem | RS (double budget) | RSI (double budget) | RSIF (double budget) | NSGA-II | MOSMAFS
@@ -609,11 +609,11 @@ calculateSummaryOfMethods = function(res, maxevals = 4000L) {
dfc = dfc[order(dfc$p), ]
dfc$dummycol = NA
dfcc = dfc[, c("problem", "NSGA-II", "+UI", "+UI+FI", "+UI+FI+FM", "+UI+FI+FM (s.a.)", "+UI+HP", "+UI+FI+HP+FM (s.a.)", "dummycol", "RS", "RSI", "RSIF")]
print(xtable::xtable(dfcc, type = "latex", include.rownames=FALSE), file = paste("latex_temp/houtdomHV", "_singlebudget/", lrn, "complete", "_", maxevals, ".tex", sep = ""))
print(xtable::xtable(dfcc, type = "latex", include.rownames=FALSE), file = paste("latex_temp/houtdomHV/", lrn, "complete", "_", maxevals, ".tex", sep = ""))

dfcc = dfc[, c("problem", "n", "p", "NSGA-II", "+UI+FI+HP+FM (s.a.)", "RS", "RSI", "RSIF")]

print(xtable::xtable(dfcc, type = "latex", include.rownames=FALSE), file = paste("latex_temp/houtdomHV", "_singlebudget/", lrn, "_", maxevals, ".tex", sep = ""))
print(xtable::xtable(dfcc, type = "latex", include.rownames=FALSE), file = paste(path, "/houtdomHV/", lrn, "_", maxevals, ".tex", sep = ""))



@@ -626,8 +626,6 @@ calculateSummaryOfMethods = function(res, maxevals = 4000L) {

# print(xtable(dfc, type = "latex", include.rownames=FALSE), file = paste("latex_temp/houtdomHV", "_", method, "/", lrn, ".tex", sep = ""))
}


}
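A minimal sketch, not part of this commit, of the xtable export used throughout calculateSummaryOfMethods above; the data frame, directory, and file name are placeholders, and include.rownames is passed to print.xtable rather than to xtable itself.

library(xtable)

dfcc = data.frame(problem = c("sonar", "wdbc"),
                  `NSGA-II` = c(0.71, 0.93),
                  RS = c(0.65, 0.90),
                  check.names = FALSE)

dir.create("latex_temp/houtdomHV", recursive = TRUE, showWarnings = FALSE)

# write the LaTeX table body to a .tex file for inclusion in the paper
print(xtable(dfcc), type = "latex", include.rownames = FALSE,
      file = "latex_temp/houtdomHV/sonar_wdbc_example.tex")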

g_legend<-function(a.gplot){
@@ -636,7 +634,7 @@ g_legend<-function(a.gplot){
legend <- tmp$grobs[[leg]]
return(legend)}

calculateEvalsToRandomsearch = function(res) {
calculateEvalsToRandomsearch = function(res, path) {

# naive.hout.domHV
df = extractFromSummary(res, c("evals", "naive.hout.domHV"))
@@ -651,6 +649,10 @@ calculateEvalsToRandomsearch = function(res) {
res2 = res2[algorithm == "mosmafs", ]
res2 = res2[, replication := 1:length(job.id), by = c("learner", "variant", "problem")]

res2$RS.beat = 0
res2$RSI.beat = 0
res2$RSIF.beat = 0

for (repl in 1:10) {
for (prob in unique(dfm$problem)) {
for (lrn in unique(dfm$learner)) {
@@ -668,23 +670,10 @@ calculateEvalsToRandomsearch = function(res) {
}
}
}
path = "latex_temp"

saveRDS(res2, file.path(path, "beat_randomsearch_complete.rds"))

# --- imputation
res3 = res2
res3[is.na(res3$RS.beat), ]$RS.beat = 8000L
res3[is.na(res3$RSI.beat), ]$RSI.beat = 8000L
res3[is.na(res3$RSIF.beat), ]$RSIF.beat = 8000L

res3 = res3[, .(RS.beat = mean(RS.beat, na.rm = TRUE),
RS.sd = sd(RS.beat, na.rm = TRUE) / sqrt(360),
RSI.beat = mean(RSI.beat, na.rm = TRUE),
RSI.sd = sd(RSI.beat, na.rm = TRUE) / sqrt(360),
RSIF.beat = mean(RSIF.beat, na.rm = TRUE),
RSIF.sd = sd(RSIF.beat, na.rm = TRUE) / sqrt(360),
test = length(RS.beat)), by = c("variant")]

res3 = res2[, .(RS.beat = mean(RS.beat, na.rm = TRUE),
RS.sd = sd(RS.beat, na.rm = TRUE) / sqrt(360),
RS.nas = mean(is.na(RS.beat)) * 100,
@@ -707,7 +696,7 @@ calculateEvalsToRandomsearch = function(res) {
c("O" = "NSGA-II", "OI" = "+UI", "OIFi" = "+UI+FI", "OIFiFm" = "+UI+FI+FM",
"OIFiFmS" = "+UI+FI+FM (s.a.)", "OIH" = "+UI+HP", "OIHFiFmS" = "+UI+FI+HP+FM (s.a.)"))

names(res3) = c(" ", "RS", "RS.sd", "NAs.1", "RS+UI", "RSI.sd", "NAs.2", "RS+UI+IF", "RSUIIF.sd", "NAs.3", "test")
names(res3) = c(" ", "RS", "RS.sd", "NC.1", "RS+UI", "RSI.sd", "NC.2", "RS+UI+IF", "RSUIIF.sd", "NC.3", "test")

print(xtable::xtable(res3[, c(" ", "RS", "NAs.1", "RS+UI", "NAs.2", "RS+UI+IF", "NAs.3")], type = "latex", include.rownames=FALSE), file = paste("latex_temp/beatRS_with_nas_average_after.tex", sep = ""))
print(xtable::xtable(res3[, c(" ", "RS", "NC.1", "RS+UI", "NC.2", "RS+UI+IF", "NC.3")], type = "latex", include.rownames=FALSE), file = paste("latex_temp/beatRS_with_nas_average_after.tex", sep = ""))
}
17 changes: 11 additions & 6 deletions benchmark/reduce.R
@@ -1,19 +1,24 @@
library(batchtools)
library(dplyr)
library(mlr)
library(mlrCPO)

source("helpers.R")
source("probdesign.R")

# load registry
reg = loadRegistry("registry")
reg = loadRegistry("registry22", writeable = FALSE)
tab = summarizeExperiments(by = c("job.id", "algorithm",
"problem", "learner", "maxeval", "filter", "initialization",
"lambda", "mu", "parent.sel", "chw.bitflip", "adaptive.filter.weights",
"filter.during.run", "surrogate", "MBMOmethod", "propose.points"))
tab = tab[maxeval %in% c(4000), ]
tab = rbind(tab[lambda != 4L, ], tab[is.na(lambda), ])
"filter.during.run"))
tab = tab[learner == "xgboost", ]
# tab = tab[maxeval %in% c(4000), ]
# tab = rbind(tab[lambda != 4L, ], tab[is.na(lambda), ])
done = ijoin(tab, findDone())

path = "results_raw"
path = "results_raw_xgboost"
dir.create(path)

problems = c("wdbc", "ionosphere", "sonar", "hill-valley", "clean1",
"tecator", "USPS", "madeline", "lsvt", "madelon", "isolet", "cnae-9")
@@ -32,7 +37,7 @@ experiments = list(
)

collectBenchmarkResults(path, experiments, tab)
collectParetofront(path, experiments = experiments[c("O", "OIHFiFmS", "RS", "RSI", "RSIF")], tab, problems, learners = c("SVM", "kknn"))
collectParetofront(path, experiments = experiments[c("O", "OIHFiFmS", "RS", "RSI", "RSIF")], tab, problems, learners = c("xgboost"))



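A minimal sketch, not part of this commit, of the batchtools collection pattern that reduce.R follows: load an existing registry read-only, summarize the experimental design, keep only finished jobs, and reduce the stored results into a data.table. The registry path, grouping columns, and the domHV field of the stored result are assumptions.

library(batchtools)

reg = loadRegistry("registry22", writeable = FALSE)   # read-only access

# one row per job with the selected design columns
tab = summarizeExperiments(by = c("job.id", "algorithm", "problem", "learner"))

# restrict to jobs that have finished
done = ijoin(tab, findDone(), by = "job.id")

# extract one value from each stored result and join it back to the design
res = reduceResultsDataTable(done, function(x) list(domHV = x$domHV))
res = ijoin(tab, res, by = "job.id")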
56 changes: 40 additions & 16 deletions benchmark/viz.R
@@ -22,48 +22,72 @@ experiments = list(
RSIF = data.table(algorithm = "randomsearch", initialization = "unif", filter = "custom")
)

# no madelon
# no madeline
problems = data.table(problem = c(
"wdbc", "ionosphere", "sonar", "hill-valley", "clean1",
"tecator", "USPS", "madeline", "lsvt", "madelon", "isolet", "cnae-9"),
n = c(569, 351, 208, 1212, 476, 240, 1424, 3140, 126, 2600, 600, 240),
p = c(30, 34, 60, 100, 168, 124, 256, 259, 310, 500, 617, 856))

problems = problems[- which(problem %in% c("madeline", "madelon"))]

# savepath
plotspath = "results_plots"
latex_path = "latex_temp"

# --- read the data ---
datapath = "results_raw"
plotspath = "results_plots/plots_final"
learners = c("SVM", "kknn")
reslist = lapply(names(experiments), function(x) readRDS(file.path(datapath, x, "result.rds")))
res = do.call("rbind", reslist)
res = res[problem %in% problems$problem, ]
res = res[problem %in% problems$problem & learner %in% learners, ]
res$learner = factor(res$learner, levels = learners)
res$surrogate = NULL
res$MBMOmethod = NULL
res$propose.points = NULL

datapath = "results_raw_xgboost"
learners = c("xgboost")
reslist = lapply(names(experiments), function(x) readRDS(file.path(datapath, x, "result.rds")))
res_xgb = do.call("rbind", reslist)
res_xgb = res_xgb[problem %in% problems$problem & learner %in% learners, ]
res_xgb$learner = factor(res_xgb$learner, levels = learners)
res_xgb$job.id = res_xgb$job.id + 100000L

poplist = lapply(names(experiments), function(x) readRDS(file.path(datapath, x, "population.rds")))
pops = do.call("rbind", poplist)
pops = pops[problem %in% problems$problem, ]
res = rbind(res, res_xgb)

# --- opt.paths per task per learner ---
plotRanks(res, plotspath, metric = "eval.domHV", limits = c(0.2, 1))
plotRanks(res, plotspath, metric = "naive.hout.domHV", limits = c(0.2, 1))
calculateEvalsToRandomsearch(res)
calculateSummaryOfMethods(res, maxevals = 4000L) {
calculateSummaryOfMethods(res, maxevals = 2000L) {
plotRanks(res, plotspath, metric = "eval.domHV", limits = c(0.2, 1))#, height = 8, width = 7)
plotRanks(res, plotspath, metric = "naive.hout.domHV", limits = c(0.2, 1))#, height = 8, width = 7)
calculateEvalsToRandomsearch(res, path = latex_path)
calculateSummaryOfMethods(res, maxevals = 4000L)
calculateSummaryOfMethods(res, maxevals = 2000L)


for (prob in problems$problem) {
parfrnt = readRDS(paste("results_raw/pareto_examples/paretofront.rds", sep = ""))
parfrnt$surrogate = NULL
parfrnt$MBMOmethod = NULL
parfrnt$propose.points = NULL
parfrnt_xgb = readRDS(paste("results_raw_xgboost/pareto_examples/paretofront.rds", sep = ""))
parfrnt = rbind(parfrnt, parfrnt_xgb)

parfrnt = readRDS(paste("results_raw/pareto_examples/paretofront.rds", sep = ""))
for (prob in problems$problem) {

allparetos = parfrnt[problem == prob, ]
allparetos = allparetos[expname %in% c("O", "OIHFiFmS", "RS", "RSI", "RSIF"), ]

allparetos$expname = revalue(allparetos$expname,
c("O" = "NSGAII", "OI" = "NSGAII+UI", "OIFi" = "NSGAII+UI+FI", "OIFiFm" = "NSGAII+UI+FI+FM",
"OIFiFmS" = "NSGAII+UI+FI+FM (s.a.)", "OIH" = "NSGAII+UI+HP", "OIHFiFmS" = "NSGAII+UI+FI+HP+FM(s.a.)",
"OIFiFmS" = "NSGAII+UI+FI+FM(s.a.)", "OIH" = "NSGAII+UI+HP", "OIHFiFmS" = "NSGAII+UI+FI+HP+FM(s.a.)",
"RS" = "RS", "RSI" = "RS+UI", "RSIF" = "RS+UI+IF"))
allparetos$expname = factor(allparetos$expname, levels = c("RS", "RS+UI", "RS+UI+IF", "NSGAII", "NSGAII+UI+FI+HP+FM(s.a.)"))
allparetos$learner = factor(allparetos$learner, levels = c("SVM", "kknn"))
allparetos$learner = factor(allparetos$learner, levels = c("SVM", "kknn", "xgboost"))

p = ggplot(allparetos, aes(x = mmce, y = featfrac, group = instance))
p = p + geom_polygon(data = allparetos, fill = "grey", alpha = 0.05)
p = p + geom_line(data = allparetos, colour = "grey", alpha = 0.6)
p = p + geom_point(data = allparetos[point == TRUE], aes(color = expname), alpha = 0.4)
p = p + geom_point(data = allparetos[point == TRUE], color = "#386cb0", alpha = 0.4)
p = p + scale_colour_Publication() + theme_Publication() + scale_fill_Publication()
# p = p + ylab("Value") + labs(colour = "Variant", lty = "Algorithm")
p = p + labs(colour = "", fill = "")
Expand All @@ -78,5 +102,5 @@ for (prob in problems$problem) {
# legend.position = "right", legend.direction = "vertical", legend.box = "vertical")
# p = p + guides(colour = guide_legend(override.aes = list(size = 4, alpha = 0.6)))

ggsave(file.path(plotspath, "plots_final", "front", paste("all_variants", "_", prob, ".pdf", sep = "")), p, width = 9, height = 12, device = "pdf")
ggsave(file.path(plotspath, "front", paste("all_variants", "_", prob, ".pdf", sep = "")), p, width = 9, height = 12, device = "pdf")
}
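A minimal sketch, not part of this commit, of two patterns from viz.R above: offsetting job.id before rbinding a second batch of results so the identifiers from the two registries cannot collide, and mapping terse experiment codes to readable labels with plyr::revalue plus an explicit factor level order for plotting. The toy tables are assumptions.

library(data.table)
library(plyr)

res     = data.table(job.id = 1:3, learner = "SVM",     expname = c("O", "OIHFiFmS", "RS"))
res_xgb = data.table(job.id = 1:3, learner = "xgboost", expname = c("O", "OIHFiFmS", "RS"))

# shift the ids of the second registry so they stay unique after rbind
res_xgb$job.id = res_xgb$job.id + 100000L
res = rbind(res, res_xgb)

# readable labels and a fixed ordering for the legend
res$expname = revalue(res$expname,
  c("O" = "NSGAII", "OIHFiFmS" = "NSGAII+UI+FI+HP+FM(s.a.)"))
res$expname = factor(res$expname,
  levels = c("RS", "NSGAII", "NSGAII+UI+FI+HP+FM(s.a.)"))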
