From 18520690d6b763853ef1a5481a46a17b755a4ed4 Mon Sep 17 00:00:00 2001 From: luav Date: Mon, 18 Dec 2017 15:39:46 +0100 Subject: [PATCH] Combined function changed to geometric mean as the more indicative one --- README.md | 8 ++++---- args.ggo | 2 +- autogen/cmdline.c | 4 ++-- autogen/cmdline.h | 6 +++--- include/interface.h | 9 ++++++++- include/interface.hpp | 4 +++- 6 files changed, 21 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 8b383aa..6681b97 100644 --- a/README.md +++ b/README.md @@ -38,9 +38,9 @@ Execution Options: $ ../xmeasures -h xmeasures 3.1 -Extrinsic measures evaluation: F1 (prob, harm and score) for overlapping -multi-resolution clusterings with possible unequal node base and standard NMI -for non-overlapping clustering on a single resolution. +Extrinsic measures evaluation: F1 (probabilistic, harmonic and standard score) +for overlapping multi-resolution clusterings with possible unequal node base and +standard NMI for non-overlapping clustering on a single resolution. Usage: xmeasures [OPTIONS] clustering1 clustering2 @@ -113,7 +113,7 @@ F1 Options: -k, --kind[=ENUM] kind of the matching policy: - w - weighted (default) - u - unweighed - - c - combined: F1(w, u) + - c - combined(w, u) using geometric mean (possible values="weighted", "unweighed", "combined" default=`weighted') diff --git a/args.ggo b/args.ggo index ccbf1b9..55586b3 100644 --- a/args.ggo +++ b/args.ggo @@ -70,7 +70,7 @@ NOTE: F1p <= F1h <= F1s, where: option "kind" k "kind of the matching policy: - w - weighted (default) - u - unweighed - - c - combined: F1(w, u) + - c - combined(w, u) using geometric mean " values ="weighted","unweighed","combined" enum default="weighted" argoptional dependon="f1" diff --git a/autogen/cmdline.c b/autogen/cmdline.c index 693cc23..bec25d8 100644 --- a/autogen/cmdline.c +++ b/autogen/cmdline.c @@ -42,7 +42,7 @@ const char *gengetopt_args_info_help[] = { " -d, --detailed detailed (verbose) results output (default=off)", "\nF1 Options:", " -f, --f1[=ENUM] evaluate F1 of the [weighted] average of the greatest\n (maximal) match by F1 or partial probability.\n NOTE: F1p <= F1h <= F1s, where:\n - p (F1p) - Harmonic mean of the [weighted]\n average of Partial Probabilities, the most\n discriminative and satisfies the largest number of\n the Formal Constraints (homogeneity, completeness,\n rag bag, size/quantity, balance);\n - h (F1h) - Harmonic mean of the [weighted]\n average of F1s;\n - s (F1s) - Arithmetic mean (average) of the\n [weighted] average of F1s, Standard F1-Score, the\n least discriminative and satisfies the lowest\n number of the Formal Constraints.\n (possible values=\"partprob\", \"harmonic\",\n \"standard\" default=`partprob')", - " -k, --kind[=ENUM] kind of the matching policy:\n - w - weighted (default)\n - u - unweighed\n - c - combined: F1(w, u)\n (possible values=\"weighted\", \"unweighed\",\n \"combined\" default=`weighted')", + " -k, --kind[=ENUM] kind of the matching policy:\n - w - weighted (default)\n - u - unweighed\n - c - combined(w, u) using geometric mean\n (possible values=\"weighted\", \"unweighed\",\n \"combined\" default=`weighted')", "\nNMI Options:", " -n, --nmi evaluate NMI (Normalized Mutual Information)\n (default=off)", " -a, --all evaluate all NMIs using sqrt, avg and min\n denominators besides the max one (default=off)", @@ -716,7 +716,7 @@ cmdline_parser_internal ( case 'k': /* kind of the matching policy: - w - weighted (default) - u - unweighed - - c - combined: F1(w, u) + - c - combined(w, u) using geometric mean . */ diff --git a/autogen/cmdline.h b/autogen/cmdline.h index 938a401..aa057d4 100644 --- a/autogen/cmdline.h +++ b/autogen/cmdline.h @@ -76,17 +76,17 @@ struct gengetopt_args_info enum enum_kind kind_arg; /**< @brief kind of the matching policy: - w - weighted (default) - u - unweighed - - c - combined: F1(w, u) + - c - combined(w, u) using geometric mean (default='weighted'). */ char * kind_orig; /**< @brief kind of the matching policy: - w - weighted (default) - u - unweighed - - c - combined: F1(w, u) + - c - combined(w, u) using geometric mean original value given at command line. */ const char *kind_help; /**< @brief kind of the matching policy: - w - weighted (default) - u - unweighed - - c - combined: F1(w, u) + - c - combined(w, u) using geometric mean help description. */ int nmi_flag; /**< @brief evaluate NMI (Normalized Mutual Information) (default=off). */ const char *nmi_help; /**< @brief evaluate NMI (Normalized Mutual Information) help description. */ diff --git a/include/interface.h b/include/interface.h index 04fcb45..83540fd 100644 --- a/include/interface.h +++ b/include/interface.h @@ -479,7 +479,7 @@ enum struct Match: MatchBase { NONE = 0, //!< Note initialized WEIGHTED, //!< Weighted matching by the number of members in each cluster (macro weighting) UNWEIGHTED, //!< Unweighted matching of each cluster (micro weighting) - COMBINED //!< Combined weighting of macro and micro: F1(macro_weighting, micro_weighting) + COMBINED //!< Combined of macro and micro weightings using geometric mean }; //! \brief String representation of the Match @@ -653,6 +653,13 @@ class Collection: public NodeBaseI { //! \return AccProb - resulting mean AccProb hmean(AccProb a, AccProb b) noexcept; +//! \brief Geometric mean +//! +//! \param a AccProb - first item +//! \param b AccProb - second item +//! \return AccProb - resulting mean +AccProb gmean(AccProb a, AccProb b) noexcept; + //! \brief Arithmetic mean (average) //! //! \param a AccProb - first item diff --git a/include/interface.hpp b/include/interface.hpp index 60a5d2b..9d479de 100644 --- a/include/interface.hpp +++ b/include/interface.hpp @@ -339,7 +339,9 @@ Prob Collection::f1(const CollectionT& cn1, const CollectionT& cn2, F1 ki const AccProb resw = kind != F1::STANDARD ? hmean(f1ga1w, f1ga2w) : (f1ga1w + f1ga2w) / 2; - return hmean(res, resw); + // Note: geometric mean >= harmonic mean for [0, 1] and yields more indicative values, + // dropping the value not so much when (usually) the weighted match is larger + return gmean(res, resw); } return res; }