diff --git a/gcomm/src/conf.cpp b/gcomm/src/conf.cpp index 37b313da1..dbad82a26 100644 --- a/gcomm/src/conf.cpp +++ b/gcomm/src/conf.cpp @@ -109,6 +109,8 @@ std::string const gcomm::Conf::PcWaitPrimTimeout = PcPrefix + "wait_prim_timeout"; std::string const gcomm::Conf::PcWeight = PcPrefix + "weight"; std::string const gcomm::Conf::PcRecovery = PcPrefix + "recovery"; +std::string const gcomm::Conf::PcRecoverMinimumWeight = + PcPrefix + "recover_minimum_weight"; void gcomm::Conf::register_params(gu::Config& cnf) @@ -172,6 +174,7 @@ gcomm::Conf::register_params(gu::Config& cnf) GCOMM_CONF_ADD_DEFAULT(PcWaitPrimTimeout); GCOMM_CONF_ADD_DEFAULT(PcWeight); GCOMM_CONF_ADD_DEFAULT(PcRecovery); + GCOMM_CONF_ADD_DEFAULT(PcRecoverMinimumWeight); #undef GCOMM_CONF_ADD #undef GCOMM_CONF_ADD_DEFAULT diff --git a/gcomm/src/defaults.cpp b/gcomm/src/defaults.cpp index e2da55183..a892e9bc3 100644 --- a/gcomm/src/defaults.cpp +++ b/gcomm/src/defaults.cpp @@ -51,4 +51,5 @@ namespace gcomm std::string const Defaults::PcWaitPrimTimeout = "P30S"; std::string const Defaults::PcWeight = "1"; std::string const Defaults::PcRecovery = "1"; + std::string const Defaults::PcRecoverMinimumWeight = "0"; } diff --git a/gcomm/src/defaults.hpp b/gcomm/src/defaults.hpp index ef5c8ef38..f7ecce3df 100644 --- a/gcomm/src/defaults.hpp +++ b/gcomm/src/defaults.hpp @@ -49,6 +49,7 @@ namespace gcomm static std::string const PcWaitPrimTimeout ; static std::string const PcWeight ; static std::string const PcRecovery ; + static std::string const PcRecoverMinimumWeight ; }; } diff --git a/gcomm/src/gcomm/conf.hpp b/gcomm/src/gcomm/conf.hpp index 98cebdfc7..dae48ad96 100644 --- a/gcomm/src/gcomm/conf.hpp +++ b/gcomm/src/gcomm/conf.hpp @@ -410,6 +410,8 @@ namespace gcomm */ static std::string const PcRecovery; + static std::string const PcRecoverMinimumWeight; + static void register_params(gu::Config&); }; diff --git a/gcomm/src/gcomm/view.hpp b/gcomm/src/gcomm/view.hpp index d9e074702..da53c860f 100644 --- a/gcomm/src/gcomm/view.hpp +++ b/gcomm/src/gcomm/view.hpp @@ -236,6 +236,7 @@ namespace gcomm std::istream& read_stream(std::istream& is); void write_file() const; bool read_file(); + static bool file_exists(const char* fname = NULL); static void remove_file(gu::Config& conf); bool operator== (const ViewState& vst) const { diff --git a/gcomm/src/pc.cpp b/gcomm/src/pc.cpp index d75b6cbc9..d26a48627 100644 --- a/gcomm/src/pc.cpp +++ b/gcomm/src/pc.cpp @@ -24,12 +24,33 @@ void gcomm::PC::handle_up(const void* cid, const Datagram& rb, um.has_view() && um.view().id().type() == V_PRIM) { - ViewState vst(const_cast(uuid()), - const_cast(um.view()), - conf_); - log_info << "save pc into disk"; - vst.write_file(); + size_t total_weight_new= weighted_sum(um.view().members(), + pc_->instances()); + /* + When pc.recover_minimum_weight is set to non-zero, save the PC + state on disk iff the new total weight is greater than or equal + to the specified value. + */ + if (pc_recover_minimum_weight_ == 0 || /* backward compatibility */ + total_weight_new >= pc_recover_minimum_weight_) + { + ViewState vst(const_cast(uuid()), + const_cast(um.view()), + conf_); + log_info << "Saving PC state on disk."; + vst.write_file(); + } + else + { + log_info << "New PC weight is less than the configured " + "pc.recover_minimum_weight, state file not " + "being saved."; + } + + // Update total_weight + total_weight_= total_weight_new; } + send_up(rb, um); } @@ -74,6 +95,11 @@ std::string gcomm::PC::listen_addr() const void gcomm::PC::connect(bool start_prim) { + if (start_prim && pc_recovery_ && ViewState::file_exists()) + { + gu_throw_error(EPROTO) << "PC state file exists, aborting bootstrap."; + } + try { // for backward compatibility with old approach: gcomm://0.0.0.0 @@ -217,7 +243,18 @@ void gcomm::PC::close(bool force) pstack_.pop_proto(pc_); pstack_.pop_proto(evs_); pstack_.pop_proto(gmcast_); - ViewState::remove_file(conf_); + + if (pc_recover_minimum_weight_ == 0 || /* backward compatibility */ + total_weight_ > pc_recover_minimum_weight_) + { + ViewState::remove_file(conf_); + } + else if (ViewState::file_exists()) + { + log_info << "New PC weight has fallen below the configured " + "pc.recover_minimum_weight; state file not being " + "deleted."; + } closed_ = true; } @@ -240,6 +277,10 @@ gcomm::PC::PC(Protonet& net, const gu::URI& uri) : Defaults::PcAnnounceTimeout)), pc_recovery_ (param(conf_, uri, Conf::PcRecovery, Defaults::PcRecovery)), + pc_recover_minimum_weight_ (param(conf_, uri, + Conf::PcRecoverMinimumWeight, + Defaults::PcRecoverMinimumWeight)), + total_weight_(0), rst_uuid_(), rst_view_() diff --git a/gcomm/src/pc.hpp b/gcomm/src/pc.hpp index e8c837810..545393c59 100644 --- a/gcomm/src/pc.hpp +++ b/gcomm/src/pc.hpp @@ -47,11 +47,13 @@ namespace gcomm evs::Proto* evs_; // EVS protocol layer pc::Proto* pc_; // PC protocol layer bool closed_; // flag for destructor - // Period to wait graceful leave - gu::datetime::Period linger_; + gu::datetime::Period linger_; // Period to wait graceful leave gu::datetime::Period announce_timeout_; - bool pc_recovery_; + bool pc_recovery_; // Automatic PC recovery + // Minimum PC weight for recovery to kick-in + size_t pc_recover_minimum_weight_; + size_t total_weight_; // Current total PC weight UUID rst_uuid_; View rst_view_; diff --git a/gcomm/src/pc_proto.cpp b/gcomm/src/pc_proto.cpp index 14c6a9245..2eb144e14 100644 --- a/gcomm/src/pc_proto.cpp +++ b/gcomm/src/pc_proto.cpp @@ -423,8 +423,8 @@ void gcomm::pc::Proto::handle_first_trans(const View& view) // Compute weighted sum of members in node list. If member cannot be found // from node_map its weight is assumed to be zero. -static size_t weighted_sum(const gcomm::NodeList& node_list, - const gcomm::pc::NodeMap& node_map) +size_t weighted_sum(const gcomm::NodeList& node_list, + const gcomm::pc::NodeMap& node_map) { size_t sum(0); for (gcomm::NodeList::const_iterator i(node_list.begin()); diff --git a/gcomm/src/pc_proto.hpp b/gcomm/src/pc_proto.hpp index 70d39e810..5d1022b93 100644 --- a/gcomm/src/pc_proto.hpp +++ b/gcomm/src/pc_proto.hpp @@ -190,6 +190,11 @@ class gcomm::pc::Proto : public Protolay rst_view -> id().seq())); } const View* restored_view() const { return rst_view_; } + const NodeMap& instances() const + { + return instances_; + } + private: friend std::ostream& operator<<(std::ostream& os, const Proto& p); Proto (const Proto&); @@ -229,5 +234,7 @@ class gcomm::pc::Proto : public Protolay View* rst_view_; // restored PC view }; +size_t weighted_sum(const gcomm::NodeList& node_list, + const gcomm::pc::NodeMap& node_map); #endif // PC_PROTO_HPP diff --git a/gcomm/src/view.cpp b/gcomm/src/view.cpp index 4ab203720..235ebc649 100644 --- a/gcomm/src/view.cpp +++ b/gcomm/src/view.cpp @@ -322,6 +322,15 @@ bool gcomm::ViewState::read_file() } } +bool gcomm::ViewState::file_exists(const char* fname) +{ + if (fname == NULL) fname = COMMON_VIEW_STAT_FILE; + if (access(fname, F_OK) == 0) { + return true; + } + return false; +} + // remove_file is static function, it should remove the view // state file even if there is no ViewState object around. // View state file name is derived in the same way as for