From 4049ac6afe5c29e0214a291cea171027927d8ba0 Mon Sep 17 00:00:00 2001 From: Oleg Barenboim Date: Thu, 13 Aug 2020 10:57:29 -0400 Subject: [PATCH] Merge pull request #20420 from agrare/use_systemd_services_for_current_workers_list Cleanup failed systemd services (cherry picked from commit 8338fdcb8429ce60f00b74a684bbdc781fa1a3c9) --- .../miq_server/worker_management/monitor.rb | 24 ++++- .../worker_management/monitor/quiesce.rb | 2 - .../worker_management/monitor/systemd.rb | 73 +++++++++++++++ .../worker_management/monitor/systemd_spec.rb | 92 +++++++++++++++++++ .../worker_management/monitor_spec.rb | 7 ++ 5 files changed, 193 insertions(+), 5 deletions(-) create mode 100644 app/models/miq_server/worker_management/monitor/systemd.rb create mode 100644 spec/models/miq_server/worker_management/monitor/systemd_spec.rb diff --git a/app/models/miq_server/worker_management/monitor.rb b/app/models/miq_server/worker_management/monitor.rb index e8ec7879ae1..aac11482229 100644 --- a/app/models/miq_server/worker_management/monitor.rb +++ b/app/models/miq_server/worker_management/monitor.rb @@ -8,6 +8,7 @@ module MiqServer::WorkerManagement::Monitor include_concern 'Start' include_concern 'Status' include_concern 'Stop' + include_concern 'Systemd' include_concern 'SystemLimits' include_concern 'Validation' @@ -22,9 +23,7 @@ def monitor_workers MiqWorker.status_update_all - check_not_responding - check_pending_stop - clean_worker_records + cleanup_failed_workers # Monitor all remaining current worker records miq_workers.where(:status => MiqWorker::STATUSES_CURRENT_OR_STARTING).each do |worker| @@ -63,6 +62,25 @@ def sync_workers result end + def cleanup_failed_workers + check_not_responding + check_pending_stop + clean_worker_records + + if podified? + elsif systemd? + cleanup_failed_systemd_services + end + end + + def podified? + MiqEnvironment::Command.is_podified? + end + + def systemd? + MiqEnvironment::Command.supports_systemd? + end + def clean_worker_records worker_deleted = false miq_workers.each do |w| diff --git a/app/models/miq_server/worker_management/monitor/quiesce.rb b/app/models/miq_server/worker_management/monitor/quiesce.rb index a5b201a2dcf..46434d66247 100644 --- a/app/models/miq_server/worker_management/monitor/quiesce.rb +++ b/app/models/miq_server/worker_management/monitor/quiesce.rb @@ -36,8 +36,6 @@ def quiesce_workers_loop miq_workers.each do |w| if w.containerized_worker? w.delete_container_objects - elsif w.systemd_worker? - w.stop_systemd_worker else stop_worker(w) end diff --git a/app/models/miq_server/worker_management/monitor/systemd.rb b/app/models/miq_server/worker_management/monitor/systemd.rb new file mode 100644 index 00000000000..02db4e39e9e --- /dev/null +++ b/app/models/miq_server/worker_management/monitor/systemd.rb @@ -0,0 +1,73 @@ +module MiqServer::WorkerManagement::Monitor::Systemd + extend ActiveSupport::Concern + + def cleanup_failed_systemd_services + failed_service_names = systemd_failed_miq_services.map { |service| service[:name] } + return if failed_service_names.empty? + + _log.info("Disabling failed unit files: [#{failed_service_names.join(", ")}]") + systemd_stop_services(failed_service_names) + end + + def systemd_failed_miq_services + miq_services(systemd_failed_services) + end + + def systemd_all_miq_services + miq_services(systemd_services) + end + + private + + def systemd_manager + @systemd_manager ||= begin + require "dbus/systemd" + DBus::Systemd::Manager.new + end + end + + def systemd_stop_services(service_names) + service_names.each do |service_name| + systemd_manager.StopUnit(service_name, "replace") + + service_settings_dir = systemd_unit_dir.join("#{service_name}.d") + FileUtils.rm_r(service_settings_dir) if service_settings_dir.exist? + end + + systemd_manager.DisableUnitFiles(service_names, false) + end + + def systemd_unit_dir + Pathname.new("/etc/systemd/system") + end + + def miq_services(services) + services.select { |unit| systemd_miq_service_base_names.include?(systemd_service_base_name(unit)) } + end + + def systemd_miq_service_base_names + @systemd_miq_service_base_names ||= begin + MiqWorkerType.worker_class_names.map(&:constantize).map(&:service_base_name) + end + end + + def systemd_service_name(unit) + File.basename(unit[:name], ".*") + end + + def systemd_service_base_name(unit) + systemd_service_name(unit).split("@").first + end + + def systemd_failed_services + systemd_services.select { |service| service[:active_state] == "failed" } + end + + def systemd_services + systemd_units.select { |unit| File.extname(unit[:name]) == ".service" } + end + + def systemd_units + systemd_manager.units + end +end diff --git a/spec/models/miq_server/worker_management/monitor/systemd_spec.rb b/spec/models/miq_server/worker_management/monitor/systemd_spec.rb new file mode 100644 index 00000000000..557f77433f5 --- /dev/null +++ b/spec/models/miq_server/worker_management/monitor/systemd_spec.rb @@ -0,0 +1,92 @@ +RSpec.describe MiqServer::WorkerManagement::Monitor::Systemd do + let(:units) { [] } + let(:server) { EvmSpecHelper.create_guid_miq_server_zone.second } + let(:systemd_manager) { double("DBus::Systemd::Manager") } + + before do + MiqWorkerType.seed + allow(server).to receive(:systemd_manager).and_return(systemd_manager) + allow(systemd_manager).to receive(:units).and_return(units) + end + + context "#cleanup_failed_systemd_services" do + context "with no failed services" do + let(:units) { [{:name => "generic@68400a7e-1747-4f10-be2a-d0fc91b705ca.service", :description => "ManageIQ Generic Worker", :load_state => "loaded", :active_state => "active", :sub_state => "plugged", :job_id => 0, :job_type => "", :job_object_path => "/"}] } + + it "doesn't call DisableUnitFiles" do + expect(systemd_manager).not_to receive(:DisableUnitFiles) + server.cleanup_failed_systemd_services + end + end + + context "with failed services" do + let(:units) { [{:name => "generic@68400a7e-1747-4f10-be2a-d0fc91b705ca.service", :description => "ManageIQ Generic Worker", :load_state => "loaded", :active_state => "failed", :sub_state => "plugged", :job_id => 0, :job_type => "", :job_object_path => "/"}] } + + it "calls DisableUnitFiles with the service name" do + expect(systemd_manager).to receive(:StopUnit).with("generic@68400a7e-1747-4f10-be2a-d0fc91b705ca.service", "replace") + expect(systemd_manager).to receive(:DisableUnitFiles).with(["generic@68400a7e-1747-4f10-be2a-d0fc91b705ca.service"], false) + server.cleanup_failed_systemd_services + end + end + end + + context "#systemd_all_miq_services" do + let(:units) do + [ + {:name => "generic@68400a7e-1747-4f10-be2a-d0fc91b705ca.service", :active_state => "failed"}, + {:name => "ui@cfe2c489-5c93-4b77-8620-cf6b1d3ec595.service", :active_state => "active"}, + {:name => "ssh.service", :active_state => "active"} + ] + end + + it "filters out non-miq services" do + expect(server.systemd_all_miq_services.count).to eq(2) + end + end + + context "#systemd_failed_miq_services" do + let(:units) do + [ + {:name => "generic@68400a7e-1747-4f10-be2a-d0fc91b705ca.service", :active_state => "failed"}, + {:name => "ui@cfe2c489-5c93-4b77-8620-cf6b1d3ec595.service", :active_state => "active"} + ] + end + + it "filters out only failed services" do + expect(server.systemd_failed_miq_services.count).to eq(1) + end + end + + context "#systemd_miq_service_base_names (private)" do + it "returns the minimal_class_name" do + expect(server.send(:systemd_miq_service_base_names)).to include("generic", "ui") + end + end + + context "#systemd_services (private)" do + let(:units) do + [ + {:name => "generic@68400a7e-1747-4f10-be2a-d0fc91b705ca.service"}, + {:name => "miq.slice"} + ] + end + + it "filters out non-service files" do + expect(server.send(:systemd_services).count).to eq(1) + end + end + + context "#systemd_service_base_name (private)" do + it "with a non-templated service" do + expect(server.send(:systemd_service_base_name, :name => "miq.slice")).to eq("miq") + end + + it "with a template service" do + expect(server.send(:systemd_service_base_name, :name => "generic@.service")).to eq("generic") + end + + it "with a templated service instance" do + expect(server.send(:systemd_service_base_name, :name => "generic@68400a7e-1747-4f10-be2a-d0fc91b705ca.service")).to eq("generic") + end + end +end diff --git a/spec/models/miq_server/worker_management/monitor_spec.rb b/spec/models/miq_server/worker_management/monitor_spec.rb index b5541456c6d..daf564665cd 100644 --- a/spec/models/miq_server/worker_management/monitor_spec.rb +++ b/spec/models/miq_server/worker_management/monitor_spec.rb @@ -56,6 +56,13 @@ expect(MiqPriorityWorker).to receive(:sync_workers).and_return(:adds => [123]) expect(server.sync_workers).to eq("MiqPriorityWorker"=>{:adds=>[123]}) end + + it "calls cleanup_failed_services" do + allow(MiqWorkerType).to receive(:worker_class_names).and_return([]) + allow(MiqEnvironment::Command).to receive(:supports_systemd?).and_return(true) + expect(server).to receive(:cleanup_failed_systemd_services) + server.cleanup_failed_workers + end end end end