Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2020 - 2022 CESNET, https://photonics.cesnet.cz/ |
| 3 | * |
| 4 | * Written by Tomáš Pecka <tomas.pecka@fit.cvut.cz> |
| 5 | * |
| 6 | */ |
| 7 | #include "SystemdUnits.h" |
Tomáš Pecka | ad3247f | 2023-05-11 15:56:26 +0200 | [diff] [blame] | 8 | #include "utils/alarms.h" |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 9 | #include "utils/log.h" |
| 10 | #include "utils/sysrepo.h" |
| 11 | |
namespace {
/** @brief Alarm type identifier used for all alarms and alarm-inventory entries created in this file */
constexpr auto ALARM_ID = "velia-alarms:systemd-unit-failure";
/** @brief Severity reported while a unit is considered failed; also the only severity listed in the alarm inventory */
constexpr auto ALARM_SEVERITY = "critical";
/** @brief Description pushed together with the alarm-inventory entry */
constexpr auto ALARM_INVENTORY_DESCRIPTION = "The systemd service is considered in failed state.";
}
| 17 | |
| 18 | namespace velia::health { |
| 19 | |
| 20 | /** @brief Construct the systemd unit watcher for arbitrary dbus object. Mainly for tests. */ |
| 21 | SystemdUnits::SystemdUnits(sysrepo::Session session, sdbus::IConnection& connection, const std::string& busname, const std::string& managerObjectPath, const std::string& managerIface, const std::string& unitIface) |
| 22 | : m_log(spdlog::get("health")) |
| 23 | , m_srSession(std::move(session)) |
| 24 | , m_busName(busname) |
| 25 | , m_unitIface(unitIface) |
| 26 | , m_proxyManager(sdbus::createProxy(connection, m_busName, managerObjectPath)) |
| 27 | { |
| 28 | utils::ensureModuleImplemented(m_srSession, "sysrepo-ietf-alarms", "2022-02-17"); |
| 29 | utils::ensureModuleImplemented(m_srSession, "velia-alarms", "2022-07-12"); |
| 30 | |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 31 | /* Track all current units. Method ListUnits() -> a(ssssssouso) returns a DBus struct type with information |
| 32 | * about the unit (see https://www.freedesktop.org/wiki/Software/systemd/dbus/#Manager-ListUnits). |
Tomáš Pecka | 94ebc36 | 2024-01-24 16:46:39 +0100 | [diff] [blame] | 33 | * In our code we need the fields: |
| 34 | * - 0: the unit name |
| 35 | * - 6: unit object path |
| 36 | * - 3: unit activeState |
| 37 | * - 4: unit subState |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 38 | */ |
| 39 | std::vector<sdbus::Struct<std::string, std::string, std::string, std::string, std::string, std::string, sdbus::ObjectPath, uint32_t, std::string, sdbus::ObjectPath>> units; |
Tomáš Pecka | ca85a6d | 2024-01-24 16:55:44 +0100 | [diff] [blame] | 40 | std::vector<std::string> unitNames; |
| 41 | |
| 42 | // First, fetch all currently loaded units, register to their PropertiesChanged signal and create the alarm-inventory entries in a *single* edit |
| 43 | m_proxyManager->callMethod("ListUnits").onInterface(managerIface).storeResultsTo(units); |
| 44 | std::transform(units.begin(), units.end(), std::back_inserter(unitNames), [](const auto& unit) { return unit.template get<0>(); }); |
Tomáš Pecka | 14f912b | 2024-01-30 09:39:20 +0100 | [diff] [blame^] | 45 | alarms::pushInventory(m_srSession, ALARM_ID, std::nullopt, {ALARM_SEVERITY}, ALARM_INVENTORY_DESCRIPTION, unitNames); |
Tomáš Pecka | ca85a6d | 2024-01-24 16:55:44 +0100 | [diff] [blame] | 46 | |
| 47 | for (const auto& unit : units) { |
| 48 | registerSystemdUnit(connection, unit.get<0>(), unit.get<6>(), UnitState{unit.get<3>(), unit.get<4>()}, RegisterAlarmInventory::No); |
| 49 | } |
| 50 | |
| 51 | // Subscribe to systemd events. Systemd may not generate signals unless explicitly called |
| 52 | m_proxyManager->callMethod("Subscribe").onInterface(managerIface).withArguments().dontExpectReply(); |
| 53 | |
| 54 | // Register to a signal introducing new unit. Newly loaded units into systemd can now start coming. The corresponding alarm MUST be registered because it was not yet. |
| 55 | m_proxyManager->uponSignal("UnitNew").onInterface(managerIface).call([&](const std::string& unitName, const sdbus::ObjectPath& unitObjectPath) { |
| 56 | registerSystemdUnit(connection, unitName, unitObjectPath, std::nullopt, RegisterAlarmInventory::Yes); |
| 57 | }); |
| 58 | m_proxyManager->finishRegistration(); |
| 59 | |
| 60 | // Ask for all the units once again. There could have been some that were created between the first ListUnits call and the UnitNew subscription |
| 61 | units.clear(); |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 62 | m_proxyManager->callMethod("ListUnits").onInterface(managerIface).storeResultsTo(units); |
| 63 | for (const auto& unit : units) { |
Tomáš Pecka | ca85a6d | 2024-01-24 16:55:44 +0100 | [diff] [blame] | 64 | registerSystemdUnit(connection, unit.get<0>(), unit.get<6>(), UnitState{unit.get<3>(), unit.get<4>()}, RegisterAlarmInventory::Yes); |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 65 | } |
| 66 | } |
| 67 | |
| 68 | /** @brief Construct the systemd watcher for well-known systemd paths. */ |
| 69 | SystemdUnits::SystemdUnits(sysrepo::Session session, sdbus::IConnection& connection) |
| 70 | : SystemdUnits(session, connection, "org.freedesktop.systemd1", "/org/freedesktop/systemd1", "org.freedesktop.systemd1.Manager", "org.freedesktop.systemd1.Unit") |
| 71 | { |
| 72 | } |
| 73 | |
| 74 | /** @brief Registers a systemd unit by its unit name and unit dbus objectpath. */ |
Tomáš Pecka | ca85a6d | 2024-01-24 16:55:44 +0100 | [diff] [blame] | 75 | void SystemdUnits::registerSystemdUnit(sdbus::IConnection& connection, const std::string& unitName, const sdbus::ObjectPath& unitObjectPath, const std::optional<UnitState>& unitState, const RegisterAlarmInventory registerAlarmInventory) |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 76 | { |
Tomáš Pecka | 0f8f15b | 2023-05-17 20:00:48 +0200 | [diff] [blame] | 77 | sdbus::IProxy* proxyUnit; |
Tomáš Pecka | 8e5a2d3 | 2022-08-22 17:48:58 +0200 | [diff] [blame] | 78 | |
Tomáš Pecka | 0f8f15b | 2023-05-17 20:00:48 +0200 | [diff] [blame] | 79 | { |
| 80 | std::lock_guard lck(m_mtx); |
| 81 | if (m_proxyUnits.contains(unitObjectPath)) { |
| 82 | return; |
| 83 | } |
| 84 | |
Tomáš Pecka | ca85a6d | 2024-01-24 16:55:44 +0100 | [diff] [blame] | 85 | if (registerAlarmInventory == RegisterAlarmInventory::Yes) { |
Tomáš Pecka | c1aee5c | 2024-01-30 09:31:37 +0100 | [diff] [blame] | 86 | alarms::addResourceToInventory(m_srSession, ALARM_ID, std::nullopt, unitName); |
Tomáš Pecka | ca85a6d | 2024-01-24 16:55:44 +0100 | [diff] [blame] | 87 | } |
| 88 | |
Tomáš Pecka | 0f8f15b | 2023-05-17 20:00:48 +0200 | [diff] [blame] | 89 | proxyUnit = m_proxyUnits.emplace(unitObjectPath, sdbus::createProxy(connection, m_busName, unitObjectPath)).first->second.get(); |
Tomáš Pecka | 0f8f15b | 2023-05-17 20:00:48 +0200 | [diff] [blame] | 90 | } |
| 91 | |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 92 | proxyUnit->uponSignal("PropertiesChanged").onInterface("org.freedesktop.DBus.Properties").call([&, unitName](const std::string& iface, const std::map<std::string, sdbus::Variant>& changed, [[maybe_unused]] const std::vector<std::string>& invalidated) { |
| 93 | if (iface != m_unitIface) { |
| 94 | return; |
| 95 | } |
| 96 | |
| 97 | std::string newActiveState, newSubState; |
| 98 | if (auto it = changed.find("ActiveState"); it != changed.end()) { |
| 99 | newActiveState = it->second.get<std::string>(); |
| 100 | } |
| 101 | if (auto it = changed.find("SubState"); it != changed.end()) { |
| 102 | newSubState = it->second.get<std::string>(); |
| 103 | } |
| 104 | |
Tomáš Pecka | 26ac0fa | 2024-01-24 16:34:16 +0100 | [diff] [blame] | 105 | onUnitStateChange(unitName, UnitState{std::move(newActiveState), std::move(newSubState)}); |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 106 | }); |
| 107 | proxyUnit->finishRegistration(); |
| 108 | m_log->trace("Registered systemd unit watcher for '{}'", unitName); |
| 109 | |
Tomáš Pecka | 94ebc36 | 2024-01-24 16:46:39 +0100 | [diff] [blame] | 110 | // Query the current state of this unit if not provided |
| 111 | if (!unitState) { |
| 112 | std::string newActiveState = proxyUnit->getProperty("ActiveState").onInterface(m_unitIface); |
| 113 | std::string newSubState = proxyUnit->getProperty("SubState").onInterface(m_unitIface); |
| 114 | onUnitStateChange(unitName, UnitState{std::move(newActiveState), std::move(newSubState)}); |
| 115 | } else { |
| 116 | onUnitStateChange(unitName, *unitState); |
| 117 | } |
| 118 | |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 119 | } |
| 120 | |
| 121 | /** @brief Callback for unit state change */ |
Tomáš Pecka | 26ac0fa | 2024-01-24 16:34:16 +0100 | [diff] [blame] | 122 | void SystemdUnits::onUnitStateChange(const std::string& name, const UnitState& state) |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 123 | { |
Tomáš Pecka | 0f8f15b | 2023-05-17 20:00:48 +0200 | [diff] [blame] | 124 | std::lock_guard lck(m_mtx); |
Tomáš Pecka | 26ac0fa | 2024-01-24 16:34:16 +0100 | [diff] [blame] | 125 | const auto& [activeState, subState] = state; |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 126 | |
| 127 | auto lastState = m_unitState.find(name); |
| 128 | if (lastState == m_unitState.end()) { |
Tomáš Pecka | 26ac0fa | 2024-01-24 16:34:16 +0100 | [diff] [blame] | 129 | lastState = m_unitState.insert(std::make_pair(name, state)).first; |
| 130 | } else if (lastState->second == state) { |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 131 | // We were notified about a state change into the same state. No need to fire any events, everything is still the same. |
Tomáš Pecka | 26ac0fa | 2024-01-24 16:34:16 +0100 | [diff] [blame] | 132 | m_log->trace("Systemd unit '{}' changed state but it is the same state as before ({}, {})", name, activeState, subState); |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 133 | return; |
| 134 | } |
| 135 | |
| 136 | std::string alarmSeverity; |
| 137 | if (activeState == "failed" || (activeState == "activating" && subState == "auto-restart")) { |
| 138 | alarmSeverity = ALARM_SEVERITY; |
| 139 | } else { |
| 140 | alarmSeverity = "cleared"; |
| 141 | } |
| 142 | |
| 143 | m_log->debug("Systemd unit '{}' changed state ({} {})", name, activeState, subState); |
Tomáš Pecka | 26ac0fa | 2024-01-24 16:34:16 +0100 | [diff] [blame] | 144 | lastState->second = state; |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 145 | |
Tomáš Pecka | c1aee5c | 2024-01-30 09:31:37 +0100 | [diff] [blame] | 146 | alarms::push(m_srSession, ALARM_ID, std::nullopt, name, alarmSeverity, "systemd unit state: (" + activeState + ", " + subState + ")"); |
Tomáš Pecka | 6a2334b | 2022-07-12 13:57:54 +0200 | [diff] [blame] | 147 | } |
| 148 | |
/** @brief Defaulted destructor, defined out of line in this implementation file. */
SystemdUnits::~SystemdUnits() = default;
| 150 | |
| 151 | } |