blob: bb0243d2fcaa80fda83fbef360601b8cc74ed052 [file] [log] [blame]
/*
 * Copyright (C) 2020 - 2022 CESNET, https://photonics.cesnet.cz/
 *
 * Written by Tomáš Pecka <tomas.pecka@fit.cvut.cz>
 *
 */
7#include "SystemdUnits.h"
Tomáš Peckaad3247f2023-05-11 15:56:26 +02008#include "utils/alarms.h"
Tomáš Pecka6a2334b2022-07-12 13:57:54 +02009#include "utils/log.h"
10#include "utils/sysrepo.h"
11
namespace {
/** @brief Alarm type identifier used for every alarm and inventory entry published by this file. */
constexpr auto ALARM_ID = "velia-alarms:systemd-unit-failure";

/** @brief Severity reported while a unit is considered failed. */
constexpr auto ALARM_SEVERITY = "critical";

/** @brief Human-readable description stored with the alarm-inventory entry. */
constexpr auto ALARM_INVENTORY_DESCRIPTION = "The systemd service is considered in failed state.";
}
17
18namespace velia::health {
19
20/** @brief Construct the systemd unit watcher for arbitrary dbus object. Mainly for tests. */
21SystemdUnits::SystemdUnits(sysrepo::Session session, sdbus::IConnection& connection, const std::string& busname, const std::string& managerObjectPath, const std::string& managerIface, const std::string& unitIface)
22 : m_log(spdlog::get("health"))
23 , m_srSession(std::move(session))
24 , m_busName(busname)
25 , m_unitIface(unitIface)
26 , m_proxyManager(sdbus::createProxy(connection, m_busName, managerObjectPath))
27{
28 utils::ensureModuleImplemented(m_srSession, "sysrepo-ietf-alarms", "2022-02-17");
29 utils::ensureModuleImplemented(m_srSession, "velia-alarms", "2022-07-12");
30
Tomáš Pecka6a2334b2022-07-12 13:57:54 +020031 /* Track all current units. Method ListUnits() -> a(ssssssouso) returns a DBus struct type with information
32 * about the unit (see https://www.freedesktop.org/wiki/Software/systemd/dbus/#Manager-ListUnits).
Tomáš Pecka94ebc362024-01-24 16:46:39 +010033 * In our code we need the fields:
34 * - 0: the unit name
35 * - 6: unit object path
36 * - 3: unit activeState
37 * - 4: unit subState
Tomáš Pecka6a2334b2022-07-12 13:57:54 +020038 */
39 std::vector<sdbus::Struct<std::string, std::string, std::string, std::string, std::string, std::string, sdbus::ObjectPath, uint32_t, std::string, sdbus::ObjectPath>> units;
Tomáš Peckaca85a6d2024-01-24 16:55:44 +010040 std::vector<std::string> unitNames;
41
42 // First, fetch all currently loaded units, register to their PropertiesChanged signal and create the alarm-inventory entries in a *single* edit
43 m_proxyManager->callMethod("ListUnits").onInterface(managerIface).storeResultsTo(units);
44 std::transform(units.begin(), units.end(), std::back_inserter(unitNames), [](const auto& unit) { return unit.template get<0>(); });
Tomáš Pecka14f912b2024-01-30 09:39:20 +010045 alarms::pushInventory(m_srSession, ALARM_ID, std::nullopt, {ALARM_SEVERITY}, ALARM_INVENTORY_DESCRIPTION, unitNames);
Tomáš Peckaca85a6d2024-01-24 16:55:44 +010046
47 for (const auto& unit : units) {
48 registerSystemdUnit(connection, unit.get<0>(), unit.get<6>(), UnitState{unit.get<3>(), unit.get<4>()}, RegisterAlarmInventory::No);
49 }
50
51 // Subscribe to systemd events. Systemd may not generate signals unless explicitly called
52 m_proxyManager->callMethod("Subscribe").onInterface(managerIface).withArguments().dontExpectReply();
53
54 // Register to a signal introducing new unit. Newly loaded units into systemd can now start coming. The corresponding alarm MUST be registered because it was not yet.
55 m_proxyManager->uponSignal("UnitNew").onInterface(managerIface).call([&](const std::string& unitName, const sdbus::ObjectPath& unitObjectPath) {
56 registerSystemdUnit(connection, unitName, unitObjectPath, std::nullopt, RegisterAlarmInventory::Yes);
57 });
58 m_proxyManager->finishRegistration();
59
60 // Ask for all the units once again. There could have been some that were created between the first ListUnits call and the UnitNew subscription
61 units.clear();
Tomáš Pecka6a2334b2022-07-12 13:57:54 +020062 m_proxyManager->callMethod("ListUnits").onInterface(managerIface).storeResultsTo(units);
63 for (const auto& unit : units) {
Tomáš Peckaca85a6d2024-01-24 16:55:44 +010064 registerSystemdUnit(connection, unit.get<0>(), unit.get<6>(), UnitState{unit.get<3>(), unit.get<4>()}, RegisterAlarmInventory::Yes);
Tomáš Pecka6a2334b2022-07-12 13:57:54 +020065 }
66}
67
68/** @brief Construct the systemd watcher for well-known systemd paths. */
69SystemdUnits::SystemdUnits(sysrepo::Session session, sdbus::IConnection& connection)
70 : SystemdUnits(session, connection, "org.freedesktop.systemd1", "/org/freedesktop/systemd1", "org.freedesktop.systemd1.Manager", "org.freedesktop.systemd1.Unit")
71{
72}
73
74/** @brief Registers a systemd unit by its unit name and unit dbus objectpath. */
Tomáš Peckaca85a6d2024-01-24 16:55:44 +010075void SystemdUnits::registerSystemdUnit(sdbus::IConnection& connection, const std::string& unitName, const sdbus::ObjectPath& unitObjectPath, const std::optional<UnitState>& unitState, const RegisterAlarmInventory registerAlarmInventory)
Tomáš Pecka6a2334b2022-07-12 13:57:54 +020076{
Tomáš Pecka0f8f15b2023-05-17 20:00:48 +020077 sdbus::IProxy* proxyUnit;
Tomáš Pecka8e5a2d32022-08-22 17:48:58 +020078
Tomáš Pecka0f8f15b2023-05-17 20:00:48 +020079 {
80 std::lock_guard lck(m_mtx);
81 if (m_proxyUnits.contains(unitObjectPath)) {
82 return;
83 }
84
Tomáš Peckaca85a6d2024-01-24 16:55:44 +010085 if (registerAlarmInventory == RegisterAlarmInventory::Yes) {
Tomáš Peckac1aee5c2024-01-30 09:31:37 +010086 alarms::addResourceToInventory(m_srSession, ALARM_ID, std::nullopt, unitName);
Tomáš Peckaca85a6d2024-01-24 16:55:44 +010087 }
88
Tomáš Pecka0f8f15b2023-05-17 20:00:48 +020089 proxyUnit = m_proxyUnits.emplace(unitObjectPath, sdbus::createProxy(connection, m_busName, unitObjectPath)).first->second.get();
Tomáš Pecka0f8f15b2023-05-17 20:00:48 +020090 }
91
Tomáš Pecka6a2334b2022-07-12 13:57:54 +020092 proxyUnit->uponSignal("PropertiesChanged").onInterface("org.freedesktop.DBus.Properties").call([&, unitName](const std::string& iface, const std::map<std::string, sdbus::Variant>& changed, [[maybe_unused]] const std::vector<std::string>& invalidated) {
93 if (iface != m_unitIface) {
94 return;
95 }
96
97 std::string newActiveState, newSubState;
98 if (auto it = changed.find("ActiveState"); it != changed.end()) {
99 newActiveState = it->second.get<std::string>();
100 }
101 if (auto it = changed.find("SubState"); it != changed.end()) {
102 newSubState = it->second.get<std::string>();
103 }
104
Tomáš Pecka26ac0fa2024-01-24 16:34:16 +0100105 onUnitStateChange(unitName, UnitState{std::move(newActiveState), std::move(newSubState)});
Tomáš Pecka6a2334b2022-07-12 13:57:54 +0200106 });
107 proxyUnit->finishRegistration();
108 m_log->trace("Registered systemd unit watcher for '{}'", unitName);
109
Tomáš Pecka94ebc362024-01-24 16:46:39 +0100110 // Query the current state of this unit if not provided
111 if (!unitState) {
112 std::string newActiveState = proxyUnit->getProperty("ActiveState").onInterface(m_unitIface);
113 std::string newSubState = proxyUnit->getProperty("SubState").onInterface(m_unitIface);
114 onUnitStateChange(unitName, UnitState{std::move(newActiveState), std::move(newSubState)});
115 } else {
116 onUnitStateChange(unitName, *unitState);
117 }
118
Tomáš Pecka6a2334b2022-07-12 13:57:54 +0200119}
120
121/** @brief Callback for unit state change */
Tomáš Pecka26ac0fa2024-01-24 16:34:16 +0100122void SystemdUnits::onUnitStateChange(const std::string& name, const UnitState& state)
Tomáš Pecka6a2334b2022-07-12 13:57:54 +0200123{
Tomáš Pecka0f8f15b2023-05-17 20:00:48 +0200124 std::lock_guard lck(m_mtx);
Tomáš Pecka26ac0fa2024-01-24 16:34:16 +0100125 const auto& [activeState, subState] = state;
Tomáš Pecka6a2334b2022-07-12 13:57:54 +0200126
127 auto lastState = m_unitState.find(name);
128 if (lastState == m_unitState.end()) {
Tomáš Pecka26ac0fa2024-01-24 16:34:16 +0100129 lastState = m_unitState.insert(std::make_pair(name, state)).first;
130 } else if (lastState->second == state) {
Tomáš Pecka6a2334b2022-07-12 13:57:54 +0200131 // We were notified about a state change into the same state. No need to fire any events, everything is still the same.
Tomáš Pecka26ac0fa2024-01-24 16:34:16 +0100132 m_log->trace("Systemd unit '{}' changed state but it is the same state as before ({}, {})", name, activeState, subState);
Tomáš Pecka6a2334b2022-07-12 13:57:54 +0200133 return;
134 }
135
136 std::string alarmSeverity;
137 if (activeState == "failed" || (activeState == "activating" && subState == "auto-restart")) {
138 alarmSeverity = ALARM_SEVERITY;
139 } else {
140 alarmSeverity = "cleared";
141 }
142
143 m_log->debug("Systemd unit '{}' changed state ({} {})", name, activeState, subState);
Tomáš Pecka26ac0fa2024-01-24 16:34:16 +0100144 lastState->second = state;
Tomáš Pecka6a2334b2022-07-12 13:57:54 +0200145
Tomáš Peckac1aee5c2024-01-30 09:31:37 +0100146 alarms::push(m_srSession, ALARM_ID, std::nullopt, name, alarmSeverity, "systemd unit state: (" + activeState + ", " + subState + ")");
Tomáš Pecka6a2334b2022-07-12 13:57:54 +0200147}
148
149SystemdUnits::~SystemdUnits() = default;
150
151}