blob: 7381b7075f8dc715114e550403d7c8aefd627760 [file] [log] [blame]
/*
 * Copyright (C) 2020-2023 CESNET, https://photonics.cesnet.cz/
 *
 * Written by Tomáš Pecka <tomas.pecka@fit.cvut.cz>
 *
 */
7
Tomáš Pecka5a4c0352023-12-12 12:29:28 +01008#include <boost/algorithm/string.hpp>
Tomáš Pecka43ef7ba2023-04-13 15:56:48 +02009#include <regex>
10#include <sysrepo-cpp/Connection.hpp>
Tomáš Pecka98ad18d2020-11-13 15:39:55 +010011#include "Sysrepo.h"
Tomáš Pecka2117ce52023-05-12 11:28:34 +020012#include "utils/alarms.h"
Tomáš Pecka98ad18d2020-11-13 15:39:55 +010013#include "utils/log.h"
Tomáš Peckaba2dc312021-01-23 22:29:11 +010014#include "utils/sysrepo.h"
Tomáš Pecka98ad18d2020-11-13 15:39:55 +010015
Tomáš Pecka98ad18d2020-11-13 15:39:55 +010016namespace {
17
// Severity value pushed when an alarm stops being active
constexpr auto ALARM_CLEARED = "cleared";

// Alarm raised for a sensor whose threshold state is NoValue
constexpr auto ALARM_SENSOR_MISSING = "velia-alarms:sensor-missing-alarm";
constexpr auto ALARM_MISSING_SEVERITY = "warning";
constexpr auto ALARM_MISSING_DESCRIPTION = "Sensor value not reported. Maybe the sensor was unplugged?";

// Alarms raised when a sensor value is in one of the Warning*/Critical* threshold states
constexpr auto ALARM_THRESHOLD_CROSSING_LOW = "velia-alarms:sensor-low-value-alarm";
constexpr auto ALARM_THRESHOLD_CROSSING_LOW_DESCRIPTION = "Sensor value crossed low threshold.";
constexpr auto ALARM_THRESHOLD_CROSSING_HIGH = "velia-alarms:sensor-high-value-alarm";
constexpr auto ALARM_THRESHOLD_CROSSING_HIGH_DESCRIPTION = "Sensor value crossed high threshold.";

// Alarm raised when a sensor's oper-status leaf reads "nonoperational"
constexpr auto ALARM_SENSOR_NONOPERATIONAL = "velia-alarms:sensor-nonoperational";
constexpr auto ALARM_SENSOR_NONOPERATIONAL_SEVERITY = "warning";
constexpr auto ALARM_SENSOR_NONOPERATIONAL_DESCRIPTION = "Sensor is nonoperational. The values it reports may not be relevant.";
Tomáš Pecka2117ce52023-05-12 11:28:34 +020029
/** @brief Returns the component list entry part of an XPath pointing at or below /ietf-hardware:hardware/component
 *
 * Example input:  /ietf-hardware:hardware/component[name='ne:psu:child']/oper-state/disabled
 * Example output: /ietf-hardware:hardware/component[name='ne:psu:child']
 *
 * The name key may be enclosed in either single or double quotes; the closing quote has to be the same as the opening one.
 *
 * @throws std::logic_error when the XPath does not begin with such a component list entry
 */
std::string extractComponentPrefix(const std::string& componentXPath)
{
    // Group 1 is the list entry itself, group 2 remembers which quote character opened the key value.
    static const std::regex componentEntry(R"((/ietf-hardware:hardware/component\[name=('|").*?(\2)\]).*)");

    std::smatch pieces;
    if (!std::regex_match(componentXPath, pieces, componentEntry)) {
        throw std::logic_error("Invalid xPath provided ('" + componentXPath + "')");
    }

    return pieces[1].str();
}
Tomáš Pecka1b3c1732023-05-12 11:45:01 +020046
47void logAlarm(velia::Log logger, const std::string_view sensor, const std::string_view alarm, const std::string_view severity)
48{
49 logger->info("Alarm {}: {} for {}", alarm, severity, sensor);
50}
Tomáš Pecka9af47392023-05-23 14:56:48 +020051
52bool isThresholdCrossingLow(velia::ietf_hardware::State state)
53{
54 return state == velia::ietf_hardware::State::WarningLow || state == velia::ietf_hardware::State::CriticalLow;
55}
56
57bool isThresholdCrossingHigh(velia::ietf_hardware::State state)
58{
59 return state == velia::ietf_hardware::State::WarningHigh || state == velia::ietf_hardware::State::CriticalHigh;
60}
61
62std::string toYangAlarmSeverity(velia::ietf_hardware::State state)
63{
64 switch (state) {
65 case velia::ietf_hardware::State::WarningLow:
66 case velia::ietf_hardware::State::WarningHigh:
67 return "warning";
68 case velia::ietf_hardware::State::CriticalLow:
69 case velia::ietf_hardware::State::CriticalHigh:
70 return "critical";
71 default: {
72 std::ostringstream oss;
73 oss << "No severity associated with sensor threshold State " << state;
74 throw std::logic_error(oss.str());
75 }
76 }
77}
Tomáš Pecka98ad18d2020-11-13 15:39:55 +010078}
79
Tomáš Pecka43ef7ba2023-04-13 15:56:48 +020080namespace velia::ietf_hardware::sysrepo {
81
82/** @brief The constructor expects the HardwareState instance which will provide the actual hardware state data and the poll interval */
83Sysrepo::Sysrepo(::sysrepo::Session session, std::shared_ptr<IETFHardware> hwState, std::chrono::microseconds pollInterval)
84 : m_log(spdlog::get("hardware"))
85 , m_pollInterval(std::move(pollInterval))
86 , m_session(std::move(session))
87 , m_hwState(std::move(hwState))
88 , m_quit(false)
Tomáš Pecka2117ce52023-05-12 11:28:34 +020089{
Tomáš Pecka2117ce52023-05-12 11:28:34 +020090 m_pollThread = std::thread([&]() {
Tomáš Pecka43ef7ba2023-04-13 15:56:48 +020091 auto conn = m_session.getConnection();
92
93 DataTree prevValues;
Tomáš Peckac0991ce2023-12-20 15:46:03 +010094 std::set<std::string> seenSensors;
Tomáš Pecka1b3c1732023-05-12 11:45:01 +020095 std::map<std::string, State> thresholdsStates;
Tomáš Pecka26b38212024-01-16 17:23:31 +010096 std::set<std::pair<std::string, std::string>> activeSideLoadedAlarms;
Tomáš Pecka43ef7ba2023-04-13 15:56:48 +020097
Tomáš Pecka9bd61272024-01-31 15:08:13 +010098 alarms::pushInventory(
99 m_session,
100 {
101 {ALARM_THRESHOLD_CROSSING_LOW, "Sensor value is below the low threshold."},
102 {ALARM_THRESHOLD_CROSSING_HIGH, "Sensor value is above the high threshold."},
103 {ALARM_SENSOR_MISSING, "Sensor is missing."},
104 {ALARM_SENSOR_NONOPERATIONAL, "Sensor is flagged as nonoperational."},
105 });
Tomáš Pecka2848fd02024-01-30 12:05:59 +0100106
Tomáš Pecka43ef7ba2023-04-13 15:56:48 +0200107 while (!m_quit) {
108 m_log->trace("IetfHardware poll");
109
Tomáš Pecka26b38212024-01-16 17:23:31 +0100110 auto [hwStateValues, thresholds, activeSensors, sideLoadedAlarms] = m_hwState->process();
Tomáš Pecka43ef7ba2023-04-13 15:56:48 +0200111 std::set<std::string> deletedComponents;
Tomáš Pecka87844292024-01-30 15:20:21 +0100112 std::vector<std::string> newSensors;
Tomáš Pecka43ef7ba2023-04-13 15:56:48 +0200113
Tomáš Peckac0991ce2023-12-20 15:46:03 +0100114 for (const auto& sensorXPath : activeSensors) {
115 if (!seenSensors.contains(sensorXPath)) {
Tomáš Pecka87844292024-01-30 15:20:21 +0100116 newSensors.emplace_back(extractComponentPrefix(sensorXPath));
Tomáš Peckac0991ce2023-12-20 15:46:03 +0100117 }
118 }
119 seenSensors.merge(activeSensors);
120
Tomáš Pecka87844292024-01-30 15:20:21 +0100121 if (!newSensors.empty()) {
Tomáš Peckabbfc1c32024-01-31 13:58:11 +0100122 alarms::addResourcesToInventory(m_session, {
123 {ALARM_THRESHOLD_CROSSING_LOW, newSensors},
124 {ALARM_THRESHOLD_CROSSING_HIGH, newSensors},
125 {ALARM_SENSOR_MISSING, newSensors},
126 {ALARM_SENSOR_NONOPERATIONAL, newSensors},
127 });
Tomáš Pecka87844292024-01-30 15:20:21 +0100128 }
129
Tomáš Pecka43ef7ba2023-04-13 15:56:48 +0200130 /* Some data readers can stop returning data in some cases (e.g. ejected PSU).
131 * Prune tree components that were removed before updating to avoid having not current data from previous invocations.
132 */
133 for (const auto& [k, v] : prevValues) {
134 if (!hwStateValues.contains(k)) {
135 deletedComponents.emplace(extractComponentPrefix(k));
136 }
137 }
138
Jan Kundrát498c3f82023-05-24 19:25:48 +0200139 std::vector<std::string> discards;
140 discards.reserve(deletedComponents.size());
141 std::copy(deletedComponents.begin(), deletedComponents.end(), std::back_inserter(discards));
Tomáš Pecka43ef7ba2023-04-13 15:56:48 +0200142
Jan Kundrát498c3f82023-05-24 19:25:48 +0200143 utils::valuesPush(hwStateValues, {}, discards, m_session, ::sysrepo::Datastore::Operational);
Tomáš Pecka43ef7ba2023-04-13 15:56:48 +0200144
Tomáš Pecka26b38212024-01-16 17:23:31 +0100145 /* Publish sideloaded alarms */
146 for (const auto& [alarm, resource, severity, text] : sideLoadedAlarms) {
147 // Sideloaded alarms are not registered using the code above, let's register those too
Tomáš Peckabbfc1c32024-01-31 13:58:11 +0100148 alarms::addResourcesToInventory(m_session, {{ALARM_SENSOR_MISSING, {resource}}});
Tomáš Pecka26b38212024-01-16 17:23:31 +0100149
150 bool isActive = activeSideLoadedAlarms.contains({alarm, resource});
151 if (isActive && severity == "cleared") {
Tomáš Peckad694bc52024-01-30 09:53:06 +0100152 alarms::push(m_session, alarm, resource, "cleared", text);
Tomáš Pecka26b38212024-01-16 17:23:31 +0100153 activeSideLoadedAlarms.erase({alarm, resource});
154 } else if (!isActive && severity != "cleared") {
Tomáš Peckad694bc52024-01-30 09:53:06 +0100155 alarms::push(m_session, alarm, resource, severity, text);
Tomáš Pecka26b38212024-01-16 17:23:31 +0100156 activeSideLoadedAlarms.insert({alarm, resource});
157 }
158 }
159
Tomáš Pecka5a4c0352023-12-12 12:29:28 +0100160 /* Look for nonoperational sensors to set alarms */
161 for (const auto& [leaf, value] : hwStateValues) {
162 if (boost::ends_with(leaf, "/sensor-data/oper-status")) {
163 std::optional<std::string> oldValue;
164
165 if (auto it = prevValues.find(leaf); it != prevValues.end()) {
166 oldValue = it->second;
167 }
168
169 if (value == "nonoperational" && oldValue != "nonoperational") {
Tomáš Peckad694bc52024-01-30 09:53:06 +0100170 alarms::push(m_session, ALARM_SENSOR_NONOPERATIONAL, extractComponentPrefix(leaf), ALARM_SENSOR_NONOPERATIONAL_SEVERITY, ALARM_SENSOR_NONOPERATIONAL_DESCRIPTION);
Tomáš Pecka5a4c0352023-12-12 12:29:28 +0100171 } else if (value == "ok" && oldValue && oldValue != "ok" /* don't call clear-alarm if we see this node for the first time, i.e., oldvalue is nullopt */) {
Tomáš Peckad694bc52024-01-30 09:53:06 +0100172 alarms::push(m_session, ALARM_SENSOR_NONOPERATIONAL, extractComponentPrefix(leaf), ALARM_CLEARED, ALARM_SENSOR_NONOPERATIONAL_DESCRIPTION);
Tomáš Pecka5a4c0352023-12-12 12:29:28 +0100173 }
174 }
175 }
176
Tomáš Pecka1b3c1732023-05-12 11:45:01 +0200177 for (const auto& [sensorXPath, state] : thresholds) {
178 // missing prevState can be considered as Normal
179 const State prevState = [&, sensorXPath = sensorXPath] {
180 if (auto it = thresholdsStates.find(sensorXPath); it != thresholdsStates.end()) {
181 return it->second;
182 }
183 return State::Normal;
184 }();
185 const auto componentXPath = extractComponentPrefix(sensorXPath);
186
187 if (state == State::NoValue) {
Jan Kundrát2ec37482024-01-11 21:38:20 +0100188 logAlarm(m_log, componentXPath, ALARM_SENSOR_MISSING, ALARM_MISSING_SEVERITY);
Tomáš Peckad694bc52024-01-30 09:53:06 +0100189 alarms::push(m_session, ALARM_SENSOR_MISSING, componentXPath, ALARM_MISSING_SEVERITY, ALARM_MISSING_DESCRIPTION);
Tomáš Pecka1b3c1732023-05-12 11:45:01 +0200190 } else if (prevState == State::NoValue) {
Jan Kundrát2ec37482024-01-11 21:38:20 +0100191 logAlarm(m_log, componentXPath, ALARM_SENSOR_MISSING, ALARM_CLEARED);
Tomáš Pecka1b3c1732023-05-12 11:45:01 +0200192 /* The alarm message is same for both setting and clearing the alarm. RFC8632 says that it is
193 * "The string used to inform operators about the alarm. This MUST contain enough information for an operator to be able to understand the problem and how to resolve it.",
194 * i.e., from my POV it does not make sense to say something like "cleared" when clearing the alarm as this would not be beneficial for the operator to understand what happened.
195 */
Tomáš Peckad694bc52024-01-30 09:53:06 +0100196 alarms::push(m_session, ALARM_SENSOR_MISSING, componentXPath, ALARM_CLEARED, ALARM_MISSING_DESCRIPTION);
Tomáš Pecka1b3c1732023-05-12 11:45:01 +0200197 }
198
Tomáš Pecka9af47392023-05-23 14:56:48 +0200199 /*
200 * We set new threshold alarms first. In case the sensor value transitions from high to low (or low to high) we don't want to lose any active alarm on the resource.
201 *
202 * In case new state corresponds to threshold crossing (wither lower bound or upper bound) we set the alarm.
203 * Since we receive only changes to states it should be sufficient to just check if the new state crossed the threshold.
204 * We shouldn't receive any "no-op" state change (e.g. warning low to warning low) and even if we did receive such change, we would only set the same alarm again.
205 * We can however receive a change from critical threshold to warning threshold (or warning to critical) but that is no problem.
206 * We only need to set the same alarm again with the new severity.
207 */
208 if (isThresholdCrossingLow(state)) {
209 logAlarm(m_log, componentXPath, ALARM_THRESHOLD_CROSSING_LOW, toYangAlarmSeverity(state));
Tomáš Peckad694bc52024-01-30 09:53:06 +0100210 alarms::push(m_session, ALARM_THRESHOLD_CROSSING_LOW, componentXPath, toYangAlarmSeverity(state), ALARM_THRESHOLD_CROSSING_LOW_DESCRIPTION);
Tomáš Pecka9af47392023-05-23 14:56:48 +0200211 } else if (isThresholdCrossingHigh(state)) {
212 logAlarm(m_log, componentXPath, ALARM_THRESHOLD_CROSSING_HIGH, toYangAlarmSeverity(state));
Tomáš Peckad694bc52024-01-30 09:53:06 +0100213 alarms::push(m_session, ALARM_THRESHOLD_CROSSING_HIGH, componentXPath, toYangAlarmSeverity(state), ALARM_THRESHOLD_CROSSING_HIGH_DESCRIPTION);
Tomáš Pecka9af47392023-05-23 14:56:48 +0200214 }
215
216 /* Now we can clear the old threshold alarms that are no longer active, i.e., we transition away from the CriticalLow/WarningLow or CriticalHigh/WarningHigh. */
217 if (!isThresholdCrossingLow(state) && isThresholdCrossingLow(prevState)) {
218 logAlarm(m_log, componentXPath, ALARM_THRESHOLD_CROSSING_LOW, ALARM_CLEARED);
Tomáš Peckad694bc52024-01-30 09:53:06 +0100219 alarms::push(m_session, ALARM_THRESHOLD_CROSSING_LOW, componentXPath, ALARM_CLEARED, ALARM_THRESHOLD_CROSSING_LOW_DESCRIPTION);
Tomáš Pecka9af47392023-05-23 14:56:48 +0200220 } else if (!isThresholdCrossingHigh(state) && isThresholdCrossingHigh(prevState)) {
221 logAlarm(m_log, componentXPath, ALARM_THRESHOLD_CROSSING_HIGH, ALARM_CLEARED);
Tomáš Peckad694bc52024-01-30 09:53:06 +0100222 alarms::push(m_session, ALARM_THRESHOLD_CROSSING_HIGH, componentXPath, ALARM_CLEARED, ALARM_THRESHOLD_CROSSING_HIGH_DESCRIPTION);
Tomáš Pecka9af47392023-05-23 14:56:48 +0200223 }
224
Tomáš Pecka1b3c1732023-05-12 11:45:01 +0200225 thresholdsStates[sensorXPath] = state;
226 }
227
Tomáš Pecka43ef7ba2023-04-13 15:56:48 +0200228 prevValues = std::move(hwStateValues);
229 std::this_thread::sleep_for(m_pollInterval);
Tomáš Peckabbfc1c32024-01-31 13:58:11 +0100230 }
Tomáš Pecka2117ce52023-05-12 11:28:34 +0200231 });
Tomáš Pecka43ef7ba2023-04-13 15:56:48 +0200232}
Václav Kubernát7efd6d52021-11-09 01:31:11 +0100233
Tomáš Pecka43ef7ba2023-04-13 15:56:48 +0200234Sysrepo::~Sysrepo()
235{
236 m_log->trace("Requesting poll thread stop");
237 m_quit = true;
238 m_pollThread.join();
Tomáš Pecka98ad18d2020-11-13 15:39:55 +0100239}
240}