Fix call home client thread cleanup
This commit by @thelsper fixes a race condition in call home client creation
and deletion. If a call home client is deleted and then immediately re-created
with the same client name, the old client thread does not notice that its
client was destroyed and attempts to service the new client, while the
thread newly created for the new client also attempts to service it. This
results in a proliferation of threads and can also result in deadlock.
To fix this, we added a unique client ID to the client information. A client
thread caches the ID of the client it is servicing, and whenever it retrieves
its client by name again, it compares the retrieved client's ID against the
cached value. If the IDs differ, the original client was destroyed (and a new
one was created under the same name), so the thread cleans itself up.
While we were here making these changes, we also changed the session ID to be
a true atomic and removed the spin lock that was protecting it. (The atomic
operations library stdatomic.h was added in C11, which is why we also
appended `-std=gnu11` to the `CFLAGS`. Both sysrepo and netopeer2 are already
using C11.)
diff --git a/src/session_server.c b/src/session_server.c
index 885ef3a..75b9859 100644
--- a/src/session_server.c
+++ b/src/session_server.c
@@ -515,7 +515,7 @@
server_opts.ctx = ctx;
server_opts.new_session_id = 1;
- pthread_spin_init(&server_opts.sid_lock, PTHREAD_PROCESS_PRIVATE);
+ server_opts.new_client_id = 1;
errno=0;
@@ -549,8 +549,6 @@
server_opts.capabilities = NULL;
server_opts.capabilities_count = 0;
- pthread_spin_destroy(&server_opts.sid_lock);
-
#if defined(NC_ENABLED_SSH) || defined(NC_ENABLED_TLS)
nc_server_del_endpt(NULL, 0);
#endif
@@ -703,9 +701,7 @@
(*session)->ctx = server_opts.ctx;
/* assign new SID atomically */
- pthread_spin_lock(&server_opts.sid_lock);
- (*session)->id = server_opts.new_session_id++;
- pthread_spin_unlock(&server_opts.sid_lock);
+ (*session)->id = atomic_fetch_add(&server_opts.new_session_id, 1);
/* NETCONF handshake */
msgtype = nc_handshake_io(*session);
@@ -2015,11 +2011,7 @@
pthread_rwlock_unlock(&server_opts.endpt_lock);
/* assign new SID atomically */
- /* LOCK */
- pthread_spin_lock(&server_opts.sid_lock);
- (*session)->id = server_opts.new_session_id++;
- /* UNLOCK */
- pthread_spin_unlock(&server_opts.sid_lock);
+ (*session)->id = atomic_fetch_add(&server_opts.new_session_id, 1);
/* NETCONF handshake */
msgtype = nc_handshake_io(*session);
@@ -2081,6 +2073,7 @@
return -1;
}
server_opts.ch_clients[server_opts.ch_client_count - 1].name = lydict_insert(server_opts.ctx, name, 0);
+ server_opts.ch_clients[server_opts.ch_client_count - 1].id = atomic_fetch_add(&server_opts.new_client_id, 1);
server_opts.ch_clients[server_opts.ch_client_count - 1].ti = ti;
server_opts.ch_clients[server_opts.ch_client_count - 1].ch_endpts = NULL;
server_opts.ch_clients[server_opts.ch_client_count - 1].ch_endpt_count = 0;
@@ -2758,11 +2751,7 @@
}
/* assign new SID atomically */
- /* LOCK */
- pthread_spin_lock(&server_opts.sid_lock);
- (*session)->id = server_opts.new_session_id++;
- /* UNLOCK */
- pthread_spin_unlock(&server_opts.sid_lock);
+ (*session)->id = atomic_fetch_add(&server_opts.new_session_id, 1);
/* NETCONF handshake */
msgtype = nc_handshake_io(*session);
@@ -2909,12 +2898,14 @@
struct nc_ch_endpt *cur_endpt;
struct nc_session *session;
struct nc_ch_client *client;
+ uint32_t client_id;
/* LOCK */
client = nc_server_ch_client_with_endpt_lock(data->client_name);
if (!client) {
goto cleanup;
}
+ client_id = client->id;
cur_endpt = &client->ch_endpts[0];
cur_endpt_name = strdup(cur_endpt->name);
@@ -2938,6 +2929,10 @@
if (!client) {
goto cleanup;
}
+ if (client->id != client_id) {
+ nc_server_ch_client_unlock(client);
+ goto cleanup;
+ }
/* session changed status -> it was disconnected for whatever reason,
* persistent connection immediately tries to reconnect, periodic waits some first */
@@ -2953,6 +2948,10 @@
if (!client) {
goto cleanup;
}
+ if (client->id != client_id) {
+ nc_server_ch_client_unlock(client);
+ goto cleanup;
+ }
}
/* set next endpoint to try */
@@ -2983,6 +2982,10 @@
if (!client) {
goto cleanup;
}
+ if (client->id != client_id) {
+ nc_server_ch_client_unlock(client);
+ goto cleanup;
+ }
++cur_attempts;