From 450920bffd32369b7852805223b66c5d127f35a2 Mon Sep 17 00:00:00 2001 From: fivef Date: Thu, 14 Dec 2023 13:23:40 +0100 Subject: [PATCH] Fix DLT-Multinode: Gateway does not recognize reset of passive node #551 Enable TCP keepalive to detect broken connections due to network disconnects. Without this, in cases where no TCP FIN or RST packet is received from the server, the dlt_client will never notice that it was disconnected from the server. Thus it will not reconnect when the server is available again. Also improve logging output to see which connection fails. --- src/daemon/dlt_daemon_event_handler.c | 24 +++++++++---- src/gateway/dlt_gateway.c | 5 +-- src/lib/dlt_client.c | 52 ++++++++++++++++++++++++++- 3 files changed, 71 insertions(+), 10 deletions(-) diff --git a/src/daemon/dlt_daemon_event_handler.c b/src/daemon/dlt_daemon_event_handler.c index 74961a61a..8d9bad7f3 100644 --- a/src/daemon/dlt_daemon_event_handler.c +++ b/src/daemon/dlt_daemon_event_handler.c @@ -228,18 +228,28 @@ int dlt_daemon_handle_event(DltEventHandler *pEvent, if (pEvent->pfd[i].revents & DLT_EV_MASK_REJECTED) { /* An error occurred, we need to clean-up the concerned event */ - if (type == DLT_CONNECTION_CLIENT_MSG_TCP) + if (type == DLT_CONNECTION_CLIENT_MSG_TCP) { /* To transition to BUFFER state if this is final TCP client connection, * call dedicated function. this function also calls * dlt_event_handler_unregister_connection() inside the function. */ dlt_daemon_close_socket(fd, daemon, daemon_local, 0); - else - dlt_event_handler_unregister_connection(pEvent, - daemon_local, - fd); - - continue; + continue; + } + else if (type == DLT_CONNECTION_GATEWAY) { + /* Let the callback function + * dlt_gateway_process_passive_node_messages handle the + * disconnect which was triggered by TCP keepalive after a + * network disconnect. If we directly called + * dlt_event_handler_unregister_connection() here the dlt_gateway would + * not notice that the connection was closed. 
+ */ + dlt_vlog(LOG_DEBUG, "Connection to dlt gateway broken.\n"); + } + else { + dlt_event_handler_unregister_connection(pEvent, daemon_local, fd); + continue; + } } /* Get the function to be used to handle the event */ diff --git a/src/gateway/dlt_gateway.c b/src/gateway/dlt_gateway.c index c73332229..95888bb06 100644 --- a/src/gateway/dlt_gateway.c +++ b/src/gateway/dlt_gateway.c @@ -985,8 +985,9 @@ int dlt_gateway_establish_connections(DltGateway *gateway, } } else { - dlt_log(LOG_DEBUG, - "Passive Node is not up. Connection failed.\n"); + dlt_vlog(LOG_WARNING, + "Passive Node %s is not up. Connection failed.\n", + con->ecuid); con->timeout_cnt++; diff --git a/src/lib/dlt_client.c b/src/lib/dlt_client.c index dd9443b1e..2b47dd4b8 100644 --- a/src/lib/dlt_client.c +++ b/src/lib/dlt_client.c @@ -73,6 +73,7 @@ #else # include /* for socket(), connect(), send(), and recv() */ # include /* for sockaddr_in and inet_addr() */ +# include /* for TCP keepalive: TCP_KEEPIDLE, TCP_KEEPCNT, TCP_KEEPINTVL */ # include # include # include @@ -172,6 +173,14 @@ DltReturnValue dlt_client_init(DltClient *client, int verbose) DltReturnValue dlt_client_connect(DltClient *client, int verbose) { const int yes = 1; + // Keepalive: Opt flag to enable TCP keepalive + const int enablekeepalive = 1; + // Keepalive: Seconds of idle before sending keepalive probes + const int keepidle = 10; + // Keepalive: Number of probes to send before considering the connection dead + const int keepcount = 2; + // Keepalive: Interval between the probes + const int keepintvl = 3; char portnumbuffer[33] = {0}; struct addrinfo hints, *servinfo, *p; struct sockaddr_un addr; @@ -212,6 +221,45 @@ DltReturnValue dlt_client_connect(DltClient *client, int verbose) continue; } + /* Enable TCP keepalive to detect broken connections due to network disconnects. 
+ * Without this in cases where no TCP FIN or RST package is received from the server, + * the dlt_client will never notice that it was disconnected from the server and so + * it will not reconnect when the server is available again. + */ + if (setsockopt(client->sock, SOL_SOCKET, SO_KEEPALIVE, + &enablekeepalive, sizeof(enablekeepalive)) < 0) { + dlt_vlog(LOG_WARNING, + "%s: Failed to set SO_KEEPALIVE on socket: %s\n", + __func__, strerror(errno)); + close(client->sock); + continue; + } + + if (setsockopt(client->sock, IPPROTO_TCP, TCP_KEEPIDLE, + &keepidle, sizeof(keepidle)) < 0) { + dlt_vlog(LOG_WARNING, + "%s: Failed to set TCP_KEEPIDLE on socket: %s\n", + __func__, strerror(errno)); + close(client->sock); + continue; + } + if (setsockopt(client->sock, IPPROTO_TCP, TCP_KEEPCNT, + &keepcount, sizeof(keepcount)) < 0) { + dlt_vlog(LOG_WARNING, + "%s: Failed to set TCP_KEEPCNT on socket: %s\n", + __func__, strerror(errno)); + close(client->sock); + continue; + } + if (setsockopt(client->sock, IPPROTO_TCP, TCP_KEEPINTVL, + &keepintvl, sizeof(keepintvl)) < 0) { + dlt_vlog(LOG_WARNING, + "%s: Failed to set TCP_KEEPINTVL on socket: %s\n", + __func__, strerror(errno)); + close(client->sock); + continue; + } + /* Set socket to Non-blocking mode */ if(fcntl(client->sock, F_SETFL, fcntl(client->sock,F_GETFL,0) | O_NONBLOCK) < 0) { @@ -273,8 +321,10 @@ DltReturnValue dlt_client_connect(DltClient *client, int verbose) if (p == NULL) { dlt_vlog(LOG_ERR, - "%s: ERROR: failed to connect! %s\n", + "%s: ERROR: failed to connect to %s:%s! %s\n", __func__, + client->servIP, + portnumbuffer, strerror(connect_errno)); return DLT_RETURN_ERROR; }