Skip to content

Commit

Permalink
Fix DLT-Multinode: Gateway does not recognize reset of passive node #551
Browse files Browse the repository at this point in the history


Enable TCP keepalive to detect broken connections due to network disconnects.
Without this, in cases where no TCP FIN or RST packet is received from the server, the dlt_client will never notice that it was disconnected from the server. Thus, it will not reconnect when the server is available again.
Also improve the logging output to show which connection fails.
  • Loading branch information
fivef committed Dec 14, 2023
1 parent 2118762 commit 450920b
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 10 deletions.
24 changes: 17 additions & 7 deletions src/daemon/dlt_daemon_event_handler.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,18 +228,28 @@ int dlt_daemon_handle_event(DltEventHandler *pEvent,
if (pEvent->pfd[i].revents & DLT_EV_MASK_REJECTED) {
/* An error occurred, we need to clean-up the concerned event
*/
if (type == DLT_CONNECTION_CLIENT_MSG_TCP)
if (type == DLT_CONNECTION_CLIENT_MSG_TCP) {
/* To transition to BUFFER state if this is final TCP client connection,
* call dedicated function. this function also calls
* dlt_event_handler_unregister_connection() inside the function.
*/
dlt_daemon_close_socket(fd, daemon, daemon_local, 0);
else
dlt_event_handler_unregister_connection(pEvent,
daemon_local,
fd);

continue;
continue;
}
else if (type == DLT_CONNECTION_GATEWAY) {
/* Let the callback function
* dlt_gateway_process_passive_node_messages handle the
* disconnect which was triggered by TCP keepalive after a
* network disconnect. If we directly called
* dlt_event_handler_unregister_connection() here the dlt_gateway would
* not notice that the connection was closed.
*/
dlt_vlog(LOG_DEBUG, "Connection to dlt gateway broken.\n");
}
else {
dlt_event_handler_unregister_connection(pEvent, daemon_local, fd);
continue;
}
}

/* Get the function to be used to handle the event */
Expand Down
5 changes: 3 additions & 2 deletions src/gateway/dlt_gateway.c
Original file line number Diff line number Diff line change
Expand Up @@ -985,8 +985,9 @@ int dlt_gateway_establish_connections(DltGateway *gateway,
}
}
else {
dlt_log(LOG_DEBUG,
"Passive Node is not up. Connection failed.\n");
dlt_vlog(LOG_WARNING,
"Passive Node %s is not up. Connection failed.\n",
con->ecuid);

con->timeout_cnt++;

Expand Down
52 changes: 51 additions & 1 deletion src/lib/dlt_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
#else
# include <sys/socket.h> /* for socket(), connect(), send(), and recv() */
# include <arpa/inet.h> /* for sockaddr_in and inet_addr() */
# include <netinet/tcp.h> /* for TCP keepalive: TCP_KEEPIDLE, TCP_KEEPCNT, TCP_KEEPINTVL */
# include <netdb.h>
# include <sys/stat.h>
# include <sys/un.h>
Expand Down Expand Up @@ -172,6 +173,14 @@ DltReturnValue dlt_client_init(DltClient *client, int verbose)
DltReturnValue dlt_client_connect(DltClient *client, int verbose)
{
const int yes = 1;
// Keepalive: Opt flag to enable TCP keepalive
const int enablekeepalive = 1;
// Keepalive: Seconds of idle before sending keepalive probes
const int keepidle = 10;
// Keepalive: Number of probes to send before considering the connection dead
const int keepcount = 2;
// Keepalive: Interval between the probes
const int keepintvl = 3;
char portnumbuffer[33] = {0};
struct addrinfo hints, *servinfo, *p;
struct sockaddr_un addr;
Expand Down Expand Up @@ -212,6 +221,45 @@ DltReturnValue dlt_client_connect(DltClient *client, int verbose)
continue;
}

/* Enable TCP keepalive to detect broken connections due to network disconnects.
* Without this in cases where no TCP FIN or RST package is received from the server,
* the dlt_client will never notice that it was disconnected from the server and so
* it will not reconnect when the server is available again.
*/
if (setsockopt(client->sock, SOL_SOCKET, SO_KEEPALIVE,
&enablekeepalive, sizeof(enablekeepalive)) < 0) {
dlt_vlog(LOG_WARNING,
"%s: Failed to set SO_KEEPALIVE on socket: %s\n",
__func__, strerror(errno));
close(client->sock);
continue;
}

if (setsockopt(client->sock, IPPROTO_TCP, TCP_KEEPIDLE,
&keepidle, sizeof(keepidle)) < 0) {
dlt_vlog(LOG_WARNING,
"%s: Failed to set TCP_KEEPIDLE on socket: %s\n",
__func__, strerror(errno));
close(client->sock);
continue;
}
if (setsockopt(client->sock, IPPROTO_TCP, TCP_KEEPCNT,
&keepcount, sizeof(keepcount)) < 0) {
dlt_vlog(LOG_WARNING,
"%s: Failed to set TCP_KEEPCNT on socket: %s\n",
__func__, strerror(errno));
close(client->sock);
continue;
}
if (setsockopt(client->sock, IPPROTO_TCP, TCP_KEEPINTVL,
&keepintvl, sizeof(keepintvl)) < 0) {
dlt_vlog(LOG_WARNING,
"%s: Failed to set TCP_KEEPINTVL on socket: %s\n",
__func__, strerror(errno));
close(client->sock);
continue;
}

/* Set socket to Non-blocking mode */
if(fcntl(client->sock, F_SETFL, fcntl(client->sock,F_GETFL,0) | O_NONBLOCK) < 0)
{
Expand Down Expand Up @@ -273,8 +321,10 @@ DltReturnValue dlt_client_connect(DltClient *client, int verbose)

if (p == NULL) {
dlt_vlog(LOG_ERR,
"%s: ERROR: failed to connect! %s\n",
"%s: ERROR: failed to connect to %s:%s! %s\n",
__func__,
client->servIP,
portnumbuffer,
strerror(connect_errno));
return DLT_RETURN_ERROR;
}
Expand Down

0 comments on commit 450920b

Please sign in to comment.