Skip to content

Commit

Permalink
Merge branch 'tim/transport-fixxxx' into 'master'
Browse files Browse the repository at this point in the history
fix(transport): Don't hold peer state lock across TLS handshake

In the worst case we hold the write lock on the peer for 30s (TLS timeout). It is possible that p2p has not received the peer down event yet (because we spawn the connect task before sending the peer down event) and it might try to send sth to the reconnecting peer (doing a blocking_read) which can block the p2p thread. 

See merge request dfinity-lab/public/ic!15496
  • Loading branch information
tthebst committed Oct 25, 2023
2 parents 156f472 + d2fbd51 commit d71613c
Showing 1 changed file with 15 additions and 11 deletions.
26 changes: 15 additions & 11 deletions rs/transport/src/control_plane.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,29 +271,33 @@ impl TransportImpl {
.tcp_connects
.with_label_values(&[STATUS_SUCCESS])
.inc();

let peer_map = arc_self.peer_map.read().await;
let peer_state_mu = match peer_map.get(&peer_id) {
Some(peer_state) => peer_state,
None => continue,
};
let mut peer_state = peer_state_mu.write().await;
if peer_state.get_connected().is_some() {
// TODO: P2P-516
continue;
}
match arc_self.tls_client_handshake(peer_id, stream).await {
Ok(tls_stream) => {
arc_self.control_plane_metrics
.tls_handshakes
.with_label_values(&[ConnectionRole::Client.as_ref(), STATUS_SUCCESS])
.inc();
// It is important to not hold these lock across the tls handshake since in the worst case
// this can take 10+s. By doing the peer map operation after the handshake it is ensured
// that the looks are held as short as possible.
let peer_map = arc_self.peer_map.read().await;
let peer_state_mu = match peer_map.get(&peer_id) {
Some(peer_state) => peer_state,
None => continue,
};

let mut peer_state = peer_state_mu.write().await;
if peer_state.get_connected().is_some() {
continue;
}

let mut event_handler = match arc_self.event_handler.lock().await.as_ref() {
Some(event_handler) => event_handler.clone(),
None => continue,
};
let weak_self_clone = arc_self.weak_self.read().unwrap().clone();
let event_handler_clone = event_handler.clone();

if let Ok(connected_state) = create_connected_state(
peer_id,
channel_id,
Expand Down

0 comments on commit d71613c

Please sign in to comment.