From aa1986f05960ccd7260f262185b0353811105b3c Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 5 Jun 2023 10:19:28 -0700
Subject: [PATCH] add call to p2p overlap (#6779) (#6786)

* add call to p2p overlap

* update Jenkins for test

---------

Signed-off-by: Abhinav Khattar
Signed-off-by: Eric Harper
Co-authored-by: Abhinav Khattar
Co-authored-by: Eric Harper
---
 Jenkinsfile | 2 +-
 .../nlp/models/language_modeling/megatron_gpt_model.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 83223d5c8669..fdd311ba4a59 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -61,7 +61,7 @@ pipeline {
       steps {
         sh 'git clone https://github.com/NVIDIA/Megatron-LM.git && \
             cd Megatron-LM && \
-            git checkout cd2537d444792b487b1ab5a6fa685e09c9957409 && \
+            git checkout e6d7e09845590d0a36bc7f29eb28db974fb8da4e && \
             pip install -e .'
       }
     }
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
index fd1382e668cf..96f40b99bdd0 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
@@ -468,6 +468,8 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only):
             no_sync_func=no_sync_func,
             grad_sync_func=grad_sync_func,
             param_sync_func=param_sync_func,
+            overlap_p2p_comm=self.cfg.get('overlap_p2p_comm', False),
+            batch_p2p_comm=self.cfg.get('batch_p2p_comm', True),
         )
 
         # only the last stages of the pipeline return losses