NVIDIA · janekl · Jan 10, 2025 · Dec 5, 2024 · Dec 23, 2024 · Jan 10, 2025
diff --git a/nemo/collections/llm/recipes/run/executor.py b/nemo/collections/llm/recipes/run/executor.py
@@ -11,16 +11,31 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Optional
+
 import nemo_run as run
+import torch
 
 
 @run.cli.factory
-def torchrun(devices: int = 8) -> run.Config[run.LocalExecutor]:
-    """Local executor using torchrun."""
+def torchrun(devices: Optional[int] = None) -> run.Config[run.LocalExecutor]:
+    """
+    Local executor using torchrun.
+
+    Args:
+        devices (Optional[int]): Number of devices (tasks per node) to use. If None, CUDA must be available and the value defaults to torch.cuda.device_count().
+
+    Returns:
+        run.Config[run.LocalExecutor]: Configuration for the local executor using torchrun.
+    """
     env_vars = {
         "TORCH_NCCL_AVOID_RECORD_STREAMS": "1",
     }
 
+    if devices is None:
+        assert torch.cuda.is_available()
+        devices = torch.cuda.device_count()
+
     executor = run.Config(
         run.LocalExecutor,
         ntasks_per_node=devices,