Merge pull request karpathy#357 from rosslwheeler/cudnn_makefile_changes

cuDNN makefile changes
ahrefs · May 7, 2024 · 0141408 · 0141408
2 parents ce72242 + 134f4c7
commit 0141408
Showing 1 changed file with 44 additions and 22 deletions.
diff --git a/Makefile b/Makefile
@@ -86,27 +86,49 @@ else
 endif
 
 # Check and include cudnn if available
-# Currently hard-coding a bunch of stuff here for Linux, todo make this better/nicer
+# You can override the path to cudnn frontend by setting CUDNN_FRONTEND_PATH=your_path on the make command line
 # You need cuDNN from: https://developer.nvidia.com/cudnn
-# Follow the apt-get instructions
+# Follow the apt-get instructions or Windows instructions to install the cuDNN library
 # And the cuDNN front-end from: https://github.com/NVIDIA/cudnn-frontend/tree/main
-# For this there is no installation, just download the repo to your home directory
+# For this there is no installation, just download the repo to your home directory or directory of your choice
 # and then we include it below (see currently hard-coded path assumed in home directory)
 ifeq ($(USE_CUDNN), 1)
   ifeq ($(SHELL_UNAME), Linux)
-    # hard-coded path for now
-    CUDNN_FRONTEND_PATH := $(HOME)/cudnn-frontend/include
-    ifeq ($(shell [ -d $(CUDNN_FRONTEND_PATH) ] && echo "exists"), exists)
+    # hard-coded path for now in either . or ($HOME) directory 
+    # this can be overridden by setting CUDNN_FRONTEND_PATH on the command line
+    ifeq ($(shell [ -d $(HOME)/cudnn-frontend/include ] && echo "exists"), exists)
       $(info ✓ cuDNN found, will run with flash-attention)
-      NVCC_INCLUDES += -I$(CUDNN_FRONTEND_PATH)
-      NVCC_LDFLAGS += -lcudnn
-      NVCC_FLAGS += -DENABLE_CUDNN
-      NVCC_CUDNN = cudnn_att.o
+      CUDNN_FRONTEND_PATH ?= $(HOME)/cudnn-frontend/include
+    else ifeq ($(shell [ -d cudnn-frontend/include ] && echo "exists"),)
+      $(info ✓ cuDNN found, will run with flash-attention)
+      CUDNN_FRONTEND_PATH ?= cudnn-frontend/include
     else
       $(error ✗ cuDNN not found. See the Makefile for our currently hard-coded paths / install instructions)
     endif
-  else
-    $(info → cuDNN is not supported right now outside of Linux)
+    NVCC_INCLUDES += -I$(CUDNN_FRONTEND_PATH)
+    NVCC_LDFLAGS += -lcudnn
+    NVCC_FLAGS += -DENABLE_CUDNN
+    NVCC_CUDNN = cudnn_att.o
+  else 
+    ifneq ($(OS), Windows_NT)
+      $(info → cuDNN is not supported on MAC OS right now)
+    else
+      $(info ✓ Windows cuDNN found, will run with flash-attention)
+      ifeq ($(shell if exist "$(HOMEDRIVE)$(HOMEPATH)\cudnn-frontend\include" (echo exists)),exists)
+        CUDNN_FRONTEND_PATH ?= $(HOMEDRIVE)$(HOMEPATH)\cudnn-frontend\include #override on command line if different location
+      else ifeq ($(shell if exist "cudnn-frontend\include" (echo exists)),exists)
+        CUDNN_FRONTEND_PATH ?= cudnn-frontend\include #override on command line if different location
+      else
+        $(error ✗ cuDNN not found. See the Makefile for our currently hard-coded paths / install instructions) 
+      endif
+      CUDNN_INCLUDE_PATH ?= -I"C:\Program Files\NVIDIA\CUDNN\v9.1\include\12.4"
+      CUDNN_FRONTEND_PATH += $(CUDNN_INCLUDE_PATH)
+      NVCC_FLAGS += --std c++20 -Xcompiler "/std:c++20" -Xcompiler "/EHsc /W0 /nologo /Ox /FS" -maxrregcount=0 --machine 64
+      NVCC_CUDNN = cudnn_att.obj
+      NVCC_INCLUDES += -I$(CUDNN_FRONTEND_PATH)
+      NVCC_LDFLAGS += -L"C:\Program Files\NVIDIA\CUDNN\v9.1\lib\12.4\x64" -lcudnn 
+      NVCC_FLAGS += -DENABLE_CUDNN
+    endif
   endif
 else
   $(info → cuDNN is manually disabled by default, run make with `USE_CUDNN=1` to try to enable)
@@ -202,36 +224,36 @@ ifeq ($(NVCC),)
     $(info ✗ nvcc not found, skipping GPU/CUDA builds)
 else
     $(info ✓ nvcc found, including GPU/CUDA support)
-    TARGETS += train_gpt2cu test_gpt2cu train_gpt2fp32cu test_gpt2fp32cu
+    TARGETS += train_gpt2cu test_gpt2cu train_gpt2fp32cu test_gpt2fp32cu $(NVCC_CUDNN)
 endif
 
 $(info ---------------------------------------------)
 
 all: $(TARGETS)
 
 train_gpt2: train_gpt2.c
-	$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) $< $(LDLIBS) $(OUTPUT_FILE)
+	$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) $^ $(LDLIBS) $(OUTPUT_FILE)
 
 test_gpt2: test_gpt2.c
-	$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) $< $(LDLIBS) $(OUTPUT_FILE)
+	$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) $^ $(LDLIBS) $(OUTPUT_FILE)
 
-cudnn_att.o: cudnn_att.cu
-	$(NVCC) -c $(NVCC_FLAGS) $(PFLAGS) $< $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS)
+$(NVCC_CUDNN): cudnn_att.cu
+	$(NVCC) -c $(NVCC_FLAGS) $(PFLAGS) $^ $(NVCC_INCLUDES) 
 
 train_gpt2cu: train_gpt2.cu $(NVCC_CUDNN)
-	$(NVCC) $(NVCC_FLAGS) $(PFLAGS) $< $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE) $(NVCC_CUDNN)
+	$(NVCC) $(NVCC_FLAGS) $(PFLAGS) $^ $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE) 
 
 train_gpt2fp32cu: train_gpt2_fp32.cu
-	$(NVCC) $(NVCC_FLAGS) $< $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE)
+	$(NVCC) $(NVCC_FLAGS) $^ $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE)
 
 test_gpt2cu: test_gpt2.cu $(NVCC_CUDNN)
-	$(NVCC) $(NVCC_FLAGS) $(PFLAGS) $< $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE) $(NVCC_CUDNN)
+	$(NVCC) $(NVCC_FLAGS) $(PFLAGS) $^ $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE) 
 
 test_gpt2fp32cu: test_gpt2_fp32.cu
-	$(NVCC) $(NVCC_FLAGS) $< $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE)
+	$(NVCC) $(NVCC_FLAGS) $^ $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE)
 
 profile_gpt2cu: profile_gpt2.cu $(NVCC_CUDNN)
-	$(NVCC) $(NVCC_FLAGS) $(PFLAGS) -lineinfo $< $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS)  $(CUDA_OUTPUT_FILE) $(NVCC_CUDNN)
+	$(NVCC) $(NVCC_FLAGS) $(PFLAGS) -lineinfo $^ $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS)  $(CUDA_OUTPUT_FILE) 
 
 clean:
 	$(REMOVE_FILES) $(TARGETS)