Skip to content

Commit

Permalink
Merge pull request karpathy#357 from rosslwheeler/cudnn_makefile_changes
Browse files Browse the repository at this point in the history
cuDNN makefile changes
  • Loading branch information
karpathy authored May 7, 2024
2 parents ce72242 + 134f4c7 commit 0141408
Showing 1 changed file with 44 additions and 22 deletions.
66 changes: 44 additions & 22 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -86,27 +86,49 @@ else
endif

# Check and include cudnn if available
# Currently hard-coding a bunch of stuff here for Linux, todo make this better/nicer
# You can override the path to cudnn frontend by setting CUDNN_FRONTEND_PATH=your_path on the make command line
# You need cuDNN from: https://developer.nvidia.com/cudnn
# Follow the apt-get instructions
# Follow the apt-get instructions or Windows instructions to install the cuDNN library
# And the cuDNN front-end from: https://github.com/NVIDIA/cudnn-frontend/tree/main
# For this there is no installation, just download the repo to your home directory
# For this there is no installation, just download the repo to your home directory or directory of your choice
# and then we include it below (see currently hard-coded path assumed in home directory)
ifeq ($(USE_CUDNN), 1)
ifeq ($(SHELL_UNAME), Linux)
# hard-coded path for now
CUDNN_FRONTEND_PATH := $(HOME)/cudnn-frontend/include
ifeq ($(shell [ -d $(CUDNN_FRONTEND_PATH) ] && echo "exists"), exists)
# hard-coded path for now in either . or ($HOME) directory
# this can be overridden by setting CUDNN_FRONTEND_PATH on the command line
ifeq ($(shell [ -d $(HOME)/cudnn-frontend/include ] && echo "exists"), exists)
$(info ✓ cuDNN found, will run with flash-attention)
NVCC_INCLUDES += -I$(CUDNN_FRONTEND_PATH)
NVCC_LDFLAGS += -lcudnn
NVCC_FLAGS += -DENABLE_CUDNN
NVCC_CUDNN = cudnn_att.o
CUDNN_FRONTEND_PATH ?= $(HOME)/cudnn-frontend/include
else ifeq ($(shell [ -d cudnn-frontend/include ] && echo "exists"),)
$(info ✓ cuDNN found, will run with flash-attention)
CUDNN_FRONTEND_PATH ?= cudnn-frontend/include
else
$(error ✗ cuDNN not found. See the Makefile for our currently hard-coded paths / install instructions)
endif
else
$(info → cuDNN is not supported right now outside of Linux)
NVCC_INCLUDES += -I$(CUDNN_FRONTEND_PATH)
NVCC_LDFLAGS += -lcudnn
NVCC_FLAGS += -DENABLE_CUDNN
NVCC_CUDNN = cudnn_att.o
else
ifneq ($(OS), Windows_NT)
$(info → cuDNN is not supported on MAC OS right now)
else
$(info ✓ Windows cuDNN found, will run with flash-attention)
ifeq ($(shell if exist "$(HOMEDRIVE)$(HOMEPATH)\cudnn-frontend\include" (echo exists)),exists)
CUDNN_FRONTEND_PATH ?= $(HOMEDRIVE)$(HOMEPATH)\cudnn-frontend\include #override on command line if different location
else ifeq ($(shell if exist "cudnn-frontend\include" (echo exists)),exists)
CUDNN_FRONTEND_PATH ?= cudnn-frontend\include #override on command line if different location
else
$(error ✗ cuDNN not found. See the Makefile for our currently hard-coded paths / install instructions)
endif
CUDNN_INCLUDE_PATH ?= -I"C:\Program Files\NVIDIA\CUDNN\v9.1\include\12.4"
CUDNN_FRONTEND_PATH += $(CUDNN_INCLUDE_PATH)
NVCC_FLAGS += --std c++20 -Xcompiler "/std:c++20" -Xcompiler "/EHsc /W0 /nologo /Ox /FS" -maxrregcount=0 --machine 64
NVCC_CUDNN = cudnn_att.obj
NVCC_INCLUDES += -I$(CUDNN_FRONTEND_PATH)
NVCC_LDFLAGS += -L"C:\Program Files\NVIDIA\CUDNN\v9.1\lib\12.4\x64" -lcudnn
NVCC_FLAGS += -DENABLE_CUDNN
endif
endif
else
$(info → cuDNN is manually disabled by default, run make with `USE_CUDNN=1` to try to enable)
Expand Down Expand Up @@ -202,36 +224,36 @@ ifeq ($(NVCC),)
$(info ✗ nvcc not found, skipping GPU/CUDA builds)
else
$(info ✓ nvcc found, including GPU/CUDA support)
TARGETS += train_gpt2cu test_gpt2cu train_gpt2fp32cu test_gpt2fp32cu
TARGETS += train_gpt2cu test_gpt2cu train_gpt2fp32cu test_gpt2fp32cu $(NVCC_CUDNN)
endif

$(info ---------------------------------------------)

all: $(TARGETS)

train_gpt2: train_gpt2.c
$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) $< $(LDLIBS) $(OUTPUT_FILE)
$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) $^ $(LDLIBS) $(OUTPUT_FILE)

test_gpt2: test_gpt2.c
$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) $< $(LDLIBS) $(OUTPUT_FILE)
$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) $^ $(LDLIBS) $(OUTPUT_FILE)

cudnn_att.o: cudnn_att.cu
$(NVCC) -c $(NVCC_FLAGS) $(PFLAGS) $< $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS)
$(NVCC_CUDNN): cudnn_att.cu
$(NVCC) -c $(NVCC_FLAGS) $(PFLAGS) $^ $(NVCC_INCLUDES)

train_gpt2cu: train_gpt2.cu $(NVCC_CUDNN)
$(NVCC) $(NVCC_FLAGS) $(PFLAGS) $< $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE) $(NVCC_CUDNN)
$(NVCC) $(NVCC_FLAGS) $(PFLAGS) $^ $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE)

train_gpt2fp32cu: train_gpt2_fp32.cu
$(NVCC) $(NVCC_FLAGS) $< $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE)
$(NVCC) $(NVCC_FLAGS) $^ $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE)

test_gpt2cu: test_gpt2.cu $(NVCC_CUDNN)
$(NVCC) $(NVCC_FLAGS) $(PFLAGS) $< $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE) $(NVCC_CUDNN)
$(NVCC) $(NVCC_FLAGS) $(PFLAGS) $^ $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE)

test_gpt2fp32cu: test_gpt2_fp32.cu
$(NVCC) $(NVCC_FLAGS) $< $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE)
$(NVCC) $(NVCC_FLAGS) $^ $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE)

profile_gpt2cu: profile_gpt2.cu $(NVCC_CUDNN)
$(NVCC) $(NVCC_FLAGS) $(PFLAGS) -lineinfo $< $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE) $(NVCC_CUDNN)
$(NVCC) $(NVCC_FLAGS) $(PFLAGS) -lineinfo $^ $(NVCC_LDFLAGS) $(NVCC_INCLUDES) $(NVCC_LDLIBS) $(CUDA_OUTPUT_FILE)

clean:
$(REMOVE_FILES) $(TARGETS)

0 comments on commit 0141408

Please sign in to comment.