forked from karpathy/llm.c
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request karpathy#266 from AnswerDotAI/master
Add a local convenience Makefile for dev/cuda/
- Loading branch information
Showing
5 changed files
with
72 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# Makefile for building dev/cuda kernels | ||
|
||
# Locate nvcc on PATH. A missing compiler is fatal for every build goal, but
# `make clean` only deletes files, so it is allowed to proceed without nvcc.
NVCC := $(shell which nvcc 2>/dev/null)
ifeq ($(strip $(NVCC)),)
ifneq ($(MAKECMDGOALS),clean)
$(error nvcc not found.)
endif
endif
|
||
# Flags passed to nvcc by the kernel build rules: -O3 plus nvcc's fast-math mode.
CFLAGS := -O3 --use_fast_math
# Include and library search paths for OpenMPI; only the nccl_all_reduce rule
# in this file references them.
MPI_PATHS := -I/usr/lib/x86_64-linux-gnu/openmpi/include -L/usr/lib/x86_64-linux-gnu/openmpi/lib/
|
||
|
||
# Generic rule: build an executable named after its <name>.cu source and link
# it against cuBLAS. Targets that carry their own recipe further down take
# precedence over this rule.
%: %.cu
	$(NVCC) $(CFLAGS) -o $@ $< -lcublas
|
||
# Every kernel binary this Makefile builds; each name has a matching .cu source.
TARGETS = \
  adamw \
  attention_backward \
  attention_forward \
  classifier_fused \
  crossentropy_forward \
  crossentropy_softmax_backward \
  encoder_backward \
  encoder_forward \
  gelu_forward \
  layernorm_backward \
  layernorm_forward \
  matmul_backward \
  matmul_backward_bias \
  matmul_forward \
  nccl_all_reduce \
  residual_forward \
  softmax_forward \
  trimat_forward
|
||
# Default goal: build every kernel binary. Declared phony so that a stray file
# named `all` in this directory cannot make the goal look up to date.
.PHONY: all
all: $(TARGETS)
|
||
# Forward kernels | ||
|
||
# Rules without a recipe only name their source; the generic `%: %.cu` rule
# supplies the build command for them.
attention_forward: attention_forward.cu
classifier_fused: classifier_fused.cu
crossentropy_forward: crossentropy_forward.cu
encoder_forward: encoder_forward.cu
gelu_forward: gelu_forward.cu
layernorm_forward: layernorm_forward.cu
# matmul_forward has its own recipe: it passes -fopenmp to the host compiler
# and links cuBLASLt in addition to cuBLAS.
matmul_forward: matmul_forward.cu
	$(NVCC) $(CFLAGS) -Xcompiler -fopenmp $< -o $@ -lcublas -lcublasLt
residual_forward: residual_forward.cu
softmax_forward: softmax_forward.cu
trimat_forward: trimat_forward.cu
|
||
# Backward kernels | ||
|
||
# Rules without a recipe only name their source; the generic `%: %.cu` rule
# supplies the build command for them.
attention_backward: attention_backward.cu
crossentropy_softmax_backward: crossentropy_softmax_backward.cu
encoder_backward: encoder_backward.cu
layernorm_backward: layernorm_backward.cu
matmul_backward_bias: matmul_backward_bias.cu
# matmul_backward has its own recipe so that -fopenmp reaches the host compiler.
matmul_backward: matmul_backward.cu
	$(NVCC) $(CFLAGS) -Xcompiler -fopenmp $< -o $@ -lcublas
|
||
# Update kernels | ||
|
||
# No recipe here: the generic `%: %.cu` pattern rule builds this target.
adamw: adamw.cu
|
||
# NCCL | ||
|
||
# Build the nccl_all_reduce binary. It links against MPI and NCCL rather than
# cuBLAS and needs the OpenMPI search paths from MPI_PATHS. The shared CFLAGS
# are applied so this build is optimized like the other kernels, and the
# libraries follow the source so link order is conventional.
nccl_all_reduce: nccl_all_reduce.cu
	$(NVCC) $(CFLAGS) $(MPI_PATHS) $< -o $@ -lmpi -lnccl
|
||
# Build everything, then run each kernel binary in TARGETS order with a banner
# before it. printf is used for the banner because whether `echo` interprets
# "\n" depends on which shell provides /bin/sh. A binary that exits non-zero
# does not stop the loop; the remaining binaries still run.
.PHONY: run_all
run_all: all
	@for target in $(TARGETS); do \
		printf '\n========================================\n'; \
		printf 'Running %s ...\n' "$$target"; \
		printf '========================================\n\n'; \
		./$$target; \
	done
|
||
# Remove every kernel binary listed in TARGETS. Declared phony so that a file
# named `clean` cannot stop the recipe from running.
.PHONY: clean
clean:
	$(RM) $(TARGETS)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,7 @@ | ||
# dev/cuda | ||
|
||
This directory is scratch space for developing various versions of the needed CUDA kernels. Each file develops a kernel, see the top of each file for instructions on how to compile and run each one. | ||
This directory is scratch space for developing various versions of the needed CUDA kernels. Each file develops a kernel; see the top of each file for instructions on how to compile and run it using the `nvcc` compiler. | ||
|
||
An alternative to invoking `nvcc` manually is to use `make` with the accompanying `Makefile` in this directory. Each kernel has its own `make` build target; invoking `make` with that target builds the associated binary. | ||
|
||
For example, `make gelu_forward` builds the forward GELU kernel, creating a binary that can be executed by running `./gelu_forward`. `make` or `make all` builds all the kernels in this directory. To delete all binary build targets, run `make clean`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters