-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
161 lines (135 loc) · 5.21 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# Prefix shared by every intermediate file generated below; `clean` removes
# everything whose name starts with it.
tmp_file_prefix := tmp
# CUDA source file that both preprocessing rules depend on.
SRC := halo.cu
# Root of the CUDA toolkit installation (tools, headers and libraries are
# looked up underneath it).
CUDA_PATH := /usr/local/cuda/

# Remove a target whose recipe failed, so a half-written file is never
# mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

.PHONY: clean all

# Default goal: build the final executable.
all: a.out
# Remove every generated intermediate (all files starting with the temp
# prefix) and the final executable.  `rm -f` keeps the target from failing when
# nothing has been built yet; the guard refuses to run when the prefix is
# empty, because the glob would then match every file in the directory.
clean:
	$(if $(strip ${tmp_file_prefix}),,$(error tmp_file_prefix is empty - refusing to delete every file in this directory))
	rm -f ${tmp_file_prefix}* a.out
# Preprocessing pass WITHOUT __CUDA_ARCH__ defined: runs the source through
# `gcc -E` with the nvcc identification macros set and cuda_runtime.h
# force-included.  The resulting .ii file is the input of the cudafe++ rule.
# The input is taken from the prerequisite ($<) so that changing SRC actually
# changes what gets preprocessed.
${tmp_file_prefix}-6_halo.cpp4.ii: ${SRC}
	gcc \
	  -D__CUDA_ARCH_LIST__=860 \
	  -D__CUDACC__ \
	  -D__NVCC__ \
	  "-I${CUDA_PATH}/bin/../targets/x86_64-linux/include" \
	  -D__CUDACC_VER_MAJOR__=12 \
	  -D__CUDACC_VER_MINOR__=1 \
	  -D__CUDACC_VER_BUILD__=66 \
	  -D__CUDA_API_VER_MAJOR__=12 \
	  -D__CUDA_API_VER_MINOR__=1 \
	  -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 \
	  -include "cuda_runtime.h" \
	  -m64 \
	  -E \
	  -x c++ \
	  "$<" \
	  -o "$@"
# Preprocessing pass WITH __CUDA_ARCH__=860 defined: `gcc -E` over the source
# with cuda_runtime.h force-included.  The resulting .ii file is the input of
# the cicc rule.
$(tmp_file_prefix)-10_halo.cpp1.ii: $(SRC)
	gcc \
	  -D__CUDA_ARCH__=860 \
	  -D__CUDA_ARCH_LIST__=860 \
	  -DCUDA_DOUBLE_MATH_FUNCTIONS \
	  -D__CUDACC__ \
	  -D__NVCC__ \
	  -I$(CUDA_PATH)/bin/../targets/x86_64-linux/include \
	  -D__CUDACC_VER_MAJOR__=12 \
	  -D__CUDACC_VER_MINOR__=1 \
	  -D__CUDACC_VER_BUILD__=66 \
	  -D__CUDA_API_VER_MAJOR__=12 \
	  -D__CUDA_API_VER_MINOR__=1 \
	  -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 \
	  -include cuda_runtime.h \
	  -m64 \
	  -E \
	  -x c++ \
	  $< \
	  -o $@
# Run cudafe++ on the cpp4.ii preprocessing output.  A single invocation is
# told to write two files: the generated C++ file (--gen_c_file_name) and the
# module-id file (--module_id_file_name).  Only the .cpp file is the target of
# the recipe; the module-id file is chained to it by the rule that follows, so
# the tool runs exactly once even under `make -j`.  (A plain "a b: prereq"
# header defines two independent rules that may both run the recipe.)
#
# Disabled options, kept for reference.  They deliberately carry no trailing
# backslash: a backslash at the end of a comment line continues the comment
# onto the next line and would swallow the rule header that follows.
#   --orig_src_file_name "./halo.cu"
#   --orig_src_path_name "/home/liuweinan/20230303_TVM/cuda_demo/halo.cu"
${tmp_file_prefix}-7_halo.cudafe1.cpp: ${tmp_file_prefix}-6_halo.cpp4.ii
	${CUDA_PATH}/bin/cudafe++ \
	  --c++17 \
	  --gnu_version=110300 \
	  --display_error_number \
	  --allow_managed \
	  --m64 \
	  --parse_templates \
	  --gen_c_file_name "./$@" \
	  --stub_file_name "${tmp_file_prefix}-7_halo.cudafe1.stub.c" \
	  --gen_module_id_file \
	  --module_id_file_name "./${tmp_file_prefix}-5_halo.module_id" \
	  "./$<"

# The module-id file is a by-product of the cudafe++ run above.  Verify it
# exists, then refresh its timestamp so it is never older than the .cpp file
# it was generated alongside (otherwise it would look stale on every run).
${tmp_file_prefix}-5_halo.module_id: ${tmp_file_prefix}-7_halo.cudafe1.cpp
	@test -f $@ || { echo "$@ is missing; delete $< and re-run make" >&2; exit 1; }
	@touch $@
# Run cicc on the cpp1.ii preprocessing output (first prerequisite) to produce
# the PTX file.  The same invocation is also handed the stub file name
# (--stub_file_name); the stub is treated as a by-product of the .ptx target
# and chained to it by the rule that follows, so cicc runs exactly once even
# under `make -j`.  (A plain "a b: prereq" header defines two independent
# rules that may both run the recipe.)
#
# Disabled options, kept for reference.  They deliberately carry no trailing
# backslash: a backslash at the end of a comment line continues the comment
# onto the next line and would swallow the rule header that follows.
#   --orig_src_file_name "./halo.cu"
#   --orig_src_path_name "/home/liuweinan/20230303_TVM/cuda_demo/halo.cu"
${tmp_file_prefix}-7_halo.ptx: ${tmp_file_prefix}-10_halo.cpp1.ii ${tmp_file_prefix}-5_halo.module_id
	${CUDA_PATH}/nvvm/bin/cicc \
	  --c++17 \
	  --gnu_version=110300 \
	  --display_error_number \
	  --allow_managed \
	  -arch compute_86 \
	  -m64 \
	  --no-version-ident \
	  -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 \
	  --include_file_name ${tmp_file_prefix}-4_halo.fatbin.c \
	  -tused \
	  --module_id_file_name ${tmp_file_prefix}-5_halo.module_id \
	  --gen_device_file_name ${tmp_file_prefix}-7_halo.cudafe1.gpu \
	  --gen_c_file_name ${tmp_file_prefix}-7_halo.cudafe1.c \
	  --stub_file_name ${tmp_file_prefix}-7_halo.cudafe1.stub.c \
	  $< \
	  -o $@

# The stub file is a by-product of the cicc run above.  Verify it exists, then
# refresh its timestamp so it is never older than the .ptx file it was
# generated alongside (otherwise it would look stale on every run).
${tmp_file_prefix}-7_halo.cudafe1.stub.c: ${tmp_file_prefix}-7_halo.ptx
	@test -f $@ || { echo "$@ is missing; delete $< and re-run make" >&2; exit 1; }
	@touch $@
# Turn the PTX file into a cubin for sm_86 with ptxas.
$(tmp_file_prefix)-11_halo.cubin: $(tmp_file_prefix)-7_halo.ptx
	$(CUDA_PATH)/bin/ptxas \
	  -arch=sm_86 \
	  -m64 \
	  "./$<" \
	  -o "./$@"
# Package the cubin (as an sm_86 ELF image) into an embedded-fatbin C file
# with fatbinary.
$(tmp_file_prefix)-4_halo.fatbin.c: $(tmp_file_prefix)-11_halo.cubin
	$(CUDA_PATH)/bin/fatbinary \
	  -64 \
	  --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " \
	  "--image3=kind=elf,sm=86,file=./$<" \
	  --embedded-fatbin="./$@"
# Compile the cudafe++-generated C++ file into the object file.  Only the
# .cudafe1.cpp prerequisite is passed on the command line; the stub and fatbin
# files are prerequisites only (presumably pulled in through #include by the
# generated source - TODO confirm).
$(tmp_file_prefix)-12_halo.o: $(tmp_file_prefix)-7_halo.cudafe1.stub.c $(tmp_file_prefix)-7_halo.cudafe1.cpp $(tmp_file_prefix)-4_halo.fatbin.c
	gcc \
	  -D__CUDA_ARCH__=860 \
	  -D__CUDA_ARCH_LIST__=860 \
	  -DCUDA_DOUBLE_MATH_FUNCTIONS \
	  "-I$(CUDA_PATH)/bin/../targets/x86_64-linux/include" \
	  -m64 \
	  -x c++ \
	  -c \
	  "./$(filter %.cudafe1.cpp,$^)" \
	  -o "./$@"
# Run nvlink on the object file to produce the linked cubin.  The same
# invocation is also given a registration file name
# (--register-link-binaries); that file is treated as a by-product of the
# cubin target and chained to it by the rule that follows, so nvlink runs
# exactly once even under `make -j`.  (A plain "a b: prereq" header defines
# two independent rules that may both run the recipe.)
${tmp_file_prefix}-13_a_dlink.cubin: ${tmp_file_prefix}-12_halo.o
	${CUDA_PATH}/bin/nvlink \
	  -m64 \
	  --arch=sm_86 \
	  --register-link-binaries="./${tmp_file_prefix}-8_a_dlink.reg.c" \
	  "-L${CUDA_PATH}/bin/../targets/x86_64-linux/lib/stubs" \
	  "-L${CUDA_PATH}/bin/../targets/x86_64-linux/lib" \
	  -cpu-arch=X86_64 \
	  "./$<" \
	  -lcudadevrt \
	  -o "./$@" \
	  --host-ccbin "gcc"

# The registration file is a by-product of the nvlink run above.  Verify it
# exists, then refresh its timestamp so it is never older than the cubin it
# was generated alongside (otherwise it would look stale on every run).
${tmp_file_prefix}-8_a_dlink.reg.c: ${tmp_file_prefix}-13_a_dlink.cubin
	@test -f $@ || { echo "$@ is missing; delete $< and re-run make" >&2; exit 1; }
	@touch $@
# Package the linked cubin (as an sm_86 ELF image, with -link) into an
# embedded-fatbin C file with fatbinary.
$(tmp_file_prefix)-9_a_dlink.fatbin.c: $(tmp_file_prefix)-13_a_dlink.cubin
	$(CUDA_PATH)/bin/fatbinary \
	  -64 \
	  --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " \
	  -link \
	  "--image3=kind=elf,sm=86,file=./$<" \
	  --embedded-fatbin="./$@"
# Compile the toolkit's crt/link.stub as C++ into the device-link object.
# The two prerequisites are not compiled directly; their paths are injected as
# string macros: REGISTERLINKBINARYFILE is the first prerequisite ($<, the
# .reg.c file) and FATBINFILE is the second ($(word 2,$^), the .fatbin.c file).
$(tmp_file_prefix)-14_a_dlink.o: $(tmp_file_prefix)-8_a_dlink.reg.c $(tmp_file_prefix)-9_a_dlink.fatbin.c
	gcc \
	  -D__CUDA_ARCH_LIST__=860 \
	  -DFATBINFILE="\"./$(word 2,$^)\"" \
	  -DREGISTERLINKBINARYFILE="\"./$<\"" \
	  -I. \
	  -D__NV_EXTRA_INITIALIZATION= \
	  -D__NV_EXTRA_FINALIZATION= \
	  -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ \
	  "-I$(CUDA_PATH)/bin/../targets/x86_64-linux/include" \
	  -D__CUDACC_VER_MAJOR__=12 \
	  -D__CUDACC_VER_MINOR__=1 \
	  -D__CUDACC_VER_BUILD__=66 \
	  -D__CUDA_API_VER_MAJOR__=12 \
	  -D__CUDA_API_VER_MINOR__=1 \
	  -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 \
	  -m64 \
	  -x c++ \
	  -c \
	  "$(CUDA_PATH)/bin/crt/link.stub" \
	  -o "./$@"
# Final link: combine the device-link object (first prerequisite) and the
# program object (second prerequisite) with the CUDA runtime libraries inside
# one linker group.
a.out: $(tmp_file_prefix)-14_a_dlink.o $(tmp_file_prefix)-12_halo.o
	g++ \
	  -D__CUDA_ARCH_LIST__=860 \
	  -m64 \
	  -Wl,--start-group \
	  "./$<" \
	  "./$(word 2,$^)" \
	  "-L$(CUDA_PATH)/bin/../targets/x86_64-linux/lib/stubs" \
	  "-L$(CUDA_PATH)/bin/../targets/x86_64-linux/lib" \
	  -lcudadevrt \
	  -lcudart_static \
	  -lrt \
	  -lpthread \
	  -ldl \
	  -Wl,--end-group \
	  -o "$@"