diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..b5550c82 --- /dev/null +++ b/.clang-format @@ -0,0 +1,97 @@ +--- +Language: Cpp +#BasedOnStyle: Chromium +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: true +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: false +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true +BinPackArguments: true +BinPackParameters: false +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +IncludeCategories: + - Regex: '^<.*\.h>' + Priority: 1 + - Regex: '^<.*' + Priority: 2 + - Regex: '.*' + Priority: 3 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IndentCaseLabels: true +IndentWidth: 2 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: false +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Right +ReflowComments: true +SortIncludes: false +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Auto +TabWidth: 8 +UseTab: Never +--- +Language: ObjC +... diff --git a/1_hello_world/app.cpp b/1_hello_world/app.cpp index 20505355..b76a5b28 100644 --- a/1_hello_world/app.cpp +++ b/1_hello_world/app.cpp @@ -3,6 +3,7 @@ #include #include "glm/glm.hpp" #include "taichi/aot_demo/framework.hpp" +#include "taichi/aot_demo/shadow_buffer.hpp" using namespace ti::aot_demo; @@ -15,21 +16,19 @@ struct App1_hello_world : public App { virtual AppConfig cfg() const override final { AppConfig out {}; out.app_name = "1_hello_world"; + out.supported_archs = { + TI_ARCH_VULKAN, + }; return out; } - virtual void initialize(TiArch arch) override final{ - - if(arch != TI_ARCH_VULKAN) { - std::cout << "1_hello_world only supports vulkan backend" << std::endl; - exit(0); - } - - GraphicsRuntime& runtime = F_->runtime(); + virtual void initialize() override final{ + Renderer& renderer = F_->renderer(); + ti::Runtime &runtime = F_->runtime(); - points = runtime.allocate_vertex_buffer(3, 2, true); + points = runtime.allocate_ndarray({3}, {2}, true); colors = runtime.allocate_ndarray({3}, {4}, true); - draw_points = runtime.draw_points(points) + draw_points = renderer.draw_points(points) .point_size(10.0f) .color(colors) .build(); diff --git a/1_hello_world_with_interop/app.cpp b/1_hello_world_with_interop/app.cpp index 97abde81..1aad189a 100644 --- a/1_hello_world_with_interop/app.cpp +++ b/1_hello_world_with_interop/app.cpp @@ -3,13 +3,11 @@ #include #include "glm/glm.hpp" #include "taichi/aot_demo/framework.hpp" -#include "taichi/aot_demo/interop/cross_device_copy.hpp" using namespace ti::aot_demo; struct App1_hello_world_with_interop : public App { // Runtime/Ndarray to perform computations - TiArch arch_; ti::Runtime runtime; ti::NdArray points; @@ -22,31 +20,22 @@ struct App1_hello_world_with_interop : public App { virtual AppConfig cfg() const override final { AppConfig out {}; out.app_name = "1_hello_world_with_interop"; + out.supported_archs = { + TI_ARCH_VULKAN, + TI_ARCH_CUDA, + TI_ARCH_X64, + TI_ARCH_OPENGL, + }; return out; } - virtual void initialize(TiArch arch) override final{ - if(arch != TI_ARCH_VULKAN && arch != TI_ARCH_X64 && arch != TI_ARCH_CUDA && arch != TI_ARCH_OPENGL) { - std::cout << "1_hello_world_with_interop only supports cuda, x64, vulkan, opengl backends" << std::endl; - exit(0); - } - arch_ = arch; + virtual void initialize() override final{ + Renderer &renderer = F_->renderer(); // Prepare Ndarray to store computation results - if(arch_ == TI_ARCH_VULKAN) { - // Reuse the vulkan runtime from renderer framework - runtime = ti::Runtime(arch_, F_->runtime(), false);; - } else { - runtime = ti::Runtime(arch_); - } points = runtime.allocate_ndarray({3}, {2}, true); - // Prepare vertex buffers for the renderer - GraphicsRuntime& g_runtime = F_->runtime(); - render_points = g_runtime.allocate_vertex_buffer(3, 2, true); - colors = g_runtime.allocate_ndarray({3}, {4}, true); - // Renderer renders with data from "render_points" in each frame - draw_points = g_runtime.draw_points(render_points) + draw_points = renderer.draw_points(points) .point_size(10.0f) .color(colors) .build(); @@ -69,17 +58,6 @@ struct App1_hello_world_with_interop : public App { }; colors.write(colors_data); - // Copy data from "points" to "render_points" - if(arch_ == TI_ARCH_X64) { - InteropHelper::copy_from_cpu(F_->runtime(), render_points, runtime, points); - } else if(arch_ == TI_ARCH_CUDA) { - InteropHelper::copy_from_cuda(F_->runtime(), render_points, runtime, points); - } else if(arch_ == TI_ARCH_VULKAN) { - InteropHelper::copy_from_vulkan(F_->runtime(), render_points, runtime, points); - } else if(arch_ == TI_ARCH_OPENGL) { - InteropHelper::copy_from_opengl(F_->runtime(), render_points, runtime, points); - } - std::cout << "stepped! (fps=" << F_->fps() << ")" << std::endl; return true; } diff --git a/2_mpm88/app.cpp b/2_mpm88/app.cpp index c503a9a0..9f9fe309 100644 --- a/2_mpm88/app.cpp +++ b/2_mpm88/app.cpp @@ -1,9 +1,11 @@ +#include #include #include #include #include "glm/glm.hpp" +#include "taichi/aot_demo/draws/draw_points.hpp" #include "taichi/aot_demo/framework.hpp" -#include "taichi/aot_demo/interop/cross_device_copy.hpp" +#include "taichi/aot_demo/shadow_buffer.hpp" using namespace ti::aot_demo; @@ -27,42 +29,11 @@ static std::string get_aot_file_dir(TiArch arch) { } } -template -static void copy_to_vulkan_ndarray(ti::NdArray& dst, - GraphicsRuntime& dst_runtime, - ti::NdArray& src, - ti::Runtime& src_runtime, TiArch src_arch) { - - switch(src_arch) { - case TI_ARCH_VULKAN: { - InteropHelper::copy_from_vulkan(dst_runtime, dst, src_runtime, src); - break; - } - case TI_ARCH_X64: { - InteropHelper::copy_from_cpu(dst_runtime, dst, src_runtime, src); - break; - } - case TI_ARCH_CUDA: { - InteropHelper::copy_from_cuda(dst_runtime, dst, src_runtime, src); - break; - } - case TI_ARCH_OPENGL: { - InteropHelper::copy_from_opengl(dst_runtime, dst, src_runtime, src); - break; - } - default: { - throw std::runtime_error("Unable to perform NdArray memory copy"); - } - } -} - struct App2_mpm88 : public App { static const uint32_t NPARTICLE = 8192 * 2; static const uint32_t GRID_SIZE = 128; - ti::Runtime runtime_; ti::AotModule module_; - TiArch arch_; ti::ComputeGraph g_init_; ti::ComputeGraph g_update_; @@ -74,8 +45,6 @@ struct App2_mpm88 : public App { ti::NdArray J_; ti::NdArray grid_v_; ti::NdArray grid_m_; - - ti::NdArray render_x_; std::unique_ptr draw_points; @@ -87,46 +56,32 @@ struct App2_mpm88 : public App { return out; } - - virtual void initialize(TiArch arch) override final{ - if(arch != TI_ARCH_VULKAN && arch != TI_ARCH_X64 && arch != TI_ARCH_CUDA && arch != TI_ARCH_OPENGL) { - std::cout << "1_hello_world_with_interop only supports cuda, x64, vulkan, opengl backends" << std::endl; - exit(0); - } - arch_ = arch; - - GraphicsRuntime& g_runtime = F_->runtime(); - if(arch_ == TI_ARCH_VULKAN) { - // Reuse the vulkan runtime from renderer framework - runtime_ = ti::Runtime(arch_, F_->runtime(), false);; - } else { - runtime_ = ti::Runtime(arch_); - } + virtual void initialize() override final{ + Renderer &renderer = F_->renderer(); + ti::Runtime &runtime = F_->runtime(); // 2. Load AOT module #ifdef TI_AOT_DEMO_WITH_ANDROID_APP std::vector tcm; F_->asset_mgr().load_file("E2_mpm88.tcm", tcm); - module_ = runtime_.create_aot_module(tcm); + module_ = runtime.create_aot_module(tcm); #else - auto aot_file_path = get_aot_file_dir(arch_); - module_ = runtime_.load_aot_module(aot_file_path); + auto aot_file_path = get_aot_file_dir(runtime.arch()); + module_ = runtime.load_aot_module(aot_file_path); #endif g_init_ = module_.get_compute_graph("init"); g_update_ = module_.get_compute_graph("update"); - render_x_ = g_runtime.allocate_vertex_buffer(NPARTICLE, 2, false/*host_access*/); + x_ = runtime.allocate_ndarray({NPARTICLE}, {2}, false/*host_access*/); + v_ = runtime.allocate_ndarray({NPARTICLE}, {2}); + pos_ = runtime.allocate_ndarray({NPARTICLE}, {3}); + C_ = runtime.allocate_ndarray({NPARTICLE}, {2, 2}); + J_ = runtime.allocate_ndarray({NPARTICLE}, {}); + grid_v_ = runtime.allocate_ndarray({GRID_SIZE, GRID_SIZE}, {2}); + grid_m_ = runtime.allocate_ndarray({GRID_SIZE, GRID_SIZE}, {}); - x_ = runtime_.allocate_ndarray({NPARTICLE}, {2}, false/*host_access*/); - v_ = runtime_.allocate_ndarray({NPARTICLE}, {2}); - pos_ = runtime_.allocate_ndarray({NPARTICLE}, {3}); - C_ = runtime_.allocate_ndarray({NPARTICLE}, {2, 2}); - J_ = runtime_.allocate_ndarray({NPARTICLE}, {}); - grid_v_ = runtime_.allocate_ndarray({GRID_SIZE, GRID_SIZE}, {2}); - grid_m_ = runtime_.allocate_ndarray({GRID_SIZE, GRID_SIZE}, {}); - - draw_points = g_runtime.draw_points(render_x_) + draw_points = renderer.draw_points(x_) .point_size(3.0f) .color(glm::vec3(0,0,1)) .build(); @@ -144,7 +99,6 @@ struct App2_mpm88 : public App { g_update_["grid_v"] = grid_v_; g_update_["grid_m"] = grid_m_; - Renderer& renderer = F_->renderer(); renderer.set_framebuffer_size(256, 256); std::cout << "initialized!" << std::endl; @@ -152,10 +106,6 @@ struct App2_mpm88 : public App { virtual bool update() override final { g_update_.launch(); - auto& g_runtime = F_->runtime(); - copy_to_vulkan_ndarray(render_x_, g_runtime, x_, runtime_, arch_); - runtime_.wait(); - std::cout << "stepped! (fps=" << F_->fps() << ")" << std::endl; return true; } diff --git a/3_implicit_fem/app.cpp b/3_implicit_fem/app.cpp index b2780afd..6992cb42 100644 --- a/3_implicit_fem/app.cpp +++ b/3_implicit_fem/app.cpp @@ -46,14 +46,9 @@ struct App3_implicit_fem : public App { out.framebuffer_height = 256; return out; } - virtual void initialize(TiArch arch) override final{ - - if(arch != TI_ARCH_VULKAN) { - std::cout << "3_implicit_fem only supports vulkan backend" << std::endl; - exit(0); - } - GraphicsRuntime& runtime = F_->runtime(); - Renderer& renderer = F_->renderer(); + virtual void initialize() override final{ + Renderer &renderer = F_->renderer(); + ti::Runtime& runtime = F_->runtime(); #ifdef TI_AOT_DEMO_WITH_ANDROID_APP std::vector tcm; @@ -84,7 +79,7 @@ struct App3_implicit_fem : public App { hes_edge_ = runtime.allocate_ndarray({nedge}); hes_vert_ = runtime.allocate_ndarray({ncell}); - x_ = runtime.allocate_vertex_buffer(nvert, 3, true); + x_ = runtime.allocate_ndarray({nvert}, {3}, true); v_ = runtime.allocate_ndarray({nvert}, {3}); f_ = runtime.allocate_ndarray({nvert}, {3}); mul_ans_ = runtime.allocate_ndarray({nvert}, {3}); @@ -92,7 +87,7 @@ struct App3_implicit_fem : public App { b_ = runtime.allocate_ndarray({nvert}, {3}); r0_ = runtime.allocate_ndarray({nvert}, {3}); p0_ = runtime.allocate_ndarray({nvert}, {3}); - indices_ = runtime.allocate_index_buffer(nface, 3, true); + indices_ = runtime.allocate_ndarray({nface}, {3}, true); vertices_ = runtime.allocate_ndarray({ncell}, {4}, true); edges_ = runtime.allocate_ndarray({nedge}, {2}, true); ox_ = runtime.allocate_ndarray({nvert}, {3}, true); @@ -116,7 +111,7 @@ struct App3_implicit_fem : public App { glm::mat4 world2camera = glm::lookAt(glm::vec3(0, 0, 10), glm::vec3(0, 0, 0), glm::vec3(0, -1, 0)); glm::mat4 world2view = camera2view * world2camera; - draw_mesh = runtime.draw_mesh(x_, indices_) + draw_mesh = renderer.draw_mesh(x_, indices_) .model2world(model2world) .world2view(world2view) .color(glm::vec3(0,0,1)) diff --git a/4_texture_fractal/app.cpp b/4_texture_fractal/app.cpp index be7eca38..16753a30 100644 --- a/4_texture_fractal/app.cpp +++ b/4_texture_fractal/app.cpp @@ -24,25 +24,20 @@ struct App4_texture_fractal : public App { out.framebuffer_height = 320; return out; } - virtual void initialize(TiArch arch) override final{ - - if(arch != TI_ARCH_VULKAN && arch != TI_ARCH_OPENGL) { - std::cout << "4_texture_fractal only supports vulkan, opengl backend" << std::endl; - exit(0); - } - GraphicsRuntime& runtime = F_->runtime(); + virtual void initialize() override final{ + ti::aot_demo::Renderer& renderer = F_->renderer(); + ti::Runtime& runtime = F_->runtime(); module_ = runtime.load_aot_module("4_texture_fractal/assets/fractal"); graph_ = module_.get_compute_graph("fractal"); canvas_ = runtime.allocate_texture2d(640, 320, TI_FORMAT_R32F, TI_NULL_HANDLE); - draw_points = runtime.draw_texture(canvas_) + draw_points = renderer.draw_texture(canvas_) .build(); graph_["canvas"] = canvas_; - Renderer& renderer = F_->renderer(); renderer.set_framebuffer_size(640, 320); std::cout << "initialized!" << std::endl; diff --git a/5_sph/CMakeLists.txt b/5_sph/CMakeLists.txt index 1e581b53..8cdc6732 100644 --- a/5_sph/CMakeLists.txt +++ b/5_sph/CMakeLists.txt @@ -2,3 +2,4 @@ add_demo(5_sph ${CMAKE_CURRENT_SOURCE_DIR}/app.cpp) generate_aot_files(5_sph "assets/sph.py" "vulkan") generate_aot_files(5_sph "assets/sph.py" "x64") generate_aot_files(5_sph "assets/sph.py" "cuda") +generate_aot_files(5_sph "assets/sph.py" "android-vulkan") diff --git a/5_sph/app.cpp b/5_sph/app.cpp index 9e15c5ce..9bb536de 100644 --- a/5_sph/app.cpp +++ b/5_sph/app.cpp @@ -4,7 +4,6 @@ #include "glm/glm.hpp" #include "glm/ext.hpp" #include "taichi/aot_demo/framework.hpp" -#include "taichi/aot_demo/interop/cross_device_copy.hpp" using namespace ti::aot_demo; @@ -25,38 +24,11 @@ static std::string get_aot_file_dir(TiArch arch) { } } -template -static void copy_to_vulkan_ndarray(ti::NdArray& dst, - GraphicsRuntime& dst_runtime, - ti::NdArray& src, - ti::Runtime& src_runtime, TiArch src_arch) { - - switch(src_arch) { - case TI_ARCH_VULKAN: { - InteropHelper::copy_from_vulkan(dst_runtime, dst, src_runtime, src); - break; - } - case TI_ARCH_X64: { - InteropHelper::copy_from_cpu(dst_runtime, dst, src_runtime, src); - break; - } - case TI_ARCH_CUDA: { - InteropHelper::copy_from_cuda(dst_runtime, dst, src_runtime, src); - break; - } - default: { - throw std::runtime_error("Unable to perform NdArray memory copy"); - } - } -} - struct App5_sph : public App { static const uint32_t NR_PARTICLES = 8000; static const uint32_t SUBSTEPS = 5; - ti::Runtime runtime_; ti::AotModule module_; - TiArch arch_; ti::Kernel k_initialize_; ti::Kernel k_initialize_particle_; @@ -83,30 +55,27 @@ struct App5_sph : public App { out.app_name = "5_sph"; out.framebuffer_width = 512; out.framebuffer_height = 512; + out.supported_archs = { + TI_ARCH_VULKAN, + TI_ARCH_CUDA, + TI_ARCH_X64, + }; return out; } - virtual void initialize(TiArch arch) override final{ + virtual void initialize() override final{ + Renderer &renderer = F_->renderer(); + ti::Runtime &runtime = F_->runtime(); - if(arch != TI_ARCH_VULKAN && arch != TI_ARCH_X64 && arch != TI_ARCH_CUDA) { - std::cout << "5_sph only supports cuda, x64, vulkan backends" << std::endl; - exit(0); - } - arch_ = arch; - - // 1. Create runtime - GraphicsRuntime& g_runtime = F_->runtime(); - - if(arch_ == TI_ARCH_VULKAN) { - // Reuse the vulkan runtime from renderer framework - runtime_ = ti::Runtime(arch_, F_->runtime(), false);; - } else { - runtime_ = ti::Runtime(arch_); - } - // 2. Load AOT module - auto aot_file_path = get_aot_file_dir(arch_); - module_ = runtime_.load_aot_module(aot_file_path); +#ifdef TI_AOT_DEMO_WITH_ANDROID_APP + std::vector tcm; + F_->asset_mgr().load_file("E5_sph.tcm", tcm); + module_ = runtime.create_aot_module(tcm); +#else + auto aot_file_path = get_aot_file_dir(runtime.arch()); + module_ = runtime.load_aot_module(aot_file_path); +#endif // 3. Load kernels k_initialize_ = module_.get_kernel("initialize"); @@ -120,25 +89,22 @@ struct App5_sph : public App { const std::vector shape_1d = {NR_PARTICLES}; const std::vector vec3_shape = {3}; - N_ = runtime_.allocate_ndarray(shape_1d, vec3_shape); - den_ = runtime_.allocate_ndarray(shape_1d, {}); - pre_ = runtime_.allocate_ndarray(shape_1d, {}); - vel_ = runtime_.allocate_ndarray(shape_1d, vec3_shape); - acc_ = runtime_.allocate_ndarray(shape_1d, vec3_shape); - boundary_box_ = runtime_.allocate_ndarray(shape_1d, vec3_shape); - spawn_box_ = runtime_.allocate_ndarray(shape_1d, vec3_shape); - gravity_ = runtime_.allocate_ndarray({}, vec3_shape); - pos_ = runtime_.allocate_ndarray(shape_1d, vec3_shape, false/*host_access*/); - - render_pos_ = g_runtime.allocate_vertex_buffer(shape_1d[0], vec3_shape[0], false/*host_access*/); - + N_ = runtime.allocate_ndarray(shape_1d, vec3_shape); + den_ = runtime.allocate_ndarray(shape_1d, {}); + pre_ = runtime.allocate_ndarray(shape_1d, {}); + vel_ = runtime.allocate_ndarray(shape_1d, vec3_shape); + acc_ = runtime.allocate_ndarray(shape_1d, vec3_shape); + boundary_box_ = runtime.allocate_ndarray(shape_1d, vec3_shape); + spawn_box_ = runtime.allocate_ndarray(shape_1d, vec3_shape); + gravity_ = runtime.allocate_ndarray({}, vec3_shape); + pos_ = runtime.allocate_ndarray(shape_1d, vec3_shape, false/*host_access*/); + // 5. Handle image presentation - Renderer& renderer = F_->renderer(); glm::mat4 model2world = glm::mat4(1.0f); model2world = glm::scale(model2world, glm::vec3(5.0f)); glm::mat4 world2view = glm::lookAt(glm::vec3(10, 10, 10), glm::vec3(0, 0, 0), glm::vec3(0, -1, 0)); glm::mat4 view2clip = glm::perspective(glm::radians(45.0f), renderer.width() / (float)renderer.height(), 0.1f, 1000.0f); - draw_points = g_runtime.draw_particles(render_pos_) + draw_points = renderer.draw_particles(render_pos_) .model2world(model2world) .world2view(world2view) .view2clip(view2clip) @@ -176,27 +142,29 @@ struct App5_sph : public App { k_boundary_handle_[2] = boundary_box_; k_initialize_.launch(); + runtime.wait(); k_initialize_particle_.launch(); - runtime_.wait(); - + runtime.wait(); + // 7. Run initialization kernels renderer.set_framebuffer_size(512, 512); std::cout << "initialized!" << std::endl; } virtual bool update() override final { + ti::Runtime &runtime = F_->runtime(); + // 8. Run compute kernels for(int i = 0; i < SUBSTEPS; i++) { k_update_density_.launch(); + runtime.wait(); k_update_force_.launch(); + runtime.wait(); k_advance_.launch(); + runtime.wait(); k_boundary_handle_.launch(); + runtime.wait(); } - runtime_.wait(); - - // 9. Update vertex buffer - auto& g_runtime = F_->runtime(); - copy_to_vulkan_ndarray(render_pos_, g_runtime, pos_, runtime_, arch_); std::cout << "stepped! (fps=" << F_->fps() << ")" << std::endl; return true; diff --git a/5_sph/assets/sph.py b/5_sph/assets/sph.py index d0c7792e..92c2f494 100644 --- a/5_sph/assets/sph.py +++ b/5_sph/assets/sph.py @@ -12,13 +12,22 @@ def get_save_dir(name, arch): curr_dir = os.path.dirname(os.path.realpath(__file__)) return os.path.join(curr_dir, f"{name}_{arch}") +def get_archive_path(): + curr_dir = os.path.dirname(os.path.realpath(__file__)) + return os.path.join(curr_dir, f"../../framework/android/app/src/main/assets/E5_sph.tcm") if args.arch == "cuda": arch = ti.cuda + platform = None elif args.arch == "x64": arch = ti.x64 + platform = None elif args.arch == "vulkan": arch = ti.vulkan + platform = None +elif args.arch == "android-vulkan": + arch = ti.vulkan + platform = "android" else: assert False @@ -208,6 +217,10 @@ def copy_data_from_ndarray_to_field(src: ti.template(), dst: ti.any_arr()): mod.add_kernel(advance, template_args={'pos':pos, 'vel':vel, 'acc':acc}) mod.add_kernel(boundary_handle, template_args={'pos':pos, 'vel':vel, 'boundary_box':boundary_box}) - save_dir = get_save_dir("sph", args.arch) - os.makedirs(save_dir, exist_ok=True) - mod.save(save_dir) + + if platform == "android": + mod.archive(get_archive_path()) + else: + save_dir = get_save_dir("sph", args.arch) + os.makedirs(save_dir, exist_ok=True) + mod.save(save_dir) diff --git a/6_taichi_sparse/app.cpp b/6_taichi_sparse/app.cpp index 65af38a8..0b1876c6 100644 --- a/6_taichi_sparse/app.cpp +++ b/6_taichi_sparse/app.cpp @@ -4,8 +4,6 @@ #include "glm/glm.hpp" #include "glm/ext.hpp" #include "taichi/aot_demo/framework.hpp" -#include "taichi/aot_demo/interop/texture_utils.hpp" -#include "taichi/aot_demo/interop/cross_device_copy.hpp" using namespace ti::aot_demo; @@ -30,7 +28,6 @@ struct App6_taichi_sparse : public App { static const uint32_t img_w = 680; static const uint32_t img_h = 680; - ti::Runtime runtime_; ti::AotModule module_; TiArch arch_; @@ -43,7 +40,6 @@ struct App6_taichi_sparse : public App { ti::NdArray arr_; float val = 0.0f; - ti::Texture tex_; std::unique_ptr draw_texture; virtual AppConfig cfg() const override final { @@ -51,24 +47,21 @@ struct App6_taichi_sparse : public App { out.app_name = "6_taichi_sparse"; out.framebuffer_width = img_w; out.framebuffer_height = img_h; + out.supported_archs = { + TI_ARCH_X64, + TI_ARCH_CUDA, + }; return out; } - virtual void initialize(TiArch arch) override final{ - - if(arch != TI_ARCH_X64 && arch != TI_ARCH_CUDA) { - std::cout << "6_taichi_sparse only supports cuda, x64 backends" << std::endl; - exit(0); - } - arch_ = arch; - + virtual void initialize() override final{ // 1. Create runtime - GraphicsRuntime& g_runtime = F_->runtime(); - runtime_ = ti::Runtime(arch_); + Renderer &renderer = F_->renderer(); + ti::Runtime& runtime = F_->runtime(); // 2. Load AOT module auto aot_file_path = get_aot_file_dir(arch_); - module_ = runtime_.load_aot_module(aot_file_path); + module_ = runtime.load_aot_module(aot_file_path); // 3. Load kernels k_fill_img_ = module_.get_kernel("fill_img"); @@ -78,11 +71,10 @@ struct App6_taichi_sparse : public App { k_img_to_ndarray_ = module_.get_kernel("img_to_ndarray"); // 4. Create kernel arguments - Ndarrays - arr_ = runtime_.allocate_ndarray({img_w, img_h}, {}, false /*host_access*/); + arr_ = runtime.allocate_ndarray({img_w, img_h}); // 5. Handle image presentation - tex_ = g_runtime.allocate_texture2d(img_w, img_h, TI_FORMAT_R32F, TI_NULL_HANDLE); - draw_texture = g_runtime.draw_texture(tex_).build(); + draw_texture = renderer.draw_texture(arr_).build(); // 6. Setup taichi kernels k_img_to_ndarray_[0] = arr_; @@ -90,16 +82,15 @@ struct App6_taichi_sparse : public App { // 7. Run initialization kernels k_fill_img_.launch(); - runtime_.wait(); + runtime.wait(); - Renderer& renderer = F_->renderer(); renderer.set_framebuffer_size(img_w, img_h); std::cout << "initialized!" << std::endl; } virtual bool update() override final { // 8. Run compute kernels - auto& g_runtime = F_->runtime(); + ti::Runtime& runtime = F_->runtime(); val += 0.05f; k_activate_[0] = val; @@ -108,24 +99,16 @@ struct App6_taichi_sparse : public App { k_paint_.launch(); k_img_to_ndarray_.launch(); - runtime_.wait(); + runtime.wait(); std::cout << "stepped! (fps=" << F_->fps() << ")" << std::endl; return true; } virtual void render() override final { - auto& g_runtime = F_->runtime(); + ti::Runtime& runtime = F_->runtime(); // 9. Update to texture - if(arch_ == TI_ARCH_CUDA) { - TextureHelper::copy_from_cuda_ndarray(g_runtime, tex_, runtime_, arr_); - } else if(arch_ == TI_ARCH_X64) { - TextureHelper::copy_from_cpu_ndarray(g_runtime, tex_, runtime_, arr_); - } else { - throw std::runtime_error("Unrecognized architecture"); - } - g_runtime.wait(); - runtime_.wait(); + runtime.wait(); Renderer& renderer = F_->renderer(); renderer.enqueue_graphics_task(*draw_texture); diff --git a/7_comet/app.cpp b/7_comet/app.cpp index a1c35992..336a9b17 100644 --- a/7_comet/app.cpp +++ b/7_comet/app.cpp @@ -4,8 +4,6 @@ #include "glm/glm.hpp" #include "glm/ext.hpp" #include "taichi/aot_demo/framework.hpp" -#include "taichi/aot_demo/interop/texture_utils.hpp" -#include "taichi/aot_demo/interop/cross_device_copy.hpp" using namespace ti::aot_demo; @@ -30,7 +28,6 @@ struct App7_comet : public App { static const uint32_t img_w = 680; static const uint32_t img_h = 680; - ti::Runtime runtime_; ti::AotModule module_; TiArch arch_; @@ -47,44 +44,40 @@ struct App7_comet : public App { out.app_name = "7_comet"; out.framebuffer_width = img_w; out.framebuffer_height = img_h; + out.supported_archs = { + TI_ARCH_X64, + TI_ARCH_CUDA, + }; return out; } - virtual void initialize(TiArch arch) override final{ + virtual void initialize() override final{ - if(arch != TI_ARCH_X64 && arch != TI_ARCH_CUDA) { - std::cout << "7_comet only supports cuda, x64 backends" << std::endl; - exit(0); - } - arch_ = arch; - // 1. Create runtime - GraphicsRuntime& g_runtime = F_->runtime(); - runtime_ = ti::Runtime(arch_); + Renderer& renderer = F_->renderer(); + ti::Runtime& runtime = F_->runtime(); // 2. Load AOT module auto aot_file_path = get_aot_file_dir(arch_); - module_ = runtime_.load_aot_module(aot_file_path); + module_ = runtime.load_aot_module(aot_file_path); // 3. Load compute graphs g_init_ = module_.get_compute_graph("init"); g_update_ = module_.get_compute_graph("update"); // 4. Create kernel arguments - Ndarrays - arr_ = runtime_.allocate_ndarray({img_w, img_h}, {}, false/*host_access*/); + arr_ = runtime.allocate_ndarray({img_w, img_h}, {}, false/*host_access*/); // 5. Handle image presentation - tex_ = g_runtime.allocate_texture2d(img_w, img_h, TI_FORMAT_R32F, TI_NULL_HANDLE); - draw_texture = g_runtime.draw_texture(tex_).build(); + draw_texture = renderer.draw_texture(arr_).build(); // 6. Setup taichi kernels g_update_["arr"] = arr_; // 7. Run initialization kernels g_init_.launch(); - runtime_.wait(); + runtime.wait(); - Renderer& renderer = F_->renderer(); renderer.set_framebuffer_size(img_w, img_h); std::cout << "initialized!" << std::endl; @@ -92,25 +85,12 @@ struct App7_comet : public App { virtual bool update() override final { // 8. Run compute kernels g_update_.launch(); - runtime_.wait(); + F_->runtime().wait(); std::cout << "stepped! (fps=" << F_->fps() << ")" << std::endl; return true; } virtual void render() override final { - auto& g_runtime = F_->runtime(); - - // 9. Update to texture - if(arch_ == TI_ARCH_CUDA) { - TextureHelper::copy_from_cuda_ndarray(g_runtime, tex_, runtime_, arr_); - } else if(arch_ == TI_ARCH_X64) { - TextureHelper::copy_from_cpu_ndarray(g_runtime, tex_, runtime_, arr_); - } else { - throw std::runtime_error("Unrecognized architecture"); - } - g_runtime.wait(); - runtime_.wait(); - Renderer& renderer = F_->renderer(); renderer.enqueue_graphics_task(*draw_texture); } diff --git a/framework/include/taichi/aot_demo/common.hpp b/framework/include/taichi/aot_demo/common.hpp index 66a021ab..635a951e 100644 --- a/framework/include/taichi/aot_demo/common.hpp +++ b/framework/include/taichi/aot_demo/common.hpp @@ -37,3 +37,16 @@ #if TI_AOT_DEMO_ANDROID_APP #include #endif // TI_AOT_DEMO_ANDROID_APP + +namespace ti { +namespace aot_demo { + +inline void check_taichi_error() { + TiError error = ti_get_last_error(0, nullptr); + if (error < TI_ERROR_SUCCESS) { + throw std::runtime_error("taichi failed"); + } +} + +} // namespace aot_demo +} // namespace ti diff --git a/framework/include/taichi/aot_demo/draws/draw_mesh.hpp b/framework/include/taichi/aot_demo/draws/draw_mesh.hpp index 92fb4d44..5ae731b6 100644 --- a/framework/include/taichi/aot_demo/draws/draw_mesh.hpp +++ b/framework/include/taichi/aot_demo/draws/draw_mesh.hpp @@ -1,5 +1,5 @@ #pragma once -#include "taichi/aot_demo/common.hpp" +#include "taichi/aot_demo/graphics_task.hpp" namespace ti { namespace aot_demo { @@ -7,15 +7,20 @@ namespace aot_demo { class Renderer; class GraphicsTask; -class DrawMeshBuilder { +class DrawMeshBuilder : public GraphicsTaskBuilder { using Self = DrawMeshBuilder; std::shared_ptr renderer_; - TiNdArray positions_ = {}; - TiNdArray indices_ = {}; + uint32_t position_count_; + uint32_t position_component_count_; + std::shared_ptr positions_ = nullptr; + + uint32_t primitive_count_; + uint32_t primitive_vertex_count_; + std::shared_ptr indices_ = nullptr; glm::vec4 color_ = glm::vec4(1.0f, 1.0f, 1.0f, 1.0f); - TiNdArray colors_ = {}; + std::shared_ptr colors_ = {}; glm::mat4 model2world_ = glm::mat4(1.0f); glm::mat4 world2view_ = glm::mat4(1.0f); @@ -25,10 +30,10 @@ class DrawMeshBuilder { const std::shared_ptr& renderer, const ti::NdArray& positions, const ti::NdArray& indices - ) : renderer_(renderer) { + ) : GraphicsTaskBuilder(renderer) { assert(positions.is_valid()); - positions_ = positions; - indices_ = indices; + positions_ = create_shadow_buffer(positions.memory(), ShadowBufferUsage::VertexBuffer); + indices_ = create_shadow_buffer(indices.memory(), ShadowBufferUsage::IndexBuffer); assert(positions_.shape.dim_count == 1); assert(positions_.shape.dims[0] != 0); @@ -59,7 +64,7 @@ class DrawMeshBuilder { } Self& color(const ti::NdArray& colors) { assert(colors.is_valid()); - colors_ = colors; + colors_ = create_shadow_buffer(colors.memory(), ShadowBufferUsage::StorageBuffer); assert(colors_.shape.dim_count == 1); assert(colors_.shape.dims[0] != 0); diff --git a/framework/include/taichi/aot_demo/draws/draw_particles.hpp b/framework/include/taichi/aot_demo/draws/draw_particles.hpp index 333297cf..86dcfe5b 100644 --- a/framework/include/taichi/aot_demo/draws/draw_particles.hpp +++ b/framework/include/taichi/aot_demo/draws/draw_particles.hpp @@ -1,5 +1,5 @@ #pragma once -#include "taichi/aot_demo/common.hpp" +#include "taichi/aot_demo/graphics_task.hpp" namespace ti { namespace aot_demo { @@ -7,15 +7,16 @@ namespace aot_demo { class Renderer; class GraphicsTask; -class DrawParticlesBuilder { +class DrawParticlesBuilder : public GraphicsTaskBuilder { using Self = DrawParticlesBuilder; - std::shared_ptr renderer_; - TiNdArray positions_ = {}; + uint32_t position_count_; + uint32_t position_component_count_; + std::shared_ptr positions_ = nullptr; glm::vec4 color_ = glm::vec4(1.0f, 1.0f, 1.0f, 1.0f); - TiNdArray colors_ = {}; - + std::shared_ptr colors_ = nullptr; + // model matrix glm::mat4 model2world_ = glm::mat4(1.0f); @@ -29,14 +30,18 @@ class DrawParticlesBuilder { DrawParticlesBuilder( const std::shared_ptr& renderer, const ti::NdArray& positions - ) : renderer_(renderer) { + ) : GraphicsTaskBuilder(renderer) { assert(positions.is_valid()); - positions_ = positions; - assert(positions_.shape.dim_count == 1); assert(positions_.shape.dims[0] != 0); assert(positions_.elem_shape.dim_count == 1); - assert(positions_.elem_shape.dims[0] > 0 && positions_.elem_shape.dims[0] <= 4); + assert(positions_.elem_shape.dims[0] > 0 && + positions_.elem_shape.dims[0] <= 4); + + position_component_count_ = positions.elem_shape().dims[0]; + position_count_ = positions.shape().dims[0]; + positions_ = create_shadow_buffer(positions.memory(), + ShadowBufferUsage::VertexBuffer); } Self& model2world(const glm::mat4& model2world) { @@ -62,7 +67,7 @@ class DrawParticlesBuilder { } Self& color(const ti::NdArray& colors) { assert(colors.is_valid()); - colors_ = colors; + colors_ = create_shadow_buffer(colors.memory(), ShadowBufferUsage::StorageBuffer); assert(colors_.shape.dim_count == 1); assert(colors_.shape.dims[0] != 0); diff --git a/framework/include/taichi/aot_demo/draws/draw_points.hpp b/framework/include/taichi/aot_demo/draws/draw_points.hpp index 0b5ae1db..e39c5757 100644 --- a/framework/include/taichi/aot_demo/draws/draw_points.hpp +++ b/framework/include/taichi/aot_demo/draws/draw_points.hpp @@ -1,5 +1,5 @@ #pragma once -#include "taichi/aot_demo/common.hpp" +#include "taichi/aot_demo/graphics_task.hpp" namespace ti { namespace aot_demo { @@ -7,31 +7,37 @@ namespace aot_demo { class Renderer; class GraphicsTask; -class DrawPointsBuilder { +class DrawPointsBuilder : public GraphicsTaskBuilder { using Self = DrawPointsBuilder; - std::shared_ptr renderer_; - TiNdArray positions_ = {}; + uint32_t position_count_ = 0; + uint32_t position_component_count_ = 0; + std::shared_ptr positions_ = {}; glm::vec4 color_ = glm::vec4(1.0f, 1.0f, 1.0f, 1.0f); - TiNdArray colors_ = {}; + std::shared_ptr colors_ = {}; float point_size_ = 1.0f; - TiNdArray point_sizes_ = {}; + std::shared_ptr point_sizes_ = {}; -public: + public: DrawPointsBuilder( const std::shared_ptr& renderer, const ti::NdArray& positions - ) : renderer_(renderer) { + ) : GraphicsTaskBuilder(renderer) { assert(positions.is_valid()); - positions_ = positions; + assert(positions.shape.dim_count == 1); + assert(positions.shape.dims[0] != 0); + assert(positions.elem_shape.dim_count == 1); + assert(positions.elem_shape.dims[0] > 0 && + positions.elem_shape.dims[0] <= 4); - assert(positions_.shape.dim_count == 1); - assert(positions_.shape.dims[0] != 0); - assert(positions_.elem_shape.dim_count == 1); - assert(positions_.elem_shape.dims[0] > 0 && positions_.elem_shape.dims[0] <= 4); + position_count_ = positions.shape().dims[0]; + position_component_count_ = positions.elem_shape().dims[0]; + positions_ = create_shadow_buffer(positions.memory(), + ShadowBufferUsage::VertexBuffer); } + Self& color(const glm::vec3& color) { color_ = glm::vec4(color, 1.0f); @@ -43,12 +49,13 @@ class DrawPointsBuilder { } Self& color(const ti::NdArray& colors) { assert(colors.is_valid()); - colors_ = colors; + assert(colors.shape.dim_count == 1); + assert(colors.shape.dims[0] != 0); + assert(colors.elem_shape.dim_count == 1); + assert(colors.elem_shape.dims[0] == 4); - assert(colors_.shape.dim_count == 1); - assert(colors_.shape.dims[0] != 0); - assert(colors_.elem_shape.dim_count == 1); - assert(colors_.elem_shape.dims[0] == 4); + colors_ = + create_shadow_buffer(colors.memory(), ShadowBufferUsage::StorageBuffer); return *this; } @@ -58,7 +65,8 @@ class DrawPointsBuilder { } Self& point_size(const ti::NdArray& point_sizes) { assert(point_sizes.is_valid()); - point_sizes_ = point_sizes; + point_sizes_ = create_shadow_buffer(point_sizes.memory(), + ShadowBufferUsage::StorageBuffer); assert(colors_.shape.dim_count == 1); assert(colors_.shape.dims[0] != 0); diff --git a/framework/include/taichi/aot_demo/draws/draw_texture.hpp b/framework/include/taichi/aot_demo/draws/draw_texture.hpp index dd220df8..749b2a72 100644 --- a/framework/include/taichi/aot_demo/draws/draw_texture.hpp +++ b/framework/include/taichi/aot_demo/draws/draw_texture.hpp @@ -1,5 +1,5 @@ #pragma once -#include "taichi/aot_demo/common.hpp" +#include "taichi/aot_demo/graphics_task.hpp" namespace ti { namespace aot_demo { @@ -7,22 +7,41 @@ namespace aot_demo { class Renderer; class GraphicsTask; -class DrawTextureBuilder { +class DrawTextureBuilder : GraphicsTaskBuilder { using Self = DrawTextureBuilder; - std::shared_ptr renderer_; - TiTexture texture_ = {}; + uint32_t width_ = 1; + uint32_t height_ = 1; + std::shared_ptr rect_vertices_ = nullptr; + std::shared_ptr texture_ = nullptr; + uint32_t texel_component_count_ = 1; + std::shared_ptr texture_buffer_ = nullptr; public: DrawTextureBuilder( const std::shared_ptr& renderer, const ti::Texture& texture - ) : renderer_(renderer) { + ) : GraphicsTaskBuilder(renderer) { assert(texture.is_valid()); - texture_ = texture; + texture_ = create_shadow_texture(texture.image(), ShadowTextureUsage::SampledImage); + width_ = texture.texture().extent.width; + height_ = texture.texture().extent.height; assert(texture.texture().dimension == TI_IMAGE_DIMENSION_2D); } + DrawTextureBuilder( + const std::shared_ptr& renderer, + const ti::NdArray& texture_buffer + ) : GraphicsTaskBuilder(renderer) { + assert(texture_buffer.is_valid()); + assert(texture_buffer.shape().dim_count == 2); + assert(texture_buffer.elem_shape().dim_count == 1); + + width_ = texture_buffer.shape().dims[0]; + height_ = texture_buffer.shape().dims[1]; + texel_component_count_ = texture_buffer.elem_shape().dims[0]; + texture_buffer_ = create_shadow_buffer(texture_buffer.memory(), ShadowBufferUsage::StorageBuffer); + } std::unique_ptr build(); }; diff --git a/framework/include/taichi/aot_demo/framework.hpp b/framework/include/taichi/aot_demo/framework.hpp index 9c44a586..ca6e4b87 100644 --- a/framework/include/taichi/aot_demo/framework.hpp +++ b/framework/include/taichi/aot_demo/framework.hpp @@ -2,7 +2,6 @@ #include #include #include "taichi/aot_demo/renderer.hpp" -#include "taichi/aot_demo/graphics_runtime.hpp" #include "taichi/aot_demo/asset_manager.hpp" namespace ti { @@ -15,6 +14,7 @@ struct AppConfig { const char* app_name = "taichi"; uint32_t framebuffer_width = 64; uint32_t framebuffer_height = 32; + std::vector supported_archs; }; // What you need to implement: @@ -27,7 +27,7 @@ struct App { } virtual AppConfig cfg() const = 0; - virtual void initialize(TiArch arch) = 0; + virtual void initialize() = 0; virtual bool update() = 0; virtual void render() = 0; }; @@ -37,6 +37,11 @@ extern std::unique_ptr create_app(); // ----------------------------------------------------------------------------- +struct EntryPointConfig { + TiArch client_arch; + bool debug; +}; + // This should be implemented in platform entry points. extern std::unique_ptr create_asset_manager(); @@ -45,7 +50,6 @@ namespace aot_demo { class Framework { std::shared_ptr renderer_; - GraphicsRuntime runtime_; std::unique_ptr asset_mgr_; uint32_t frame_; @@ -55,11 +59,10 @@ class Framework { public: Framework() {} - Framework(const AppConfig& app_cfg, bool debug); + Framework(const AppConfig& app_cfg, const EntryPointConfig& client_arch); Framework(const Framework&) = delete; Framework(Framework&& b) : renderer_(std::move(b.renderer_)), - runtime_(std::move(b.runtime_)), asset_mgr_(std::move(b.asset_mgr_)), frame_(std::exchange(b.frame_, 0)), tic0_(std::move(b.tic0_)), @@ -69,7 +72,6 @@ class Framework { Framework& operator=(Framework&& b) { renderer_ = std::move(b.renderer_); - runtime_ = std::move(b.runtime_); asset_mgr_ = std::move(b.asset_mgr_); frame_ = std::exchange(b.frame_, 0); tic0_ = std::move(b.tic0_); @@ -102,14 +104,12 @@ class Framework { inline AssetManager& asset_mgr() { return *asset_mgr_; } - // You usually need this in `initialize` and `update`. - inline GraphicsRuntime& runtime() { - return runtime_; - } - // You usually need this in `render`. inline Renderer& renderer() { return *renderer_; } + inline ti::Runtime &runtime() { + return renderer_->client_runtime(); + } }; } // namespace renderer diff --git a/framework/include/taichi/aot_demo/graphics_runtime.hpp b/framework/include/taichi/aot_demo/graphics_runtime.hpp deleted file mode 100644 index 18a65e3d..00000000 --- a/framework/include/taichi/aot_demo/graphics_runtime.hpp +++ /dev/null @@ -1,61 +0,0 @@ -#pragma once -#include "draws/draw_points.hpp" -#include "draws/draw_particles.hpp" -#include "draws/draw_mesh.hpp" -#include "draws/draw_texture.hpp" - -namespace ti { -namespace aot_demo { - -class Renderer; - -class GraphicsRuntime : public ti::Runtime { - template - friend class InteropHelper; - - template - friend class TextureHelper; - - std::shared_ptr renderer_; - -public: - GraphicsRuntime() : ti::Runtime() {} - GraphicsRuntime(const std::shared_ptr& renderer); - - ti::NdArray allocate_vertex_buffer( - uint32_t vertex_count, - uint32_t vertex_component_count, - bool host_access = false - ); - ti::NdArray allocate_index_buffer( - uint32_t index_count, - uint32_t index_component_count, - bool host_access = false - ); - - // Add your drawing functions here. - DrawPointsBuilder draw_points( - const ti::NdArray& positions - ) { - return DrawPointsBuilder(renderer_, positions); - } - DrawParticlesBuilder draw_particles( - const ti::NdArray& positions - ) { - return DrawParticlesBuilder(renderer_, positions); - } - DrawMeshBuilder draw_mesh( - const ti::NdArray& positions, - const ti::NdArray& indices - ) { - return DrawMeshBuilder(renderer_, positions, indices); - } - DrawTextureBuilder draw_texture( - const ti::Texture& texture - ) { - return DrawTextureBuilder(renderer_, texture); - } -}; - -} // namespace aot_demo -} // namespace ti diff --git a/framework/include/taichi/aot_demo/graphics_task.hpp b/framework/include/taichi/aot_demo/graphics_task.hpp new file mode 100644 index 00000000..b45b5aea --- /dev/null +++ b/framework/include/taichi/aot_demo/graphics_task.hpp @@ -0,0 +1,97 @@ +#pragma once +#include +#include "taichi/aot_demo/common.hpp" +#include "taichi/aot_demo/shadow_buffer.hpp" +#include "taichi/aot_demo/shadow_texture.hpp" + +namespace ti { +namespace aot_demo { + +class Renderer; + +enum GraphicsTaskResourceType { + L_GRAPHICS_TASK_RESOURCE_TYPE_BUFFER, + L_GRAPHICS_TASK_RESOURCE_TYPE_TEXTURE, +}; + +struct GraphicsTaskResource { + GraphicsTaskResourceType type; + std::shared_ptr shadow_buffer; + std::shared_ptr shadow_texture; + + GraphicsTaskResource(const std::shared_ptr &shadow_buffer) + : type(L_GRAPHICS_TASK_RESOURCE_TYPE_BUFFER), + shadow_buffer(std::move(shadow_buffer)) { + } + GraphicsTaskResource(const std::shared_ptr &shadow_texture) + : type(L_GRAPHICS_TASK_RESOURCE_TYPE_TEXTURE), + shadow_texture(std::move(shadow_texture)) { + } +}; + +enum PrimitiveTopology { + L_PRIMITIVE_TOPOLOGY_POINT, + L_PRIMITIVE_TOPOLOGY_LINE, + L_PRIMITIVE_TOPOLOGY_TRIANGLE, +}; + +struct GraphicsTaskConfig { + std::string vertex_shader_glsl; + std::string fragment_shader_glsl; + void* uniform_buffer_data; + size_t uniform_buffer_size; + std::vector resources; + + std::shared_ptr vertex_buffer; + std::shared_ptr index_buffer; + uint32_t vertex_component_count; + uint32_t vertex_count; + uint32_t index_count; + uint32_t instance_count; + PrimitiveTopology primitive_topology; +}; + +class GraphicsTask { + friend class Renderer; + + GraphicsTaskConfig config_; + + std::shared_ptr renderer_; + VkPipeline pipeline_; + VkPipelineLayout pipeline_layout_; + VkDescriptorSetLayout descriptor_set_layout_; + VkDescriptorPool descriptor_pool_; + VkDescriptorSet descriptor_set_; + VkBuffer uniform_buffer_; + VmaAllocation uniform_buffer_allocation_; + std::vector texture_views_; + +public: + constexpr bool is_valid() const { + return pipeline_ != VK_NULL_HANDLE; + } + void destroy(); + + GraphicsTask( + const std::shared_ptr& renderer, + const GraphicsTaskConfig& config + ); + ~GraphicsTask(); +}; + +class GraphicsTaskBuilder { + protected: + std::shared_ptr renderer_; + + GraphicsTaskBuilder(const std::shared_ptr &renderer) + : renderer_(renderer) { + } + + std::shared_ptr create_shadow_buffer(const ti::Memory &src, + ShadowBufferUsage usage); + std::shared_ptr create_shadow_texture(const ti::Image &src, + ShadowTextureUsage usage); +}; + +} // namespace aot_demo +} // namespace ti diff --git a/framework/include/taichi/aot_demo/interop/cross_device_copy.hpp b/framework/include/taichi/aot_demo/interop/cross_device_copy.hpp deleted file mode 100644 index 869cedb3..00000000 --- a/framework/include/taichi/aot_demo/interop/cross_device_copy.hpp +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once -#include "taichi/aot_demo/common.hpp" - -#include "taichi/aot_demo/graphics_runtime.hpp" - -namespace ti { -namespace aot_demo { - -template -class InteropHelper { -public: - static void copy_from_vulkan(GraphicsRuntime& dst_runtime, - ti::NdArray& dst_vulkan_ndarray, - ti::Runtime& src_vulkan_runtime, - ti::NdArray& src_vulkan_ndarray); - - static void copy_from_cpu(GraphicsRuntime& runtime, - ti::NdArray& vulkan_ndarray, - ti::Runtime& cpu_runtime, - ti::NdArray& cpu_ndarray); - - static void copy_from_cuda(GraphicsRuntime& runtime, - ti::NdArray& vulkan_ndarray, - ti::Runtime& cuda_runtime, - ti::NdArray& cuda_ndarray); - - static void copy_from_opengl(GraphicsRuntime &runtime, - ti::NdArray &vulkan_ndarray, - ti::Runtime &opengl_runtime, - ti::NdArray &opengl_ndarray); -}; - -} -} diff --git a/framework/include/taichi/aot_demo/interop/texture_utils.hpp b/framework/include/taichi/aot_demo/interop/texture_utils.hpp deleted file mode 100644 index 1833a407..00000000 --- a/framework/include/taichi/aot_demo/interop/texture_utils.hpp +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once -#include "taichi/aot_demo/common.hpp" - -#include "taichi/aot_demo/graphics_runtime.hpp" - -namespace ti { -namespace aot_demo { - -template -class TextureHelper { -public: - - static void interchange_vulkan_ndarray_texture(GraphicsRuntime& g_runtime, - ti::Texture& vulkan_texture, - ti::Runtime& vulkan_runtime, - ti::NdArray& vulkan_ndarray, - bool texture_to_ndarray); - - static void copy_from_cpu_ndarray(GraphicsRuntime& g_runtime, - ti::Texture& vulkan_texture, - ti::Runtime& cpu_runtime, - ti::NdArray& cpu_ndarray); - - static void copy_from_cuda_ndarray(GraphicsRuntime& g_runtime, - ti::Texture& vulkan_texture, - ti::Runtime& cuda_runtime, - ti::NdArray& cuda_ndarray); - - static void copy_from_opengl_ndarray(GraphicsRuntime &g_runtime, - ti::Texture &vulkan_texture, - ti::Runtime &opengl_runtime, - ti::NdArray &opengl_texture); -}; - -} -} diff --git a/framework/include/taichi/aot_demo/renderer.hpp b/framework/include/taichi/aot_demo/renderer.hpp index 7e988421..a79f6705 100644 --- a/framework/include/taichi/aot_demo/renderer.hpp +++ b/framework/include/taichi/aot_demo/renderer.hpp @@ -1,13 +1,44 @@ #pragma once #include "taichi/aot_demo/common.hpp" +#include "taichi/aot_demo/draws/draw_points.hpp" +#include "taichi/aot_demo/draws/draw_particles.hpp" +#include "taichi/aot_demo/draws/draw_mesh.hpp" +#include "taichi/aot_demo/draws/draw_texture.hpp" +#include "taichi/aot_demo/shadow_buffer.hpp" +#include "taichi/aot_demo/shadow_texture.hpp" +#include +#include + +#define check_vulkan_result(x) \ + if (x < VK_SUCCESS) { \ + uint32_t x2 = (uint32_t)x; \ + std::printf("File \"%s\", line %d, in %s:\n", __FILE__, __LINE__, __func__); \ + std::printf(" vulkan failed: %d\n", x2); \ + std::fflush(stdout); \ + throw std::runtime_error("vulkan failed"); \ + } namespace ti { namespace aot_demo { +class Renderer; class GraphicsTask; +template +class VertexBuffer; +template +class IndexBuffer; + +struct RendererConfig { + TiArch client_arch; + uint32_t framebuffer_width; + uint32_t framebuffer_height; + bool debug; +}; -class Renderer { +class Renderer : public std::enable_shared_from_this { friend class GraphicsTask; + friend class ShadowBuffer; + friend class ShadowTexture; template friend class InteropHelper; @@ -48,7 +79,11 @@ class Renderer { uint32_t swapchain_image_height_; PFN_vkGetInstanceProcAddr loader_; + // This runtime always has `TI_ARCH_VULKAN`. This is used for internal interop + // only. DO NOT EXPOSE AS PUBLIC. ti::Runtime runtime_; + // This runtime has `RendererConfig::client_arch`. + ti::Runtime client_runtime_; ti::NdArray rect_vertex_buffer_; ti::NdArray rect_texcoord_buffer_; @@ -60,7 +95,9 @@ class Renderer { void set_swapchain(); std::map ti_memory_interops_; - const TiVulkanMemoryInteropInfo& export_ti_memory(TiMemory memory); + std::map ti_image_interops_; + const TiVulkanMemoryInteropInfo& export_ti_memory(const ShadowBuffer &shadow_buffer); + const TiVulkanImageInteropInfo& export_ti_image(const ShadowTexture &shadow_texture); public: constexpr bool is_valid() const { @@ -68,10 +105,35 @@ class Renderer { } void destroy(); - Renderer() {} - Renderer(bool debug, uint32_t width, uint32_t height); + Renderer(const RendererConfig &config); ~Renderer(); + // Add your drawing functions here. + DrawPointsBuilder draw_points( + const ti::NdArray& positions + ) { + return DrawPointsBuilder(shared_from_this(), positions); + } + DrawParticlesBuilder draw_particles( + const ti::NdArray& positions + ) { + return DrawParticlesBuilder(shared_from_this(), positions); + } + DrawMeshBuilder draw_mesh( + const ti::NdArray& positions, + const ti::NdArray& indices + ) { + return DrawMeshBuilder(shared_from_this(), positions, indices); + } + DrawTextureBuilder draw_texture( + const ti::Texture& texture + ) { + return DrawTextureBuilder(shared_from_this(), texture); + } + DrawTextureBuilder draw_texture(const ti::NdArray &texture) { + return DrawTextureBuilder(shared_from_this(), texture); + } + // Before a frame. #if TI_AOT_DEMO_WITH_GLFW void set_surface_window(GLFWwindow* window); @@ -91,7 +153,7 @@ class Renderer { // After a frame. You MUST call one of them between frames for the renderer to // work properly void present_to_surface(); - void present_to_ndarray(ti::NdArray& dst); + ti::NdArray present_to_ndarray(); // After all the works of a frame. DO NOT call this unless you know what you // are doing. @@ -105,14 +167,14 @@ class Renderer { } // The renderer's representation as Taichi objects. - constexpr TiArch arch() const { - return TI_ARCH_VULKAN; - } constexpr PFN_vkGetInstanceProcAddr loader() const { return loader_; } - constexpr TiRuntime runtime() const { - return runtime_; + constexpr ti::Runtime &client_runtime() { + return client_runtime_; + } + constexpr const ti::Runtime &client_runtime() const { + return client_runtime_; } constexpr uint32_t width() const { return width_; @@ -129,70 +191,6 @@ class Renderer { } }; -enum GraphicsTaskResourceType { - L_GRAPHICS_TASK_RESOURCE_TYPE_NDARRAY, - L_GRAPHICS_TASK_RESOURCE_TYPE_TEXTURE, -}; - -struct GraphicsTaskResource { - GraphicsTaskResourceType type; - union { - TiNdArray ndarray; - TiTexture texture; - }; -}; - -enum PrimitiveTopology { - L_PRIMITIVE_TOPOLOGY_POINT, - L_PRIMITIVE_TOPOLOGY_LINE, - L_PRIMITIVE_TOPOLOGY_TRIANGLE, -}; - -struct GraphicsTaskConfig { - std::string vertex_shader_glsl; - std::string fragment_shader_glsl; - void* uniform_buffer_data; - size_t uniform_buffer_size; - std::vector resources; - - TiMemory vertex_buffer; - TiMemory index_buffer; - uint32_t vertex_component_count; - uint32_t vertex_count; - uint32_t index_count; - uint32_t instance_count; - PrimitiveTopology primitive_topology; -}; - -class GraphicsTask { - friend class Renderer; - - GraphicsTaskConfig config_; - - std::shared_ptr renderer_; - VkPipeline pipeline_; - VkPipelineLayout pipeline_layout_; - VkDescriptorSetLayout descriptor_set_layout_; - VkDescriptorPool descriptor_pool_; - VkDescriptorSet descriptor_set_; - VkBuffer uniform_buffer_; - VmaAllocation uniform_buffer_allocation_; - std::vector texture_views_; - -public: - constexpr bool is_valid() const { - return pipeline_ != VK_NULL_HANDLE; - } - void destroy(); - - GraphicsTask() {} - GraphicsTask( - const std::shared_ptr& renderer, - const GraphicsTaskConfig& config - ); - ~GraphicsTask(); -}; - } // namespace aot_demo } // namespace ti diff --git a/framework/include/taichi/aot_demo/shadow_buffer.hpp b/framework/include/taichi/aot_demo/shadow_buffer.hpp new file mode 100644 index 00000000..a1c4e09c --- /dev/null +++ b/framework/include/taichi/aot_demo/shadow_buffer.hpp @@ -0,0 +1,40 @@ +#pragma once +#include "taichi/aot_demo/common.hpp" + +namespace ti { +namespace aot_demo { + +class Renderer; + +enum class ShadowBufferUsage { + VertexBuffer, + IndexBuffer, + StorageBuffer, +}; +class ShadowBuffer { + friend class Renderer; + public: + ShadowBuffer(const std::shared_ptr &renderer, + const ti::Memory &client_memory, + ShadowBufferUsage usage); + virtual ~ShadowBuffer(); + + // Copy data in `client_memory_` to `memory_`. + void update(); + + private: + std::shared_ptr renderer_; + ShadowBufferUsage usage_; + // Owned memory that is accessed by draw shaders. Always `TI_ARCH_VULKAN`. + ti::Memory memory_; + // Reference to data source. Follows `Renderer::client_arch()`. + ti::Memory client_memory_; + + void copy_from_vulkan_(const ti::Memory &src); + void copy_from_cpu_(const ti::Memory &src); + void copy_from_cuda_(const ti::Memory &src); + void copy_from_opengl_(const ti::Memory &src); +}; + +} // namespace aot_demo +} // namespace ti diff --git a/framework/include/taichi/aot_demo/shadow_texture.hpp b/framework/include/taichi/aot_demo/shadow_texture.hpp new file mode 100644 index 00000000..c33a6596 --- /dev/null +++ b/framework/include/taichi/aot_demo/shadow_texture.hpp @@ -0,0 +1,38 @@ +#pragma once +#include "taichi/aot_demo/common.hpp" + +namespace ti { +namespace aot_demo { + +class Renderer; + +enum class ShadowTextureUsage { + SampledImage, +}; +class ShadowTexture { + friend class Renderer; + public: + ShadowTexture(const std::shared_ptr &renderer, + const ti::Image &client_image, + ShadowTextureUsage usage); + virtual ~ShadowTexture(); + + // Copy data in `client_memory_` to `memory_`. + void update(); + + private: + std::shared_ptr renderer_; + ShadowTextureUsage usage_; + // Owned memory that is accessed by draw shaders. Always `TI_ARCH_VULKAN`. + ti::Image image_; + // Reference to data source. Follows `Renderer::client_arch()`. + ti::Image client_image_; + + void copy_from_vulkan_(const ti::Image &src); + void copy_from_cpu_(const ti::Image &src); + void copy_from_cuda_(const ti::Image &src); + void copy_from_opengl_(const ti::Image &src); +}; + +} // namespace aot_demo +} // namespace ti diff --git a/framework/src/taichi/aot_demo/graphics_runtime.cpp b/framework/include/taichi/aot_demo/vulkan/vulkan_common.hpp similarity index 100% rename from framework/src/taichi/aot_demo/graphics_runtime.cpp rename to framework/include/taichi/aot_demo/vulkan/vulkan_common.hpp diff --git a/framework/src/taichi/aot_demo/draws/draw_mesh.cpp b/framework/src/taichi/aot_demo/draws/draw_mesh.cpp index 3232f880..4768edc5 100644 --- a/framework/src/taichi/aot_demo/draws/draw_mesh.cpp +++ b/framework/src/taichi/aot_demo/draws/draw_mesh.cpp @@ -8,10 +8,9 @@ namespace ti { namespace aot_demo { std::unique_ptr DrawMeshBuilder::build() { - uint32_t ncomp = positions_.elem_shape.dims[0]; const char* vertex_declr; const char* vertex_get; - switch (ncomp) { + switch (position_component_count_) { case 1: vertex_declr = "layout(location=0) in float pos;"; vertex_get = "vec4(pos * 2.0 - 1.0, 0.0f, 0.0f, 1.0f)"; @@ -50,7 +49,7 @@ std::unique_ptr DrawMeshBuilder::build() { std::vector rscs; - bool is_color_per_vertex = colors_.memory != TI_NULL_HANDLE; + bool is_color_per_vertex = colors_ != nullptr; std::string color_buffer_declr; const char* color_get; if (is_color_per_vertex) { @@ -64,10 +63,7 @@ std::unique_ptr DrawMeshBuilder::build() { } color_get = "colors[gl_VertexIndex]"; - GraphicsTaskResource rsc {}; - rsc.type = L_GRAPHICS_TASK_RESOURCE_TYPE_NDARRAY; - rsc.ndarray = colors_; - rscs.emplace_back(std::move(rsc)); + rscs.emplace_back(colors_); } else { color_get = "u.color"; } @@ -103,12 +99,12 @@ std::unique_ptr DrawMeshBuilder::build() { config.fragment_shader_glsl = frag; config.uniform_buffer_data = &u; config.uniform_buffer_size = sizeof(u); - config.vertex_buffer = positions_.memory; + config.vertex_buffer = positions_; config.resources = std::move(rscs); - config.vertex_component_count = ncomp; - config.vertex_count = positions_.shape.dims[0]; - config.index_buffer = indices_.memory; - config.index_count = indices_.shape.dims[0] * indices_.elem_shape.dims[0]; + config.vertex_component_count = position_component_count_; + config.vertex_count = position_count_; + config.index_buffer = indices_; + config.index_count = primitive_count_ * primitive_vertex_count_; config.instance_count = 1; config.primitive_topology = L_PRIMITIVE_TOPOLOGY_TRIANGLE; diff --git a/framework/src/taichi/aot_demo/draws/draw_particles.cpp b/framework/src/taichi/aot_demo/draws/draw_particles.cpp index 48d5bb6e..085753cf 100644 --- a/framework/src/taichi/aot_demo/draws/draw_particles.cpp +++ b/framework/src/taichi/aot_demo/draws/draw_particles.cpp @@ -9,10 +9,9 @@ namespace ti { namespace aot_demo { std::unique_ptr DrawParticlesBuilder::build() { - uint32_t ncomp = positions_.elem_shape.dims[0]; const char* vertex_declr; const char* vertex_get; - switch (ncomp) { + switch (position_component_count_) { case 1: vertex_declr = "layout(location=0) in float pos;"; vertex_get = "vec4(pos * 2.0 - 1.0, 0.0f, 0.0f, 1.0f)"; @@ -54,7 +53,7 @@ std::unique_ptr DrawParticlesBuilder::build() { std::vector rscs; - bool is_color_per_vertex = colors_.memory != TI_NULL_HANDLE; + bool is_color_per_vertex = colors_ != nullptr; std::string color_buffer_declr; const char* color_get; if (is_color_per_vertex) { @@ -68,10 +67,7 @@ std::unique_ptr DrawParticlesBuilder::build() { } color_get = "colors[gl_VertexIndex]"; - GraphicsTaskResource rsc {}; - rsc.type = L_GRAPHICS_TASK_RESOURCE_TYPE_NDARRAY; - rsc.ndarray = colors_; - rscs.emplace_back(std::move(rsc)); + rscs.emplace_back(colors_); } else { color_get = "u.color"; } @@ -107,10 +103,10 @@ std::unique_ptr DrawParticlesBuilder::build() { config.fragment_shader_glsl = frag; config.uniform_buffer_data = &u; config.uniform_buffer_size = sizeof(u); - config.vertex_buffer = positions_.memory; + config.vertex_buffer = positions_; config.resources = std::move(rscs); - config.vertex_component_count = ncomp; - config.vertex_count = positions_.shape.dims[0]; + config.vertex_component_count = position_component_count_; + config.vertex_count = position_count_; config.instance_count = 1; config.primitive_topology = L_PRIMITIVE_TOPOLOGY_POINT; diff --git a/framework/src/taichi/aot_demo/draws/draw_points.cpp b/framework/src/taichi/aot_demo/draws/draw_points.cpp index 8b1cf77c..24fd0425 100644 --- a/framework/src/taichi/aot_demo/draws/draw_points.cpp +++ b/framework/src/taichi/aot_demo/draws/draw_points.cpp @@ -1,5 +1,5 @@ #include -#include +#include "taichi/aot_demo/graphics_task.hpp" #include "taichi/aot_demo/renderer.hpp" #include "taichi/aot_demo/draws/draw_points.hpp" @@ -7,10 +7,9 @@ namespace ti { namespace aot_demo { std::unique_ptr DrawPointsBuilder::build() { - uint32_t ncomp = positions_.elem_shape.dims[0]; const char* vertex_declr; const char* vertex_get; - switch (ncomp) { + switch (position_component_count_) { case 1: vertex_declr = "layout(location=0) in float pos;"; vertex_get = "vec4(pos * 2.0 - 1.0, 0.0f, 0.0f, 1.0f)"; @@ -46,7 +45,7 @@ std::unique_ptr DrawPointsBuilder::build() { std::vector rscs; - bool is_color_per_vertex = colors_.memory != TI_NULL_HANDLE; + bool is_color_per_vertex = colors_ != nullptr; std::string color_buffer_declr; const char* color_get; if (is_color_per_vertex) { @@ -60,15 +59,12 @@ std::unique_ptr DrawPointsBuilder::build() { } color_get = "colors[gl_VertexIndex]"; - GraphicsTaskResource rsc {}; - rsc.type = L_GRAPHICS_TASK_RESOURCE_TYPE_NDARRAY; - rsc.ndarray = colors_; - rscs.emplace_back(std::move(rsc)); + rscs.emplace_back(colors_); } else { color_get = "u.color"; } - bool is_point_size_per_vertex = point_sizes_.memory != TI_NULL_HANDLE; + bool is_point_size_per_vertex = point_sizes_ != nullptr; std::string point_size_buffer_declr; const char* point_size_get; if (is_point_size_per_vertex) { @@ -82,10 +78,7 @@ std::unique_ptr DrawPointsBuilder::build() { } point_size_get = "point_sizes[gl_VertexIndex]"; - GraphicsTaskResource rsc {}; - rsc.type = L_GRAPHICS_TASK_RESOURCE_TYPE_NDARRAY; - rsc.ndarray = point_sizes_; - rscs.emplace_back(std::move(rsc)); + rscs.emplace_back(point_sizes_); } else { point_size_get = "u.point_size"; } @@ -122,10 +115,10 @@ std::unique_ptr DrawPointsBuilder::build() { config.fragment_shader_glsl = frag; config.uniform_buffer_data = &u; config.uniform_buffer_size = sizeof(u); - config.vertex_buffer = positions_.memory; + config.vertex_buffer = positions_; config.resources = std::move(rscs); - config.vertex_component_count = ncomp; - config.vertex_count = positions_.shape.dims[0]; + config.vertex_component_count = position_component_count_; + config.vertex_count = position_count_; config.instance_count = 1; config.primitive_topology = L_PRIMITIVE_TOPOLOGY_POINT; diff --git a/framework/src/taichi/aot_demo/draws/draw_texture.cpp b/framework/src/taichi/aot_demo/draws/draw_texture.cpp index 24e2a800..dee11dde 100644 --- a/framework/src/taichi/aot_demo/draws/draw_texture.cpp +++ b/framework/src/taichi/aot_demo/draws/draw_texture.cpp @@ -1,3 +1,4 @@ +#include #include #include #include "taichi/aot_demo/renderer.hpp" @@ -9,25 +10,67 @@ namespace aot_demo { std::unique_ptr DrawTextureBuilder::build() { const char* uniform_buffer_declr = R"( layout(binding=0) uniform Uniform { - float dummy_; + ivec2 width_height; } u; )"; struct UniformBuffer { - float dummy_; + glm::ivec2 width_height; } u; + u.width_height = glm::ivec2(width_, height_); std::vector rscs; { - GraphicsTaskResource rsc {}; - rsc.type = L_GRAPHICS_TASK_RESOURCE_TYPE_NDARRAY; - rsc.ndarray = renderer_->rect_texcoord_buffer(); - rscs.emplace_back(std::move(rsc)); + rect_vertices_ = create_shadow_buffer(renderer_->rect_texcoord_buffer().memory(), + ShadowBufferUsage::VertexBuffer); + rscs.emplace_back(rect_vertices_); } - { - GraphicsTaskResource rsc {}; - rsc.type = L_GRAPHICS_TASK_RESOURCE_TYPE_TEXTURE; - rsc.texture = texture_; - rscs.emplace_back(std::move(rsc)); + + const char* texture_declr; + const char* texture_get; + if (texture_ != nullptr) { + rscs.emplace_back(texture_); + texture_declr = "layout(binding=2) uniform sampler2D maintex;"; + texture_get = ""; + } + if (texture_buffer_ != nullptr) { + const char* texture_declr; + switch (texel_component_count_) { + case 1: + texture_declr = R"( + layout(binding=2, std430) readonly buffer MainTex { + float texels[]; + }; + )"; + texture_get = "vec4(texels[(v_uv.y * u.width + v_uv.x)], 0.0, 0.0, 1.0)"; + break; + case 2: + texture_declr = R"( + layout(binding=2, std430) readonly buffer MainTex { + vec2 texels[]; + }; + )"; + texture_get = "vec4(texels[(v_uv.y * u.width + v_uv.x)], 0.0, 1.0)"; + break; + case 3: + texture_declr = R"( + layout(binding=2, std430) readonly buffer MainTex { + float texels[]; + }; + )"; + texture_get = "vec4(texels[(v_uv.y * u.width + v_uv.x) * 3], texels[(v_uv.y * u.width + v_uv.x) * 3 + 1], texels[(v_uv.y * u.width + v_uv.x) * 3 + 2], 1.0)"; + break; + case 4: + texture_declr = R"( + layout(binding=2, std430) readonly buffer MainTex { + vec4 texels[]; + }; + )"; + texture_get = "texels[(v_uv.y * u.width + v_uv.x)]"; + break; + default: + throw std::logic_error("vertex position can only `float`, `vec2`, `vec3` or `vec4`"); + } + rscs.emplace_back(texture_buffer_); } std::string vert; @@ -49,23 +92,28 @@ std::unique_ptr DrawTextureBuilder::build() { )"; vert = ss.str(); } - const char* frag = R"( - #version 460 - layout(location=0) in vec2 v_uv; - layout(location=0) out vec4 color; - - layout(binding=2) uniform sampler2D maintex; - void main() { - color = texture(maintex, v_uv); - } - )"; + std::string frag; + { + std::stringstream ss; + ss << R"( + #version 460 + layout(location=0) in vec2 v_uv; + layout(location=0) out vec4 color; + )" << uniform_buffer_declr + << "\n" + << texture_declr << R"( + void main() { + color = )" << texture_get << R"( + } + )"; + } GraphicsTaskConfig config {}; config.vertex_shader_glsl = vert; config.fragment_shader_glsl = frag; config.uniform_buffer_data = &u; config.uniform_buffer_size = sizeof(u); - config.vertex_buffer = renderer_->rect_vertex_buffer().memory(); + config.vertex_buffer = rect_vertices_; config.resources = std::move(rscs); config.vertex_component_count = 2; config.vertex_count = 6; diff --git a/framework/src/taichi/aot_demo/entry_points/android.cpp b/framework/src/taichi/aot_demo/entry_points/android.cpp index b31a06a7..7fed8f85 100644 --- a/framework/src/taichi/aot_demo/entry_points/android.cpp +++ b/framework/src/taichi/aot_demo/entry_points/android.cpp @@ -5,6 +5,7 @@ #include "gft/util.hpp" #include #include +#include #define VK_USE_PLATFORM_ANDROID_KHR 1 #include @@ -71,12 +72,16 @@ static void on_app_cmd_callback(struct android_app* state, int32_t cmd) { std::unique_ptr app2 = create_app(); const AppConfig app_cfg = app2->cfg(); - auto F = std::make_shared(app_cfg, false); + EntryPointConfig entry_point_cfg{}; + entry_point_cfg.debug = false; + entry_point_cfg.client_arch = TI_ARCH_VULKAN; + + auto F = std::make_shared(app_cfg, entry_point_cfg); app2->set_framework(F); ti::aot_demo::Renderer& renderer = F->renderer(); - app2->initialize(TI_ARCH_VULKAN); + app2->initialize(); renderer.set_surface_window(state->window); diff --git a/framework/src/taichi/aot_demo/entry_points/glfw.cpp b/framework/src/taichi/aot_demo/entry_points/glfw.cpp index 8e664931..e5516aa9 100644 --- a/framework/src/taichi/aot_demo/entry_points/glfw.cpp +++ b/framework/src/taichi/aot_demo/entry_points/glfw.cpp @@ -95,13 +95,16 @@ int main(int argc, const char** argv) { initialize(app_cfg.app_name, argc, argv); - auto F = std::make_shared(app_cfg, CFG.debug); + EntryPointConfig entry_point_cfg{}; + entry_point_cfg.client_arch = CFG.arch; + entry_point_cfg.debug = CFG.debug; + + auto F = std::make_shared(app_cfg, entry_point_cfg); app->set_framework(F); - ti::aot_demo::GraphicsRuntime& runtime = F->runtime(); ti::aot_demo::Renderer& renderer = F->renderer(); - app->initialize(CFG.arch); + app->initialize(); GLFWwindow* glfw_window = create_glfw_window(app_cfg, renderer); renderer.set_surface_window(glfw_window); diff --git a/framework/src/taichi/aot_demo/entry_points/headless.cpp b/framework/src/taichi/aot_demo/entry_points/headless.cpp index 836e96ba..2d43953c 100644 --- a/framework/src/taichi/aot_demo/entry_points/headless.cpp +++ b/framework/src/taichi/aot_demo/entry_points/headless.cpp @@ -5,6 +5,7 @@ #include "gft/util.hpp" #include +#include struct Config { std::string output_path = ""; @@ -75,18 +76,19 @@ int main(int argc, const char** argv) { initialize(app_cfg.app_name, argc, argv); - auto F = std::make_shared(app_cfg, CFG.debug); + EntryPointConfig entry_point_cfg{}; + entry_point_cfg.client_arch = CFG.arch; + entry_point_cfg.debug = CFG.debug; + + auto F = std::make_shared(app_cfg, entry_point_cfg); app->set_framework(F); - ti::aot_demo::GraphicsRuntime& runtime = F->runtime(); ti::aot_demo::Renderer& renderer = F->renderer(); - app->initialize(CFG.arch); + app->initialize(); uint32_t width = renderer.width(); uint32_t height = renderer.height(); - ti::NdArray framebuffer = - runtime.allocate_ndarray({width, height}, {4}, true); for (uint32_t i = 0; i < CFG.frame_count; ++i) { if (!app->update()) { @@ -98,7 +100,7 @@ int main(int argc, const char** argv) { app->render(); renderer.end_render(); - renderer.present_to_ndarray(framebuffer); + ti::NdArray framebuffer = renderer.present_to_ndarray(); F->next_frame(); save_framebuffer_to_bmp(framebuffer, i); diff --git a/framework/src/taichi/aot_demo/framework.cpp b/framework/src/taichi/aot_demo/framework.cpp index 3aafd319..764626d2 100644 --- a/framework/src/taichi/aot_demo/framework.cpp +++ b/framework/src/taichi/aot_demo/framework.cpp @@ -1,15 +1,19 @@ #include #include "taichi/aot_demo/framework.hpp" +#include "taichi/aot_demo/renderer.hpp" namespace ti { namespace aot_demo { -Framework::Framework(const AppConfig& app_cfg, bool debug) { - renderer_ = std::make_unique( - debug, - app_cfg.framebuffer_width, - app_cfg.framebuffer_height); - runtime_ = GraphicsRuntime(renderer_); +Framework::Framework(const AppConfig &app_cfg, const EntryPointConfig& entry_point_cfg) { + + RendererConfig config{}; + config.debug = entry_point_cfg.debug; + config.framebuffer_width = app_cfg.framebuffer_width; + config.framebuffer_height = app_cfg.framebuffer_height; + config.client_arch = entry_point_cfg.client_arch; + + renderer_ = std::make_shared(config); asset_mgr_ = create_asset_manager(); frame_ = 0; @@ -21,63 +25,11 @@ Framework::Framework(const AppConfig& app_cfg, bool debug) { Framework::~Framework() { if (renderer_ != nullptr) { asset_mgr_.reset(); - runtime_.destroy(); renderer_.reset(); std::cout << "framework finalized" << std::endl; } } -GraphicsRuntime::GraphicsRuntime(const std::shared_ptr& renderer) : - ti::Runtime(TI_ARCH_VULKAN, renderer->runtime(), false), renderer_(renderer) {} - -ti::NdArray GraphicsRuntime::allocate_vertex_buffer( - uint32_t vertex_count, - uint32_t vertex_component_count, - bool host_access -) { - TiMemoryAllocateInfo mai {}; - mai.size = vertex_component_count * vertex_count * sizeof(float); - mai.host_read = host_access; - mai.host_write = host_access; - mai.usage = TI_MEMORY_USAGE_STORAGE_BIT | TI_MEMORY_USAGE_VERTEX_BIT; -#ifndef ANDROID - mai.export_sharing = TI_TRUE; -#endif // ANDROID - ti::Memory memory = allocate_memory(mai); - - TiNdArray ndarray {}; - ndarray.memory = memory; - ndarray.elem_type = TI_DATA_TYPE_F32; - ndarray.shape.dim_count = 1; - ndarray.shape.dims[0] = vertex_count; - ndarray.elem_shape.dim_count = 1; - ndarray.elem_shape.dims[0] = vertex_component_count; - return ti::NdArray(std::move(memory), ndarray); -} -ti::NdArray GraphicsRuntime::allocate_index_buffer( - uint32_t index_count, - uint32_t index_component_count, - bool host_access -) { - TiMemoryAllocateInfo mai {}; - mai.size = index_count * index_component_count * sizeof(uint32_t); - mai.host_write = host_access; - mai.usage = TI_MEMORY_USAGE_STORAGE_BIT | TI_MEMORY_USAGE_INDEX_BIT; -#ifndef ANDROID - mai.export_sharing = TI_TRUE; -#endif // ANDROID - ti::Memory memory = allocate_memory(mai); - - TiNdArray ndarray {}; - ndarray.memory = memory; - ndarray.elem_type = TI_DATA_TYPE_U32; - ndarray.shape.dim_count = 1; - ndarray.shape.dims[0] = index_count; - ndarray.elem_shape.dim_count = 1; - ndarray.elem_shape.dims[0] = index_component_count; - return ti::NdArray(std::move(memory), ndarray); -} - } // namespace aot_demo } // namespace ti diff --git a/framework/src/taichi/aot_demo/interop/common_utils.cpp b/framework/src/taichi/aot_demo/interop/common_utils.cpp deleted file mode 100644 index f470266c..00000000 --- a/framework/src/taichi/aot_demo/interop/common_utils.cpp +++ /dev/null @@ -1,169 +0,0 @@ -#include "taichi/aot_demo/interop/common_utils.hpp" -#include - -namespace ti { -namespace aot_demo { - -void copyBuffer(VkDevice& device, - VkCommandPool& command_pool, - VkQueue& graphics_queue, - VkBuffer& src_buffer, - VkBuffer& dst_buffer, - VkDeviceSize size) { - - VkCommandBufferAllocateInfo alloc_info{}; - alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - alloc_info.commandPool = command_pool; - alloc_info.commandBufferCount = 1; - - VkCommandBuffer command_buffer; - vkAllocateCommandBuffers(device, &alloc_info, &command_buffer); - - VkCommandBufferBeginInfo begin_info{}; - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - - vkBeginCommandBuffer(command_buffer, &begin_info); - - VkBufferCopy copy_region{}; - copy_region.size = size; - vkCmdCopyBuffer(command_buffer, src_buffer, dst_buffer, 1, ©_region); - - vkEndCommandBuffer(command_buffer); - - VkSubmitInfo submit_info{}; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - - vkQueueSubmit(graphics_queue, 1, &submit_info, VK_NULL_HANDLE); - vkQueueWaitIdle(graphics_queue); - - vkFreeCommandBuffers(device, command_pool, 1, &command_buffer); -} - -uint32_t findMemoryType(VkPhysicalDevice& physical_device, uint32_t type_filter, VkMemoryPropertyFlags properties) { - VkPhysicalDeviceMemoryProperties mem_properties; - vkGetPhysicalDeviceMemoryProperties(physical_device, &mem_properties); - - for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++) { - if ((type_filter & (1 << i)) && (mem_properties.memoryTypes[i].propertyFlags & properties) == properties) { - return i; - } - } - - throw std::runtime_error("failed to find suitable memory type!"); -} - -void createBuffer(VkDevice& device, - VkPhysicalDevice& physical_device, - VkDeviceSize size, - VkBufferUsageFlags usage, - VkMemoryPropertyFlags properties, - VkBuffer& buffer, - VkDeviceMemory& buffer_memory) { - VkBufferCreateInfo buffer_info{}; - buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - buffer_info.size = size; - buffer_info.usage = usage; - buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - - if (vkCreateBuffer(device, &buffer_info, nullptr, &buffer) != VK_SUCCESS) { - throw std::runtime_error("failed to create buffer!"); - } - - VkMemoryRequirements mem_requirements; - vkGetBufferMemoryRequirements(device, buffer, &mem_requirements); - - VkMemoryAllocateInfo alloc_info{}; - alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - alloc_info.allocationSize = mem_requirements.size; - alloc_info.memoryTypeIndex = findMemoryType(physical_device, mem_requirements.memoryTypeBits, properties); - - if (vkAllocateMemory(device, &alloc_info, nullptr, &buffer_memory) != VK_SUCCESS) { - throw std::runtime_error("failed to allocate buffer memory!"); - } - - vkBindBufferMemory(device, buffer, buffer_memory, 0); -} - -void copyImage2Buffer(VkDevice& device, - VkCommandPool& command_pool, - VkQueue& graphics_queue, - VkImage& src_image, - VkBuffer& dst_buffer, - VkImageLayout& layout, - uint32_t width, - uint32_t height, - uint32_t channel, - bool image_to_buffer) { - - VkCommandBufferAllocateInfo alloc_info{}; - alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - alloc_info.commandPool = command_pool; - alloc_info.commandBufferCount = 1; - - VkCommandBuffer command_buffer; - vkAllocateCommandBuffers(device, &alloc_info, &command_buffer); - - VkCommandBufferBeginInfo begin_info{}; - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - - vkBeginCommandBuffer(command_buffer, &begin_info); - - // Program command buffers - VkBufferImageCopy region{}; - region.bufferOffset = 0; - region.bufferRowLength = 0; - region.bufferImageHeight = 0; - - region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - region.imageSubresource.mipLevel = 0; - region.imageSubresource.baseArrayLayer = 0; - region.imageSubresource.layerCount = 1; - - region.imageOffset = {0, 0, 0}; - region.imageExtent = { - width, - height, - channel - }; - - if(image_to_buffer) { - vkCmdCopyImageToBuffer( - command_buffer, - src_image, /*VkImage*/ - layout, /*VkImageLayout*/ - dst_buffer, - 1, - ®ion - ); - } else { - vkCmdCopyBufferToImage( - command_buffer, - dst_buffer, - src_image, /*VkImage*/ - layout, /*VkImageLayout*/ - 1, - ®ion - ); - } - - vkEndCommandBuffer(command_buffer); - - VkSubmitInfo submit_info{}; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - - vkQueueSubmit(graphics_queue, 1, &submit_info, VK_NULL_HANDLE); - vkQueueWaitIdle(graphics_queue); - - vkFreeCommandBuffers(device, command_pool, 1, &command_buffer); -} - -} -} diff --git a/framework/src/taichi/aot_demo/interop/texture_utils.cpp b/framework/src/taichi/aot_demo/interop/texture_utils.cpp deleted file mode 100644 index da08c3a3..00000000 --- a/framework/src/taichi/aot_demo/interop/texture_utils.cpp +++ /dev/null @@ -1,134 +0,0 @@ -#include - -#include "taichi/aot_demo/interop/texture_utils.hpp" -#include "taichi/aot_demo/interop/common_utils.hpp" -#include "taichi/aot_demo/interop/cross_device_copy.hpp" -#include "taichi/aot_demo/renderer.hpp" - -namespace ti { -namespace aot_demo { - -template -ti::NdArray clone_ndarray(ti::Runtime& runtime, - const ti::NdArray& ndarray) { - std::vector shape; - for(uint32_t i = 0; i < ndarray.shape().dim_count; i++) { - shape.push_back(ndarray.shape().dims[i]); - } - - std::vector element_shape; - for(uint32_t i = 0; i < ndarray.elem_shape().dim_count; i++) { - element_shape.push_back(ndarray.elem_shape().dims[i]); - } - - ti::NdArray target_ndarray = runtime.allocate_ndarray(shape, element_shape, true /*host_access*/); - - return target_ndarray; -} - - -template -void TextureHelper::interchange_vulkan_ndarray_texture(GraphicsRuntime& g_runtime, - ti::Texture& vulkan_texture, - ti::Runtime& vulkan_runtime, - ti::NdArray& vulkan_ndarray, - bool texture_to_ndarray) { - // Get Vulkan Ndarray Interop Info - TiVulkanMemoryInteropInfo vulkan_interop_info; - ti_export_vulkan_memory(vulkan_runtime.runtime(), - vulkan_ndarray.memory().memory(), - &vulkan_interop_info); - - VkBuffer ndarray_buffer = vulkan_interop_info.buffer; - - uint32_t width = vulkan_ndarray.shape().dim_count > 0 ? vulkan_ndarray.shape().dims[0] : 1; - uint32_t height = vulkan_ndarray.shape().dim_count > 1 ? vulkan_ndarray.shape().dims[1] : 1; - uint32_t channel = vulkan_ndarray.shape().dim_count > 2 ? vulkan_ndarray.shape().dims[2] : 1; - - // Get VkImage - VkImageLayout image_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - g_runtime.transition_image(vulkan_texture.image(), TI_IMAGE_LAYOUT_TRANSFER_DST); - if(texture_to_ndarray) { - image_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; - g_runtime.transition_image(vulkan_texture.image(), TI_IMAGE_LAYOUT_TRANSFER_SRC); - } - - TiVulkanImageInteropInfo interop_info; - ti_export_vulkan_image(g_runtime, - vulkan_texture.image().image(), - &interop_info); - - VkImage vk_image = interop_info.image; - VkDevice vk_device = g_runtime.renderer_->device_; - VkCommandPool cmd_pool = g_runtime.renderer_->command_pool_; - VkQueue graphics_queue = g_runtime.renderer_->queue_; - copyImage2Buffer(vk_device, - cmd_pool, - graphics_queue, - vk_image, - ndarray_buffer, - image_layout, - width, - height, - channel, - texture_to_ndarray /*image_to_buffer*/); -} - - -template -void TextureHelper::copy_from_cpu_ndarray(GraphicsRuntime& g_runtime, - ti::Texture& vulkan_texture, - ti::Runtime& cpu_runtime, - ti::NdArray& cpu_ndarray) { - - // 1. Create Vulkan staging buffer - ti::NdArray vulkan_ndarray = clone_ndarray(g_runtime, cpu_ndarray); - - // 2. Copy device ndarray to vulkan ndarray - InteropHelper::copy_from_cpu(g_runtime, vulkan_ndarray, cpu_runtime, cpu_ndarray); - - // 3. Copy vulkan ndarray to vulkan texture - TextureHelper::interchange_vulkan_ndarray_texture(g_runtime, vulkan_texture, g_runtime, vulkan_ndarray, false /*texture_to_ndarray*/); - -} - -template -void TextureHelper::copy_from_cuda_ndarray(GraphicsRuntime& g_runtime, - ti::Texture& vulkan_texture, - ti::Runtime& cuda_runtime, - ti::NdArray& cuda_ndarray) { - - // 1. Create Vulkan staging buffer - ti::NdArray vulkan_ndarray = clone_ndarray(g_runtime, cuda_ndarray); - - // 2. Copy device ndarray to vulkan ndarray - InteropHelper::copy_from_cuda(g_runtime, vulkan_ndarray, cuda_runtime, cuda_ndarray); - - // 3. Copy vulkan ndarray to vulkan texture - TextureHelper::interchange_vulkan_ndarray_texture(g_runtime, vulkan_texture, g_runtime, vulkan_ndarray, false /*texture_to_ndarray*/); -} - -template -void TextureHelper::copy_from_opengl_ndarray(GraphicsRuntime &g_runtime, - ti::Texture &vulkan_texture, - ti::Runtime &opengl_runtime, - ti::NdArray &opengl_ndarray) { - - // 1, Create Vulkan staging buffer - ti::NdArray vulkan_ndarray = clone_ndarray(g_runtime, opengl_ndarray); - - // 2. Copy device ndarray to vulkan ndarray - InteropHelper::copy_from_opengl(g_runtime, vulkan_ndarray, opengl_runtime, opengl_ndarray); - - // 3. Copy vulkan ndarray to vulkan texture - TextureHelper::interchange_vulkan_ndarray_texture(g_runtime, vulkan_texture, g_runtime, vulkan_ndarray, false /* texture_to_ndarray*/); -} - -template class TextureHelper; -template class TextureHelper; -template class TextureHelper; -template class TextureHelper; - -} -} - diff --git a/framework/src/taichi/aot_demo/renderer.cpp b/framework/src/taichi/aot_demo/renderer.cpp index 08c0ff74..1d4c3efe 100644 --- a/framework/src/taichi/aot_demo/renderer.cpp +++ b/framework/src/taichi/aot_demo/renderer.cpp @@ -6,7 +6,11 @@ #include #include #include +#include "taichi/aot_demo/common.hpp" +#include "taichi/aot_demo/graphics_task.hpp" +#include "taichi/aot_demo/shadow_buffer.hpp" #include "taichi/aot_demo/renderer.hpp" +#include "taichi/aot_demo/shadow_texture.hpp" namespace ti { namespace aot_demo { @@ -15,22 +19,6 @@ namespace aot_demo { std::vector vert2spv(const std::string& vert); std::vector frag2spv(const std::string& frag); -#define check_vulkan_result(x) \ - if (x < VK_SUCCESS) { \ - uint32_t x2 = (uint32_t)x; \ - std::printf("File \"%s\", line %d, in %s:\n", __FILE__, __LINE__, __func__); \ - std::printf(" vulkan failed: %d\n", x2); \ - std::fflush(stdout); \ - throw std::runtime_error("vulkan failed"); \ - } - -inline void check_taichi_error() { - TiError error = ti_get_last_error(0, nullptr); - if (error < TI_ERROR_SUCCESS) { - throw std::runtime_error("taichi failed"); - } -} - VKAPI_ATTR VkBool32 VKAPI_CALL vulkan_validation_callback( VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT type, @@ -49,7 +37,7 @@ VKAPI_ATTR VkBool32 VKAPI_CALL vulkan_validation_callback( return VK_FALSE; } -Renderer::Renderer(bool debug, uint32_t width, uint32_t height) { +Renderer::Renderer(const RendererConfig& config) { VkResult res = VK_SUCCESS; uint32_t nlep = 0; @@ -64,7 +52,7 @@ Renderer::Renderer(bool debug, uint32_t width, uint32_t height) { lens.at(i) = leps.at(i).extensionName; } - if (debug) { + if (config.debug) { llns.emplace_back("VK_LAYER_KHRONOS_validation"); lens.emplace_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } @@ -91,7 +79,7 @@ Renderer::Renderer(bool debug, uint32_t width, uint32_t height) { ici.ppEnabledExtensionNames = lens.data(); VkDebugUtilsMessengerCreateInfoEXT dumci {}; - if (debug) { + if (config.debug) { dumci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; dumci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | @@ -113,7 +101,7 @@ Renderer::Renderer(bool debug, uint32_t width, uint32_t height) { VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT, }; VkValidationFeaturesEXT vf {}; - if (debug) { + if (config.debug) { vf.sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT; vf.enabledValidationFeatureCount = vfes.size(); vf.pEnabledValidationFeatures = vfes.data(); @@ -127,7 +115,7 @@ Renderer::Renderer(bool debug, uint32_t width, uint32_t height) { check_vulkan_result(res); VkDebugUtilsMessengerEXT debug_utils_messenger = VK_NULL_HANDLE; - if (debug) { + if (config.debug) { dumci.pNext = nullptr; PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT_ = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugUtilsMessengerEXT"); res = vkCreateDebugUtilsMessengerEXT_(instance, &dumci, nullptr, &debug_utils_messenger); @@ -342,8 +330,6 @@ Renderer::Renderer(bool debug, uint32_t width, uint32_t height) { TiVulkanRuntimeInteropInfo vrii {}; vrii.get_instance_proc_addr = loader; - // FIXME: (penguinliong) Use the real Vulkan API version when device - // capability is in. vrii.api_version = api_version; vrii.instance = instance; vrii.physical_device = physical_device; @@ -353,10 +339,19 @@ Renderer::Renderer(bool debug, uint32_t width, uint32_t height) { vrii.graphics_queue = queue; vrii.graphics_queue_family_index = queue_family_index; ti::Runtime runtime = - ti::Runtime(TI_ARCH_VULKAN, ti_import_vulkan_runtime(&vrii), true); + ti::Runtime(TI_ARCH_VULKAN, ti_import_vulkan_runtime(&vrii), true); + + ti::Runtime client_runtime {}; + if (config.client_arch == TI_ARCH_VULKAN) { + // Borrow the Vulkan device and import Taichi runtime from the renderer. + client_runtime = ti::Runtime(TI_ARCH_VULKAN, runtime, false); + } else { + // Otherwise create an interop foreign runtime. + client_runtime = ti::Runtime(config.client_arch); + } check_taichi_error(); - ti::NdArray rect_vertex_buffer {}; + ti::NdArray rect_vertex_buffer; { TiMemoryAllocateInfo mai {}; mai.size = sizeof(glm::vec2) * 6; @@ -410,7 +405,7 @@ Renderer::Renderer(bool debug, uint32_t width, uint32_t height) { render_pass_ = render_pass; framebuffer_ = VK_NULL_HANDLE; - set_framebuffer_size(width, height); + set_framebuffer_size(config.framebuffer_width, config.framebuffer_height); command_pool_ = command_pool; render_present_semaphore_ = render_present_semaphore; @@ -418,6 +413,7 @@ Renderer::Renderer(bool debug, uint32_t width, uint32_t height) { present_fence_ = present_fence; runtime_ = std::move(runtime); + client_runtime_ = std::move(client_runtime); loader_ = loader; rect_vertex_buffer_ = std::move(rect_vertex_buffer); @@ -787,13 +783,13 @@ void Renderer::enqueue_graphics_task(const GraphicsTask& graphics_task) { &graphics_task.descriptor_set_, 0, nullptr); { - const TiVulkanMemoryInteropInfo& vmii = export_ti_memory(config.vertex_buffer); + const TiVulkanMemoryInteropInfo& vmii = export_ti_memory(*config.vertex_buffer); VkDeviceSize o = 0; vkCmdBindVertexBuffers(frame_command_buffer_, 0, 1, &vmii.buffer, &o); } if (is_indexed) { - const TiVulkanMemoryInteropInfo& vmii = export_ti_memory(config.index_buffer); + const TiVulkanMemoryInteropInfo& vmii = export_ti_memory(*config.index_buffer); vkCmdBindIndexBuffer(frame_command_buffer_, vmii.buffer, 0, VK_INDEX_TYPE_UINT32); vkCmdDrawIndexed(frame_command_buffer_, config.index_count, config.instance_count, 0, 0, 0); @@ -913,16 +909,15 @@ void Renderer::present_to_surface() { check_vulkan_result(res); } -void Renderer::present_to_ndarray(ti::NdArray& dst) { - assert(!in_frame_); - assert(dst.shape().dim_count == 2); - assert(dst.shape().dims[0] == width_); - assert(dst.shape().dims[1] == height_); - assert(dst.elem_shape().dim_count == 1); - assert(dst.elem_shape().dims[0] == 4); +ti::NdArray Renderer::present_to_ndarray() { VkResult res = VK_SUCCESS; + assert(!in_frame_); + + ti::NdArray dst = runtime_.allocate_ndarray({height_, width_}, {4}, true); - const TiVulkanMemoryInteropInfo& vmii = export_ti_memory(dst.memory()); + TiVulkanMemoryInteropInfo vmii{}; + ti_export_vulkan_memory(runtime_, dst.memory(), &vmii); + check_taichi_error(); VkCommandBufferAllocateInfo cbai {}; cbai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; @@ -964,6 +959,8 @@ void Renderer::present_to_ndarray(ti::NdArray& dst) { si.pWaitDstStageMask = &ps; res = vkQueueSubmit(queue_, 1, &si, present_fence_); check_vulkan_result(res); + + return dst; } void Renderer::next_frame() { @@ -982,7 +979,10 @@ void Renderer::next_frame() { check_vulkan_result(res); } -const TiVulkanMemoryInteropInfo& Renderer::export_ti_memory(TiMemory memory) { +const TiVulkanMemoryInteropInfo &Renderer::export_ti_memory( + const ShadowBuffer &shadow_buffer) { + TiMemory memory = shadow_buffer.memory_; + auto it = ti_memory_interops_.find(memory); if (it == ti_memory_interops_.end()) { TiVulkanMemoryInteropInfo vmii {}; @@ -994,6 +994,21 @@ const TiVulkanMemoryInteropInfo& Renderer::export_ti_memory(TiMemory memory) { return it->second; } +const TiVulkanImageInteropInfo &Renderer::export_ti_image( + const ShadowTexture &shadow_texture) { + TiImage image = shadow_texture.image_; + + auto it = ti_image_interops_.find(image); + if (it == ti_image_interops_.end()) { + TiVulkanImageInteropInfo viii {}; + ti_export_vulkan_image(runtime_, image, &viii); + check_taichi_error(); + + it = ti_image_interops_.emplace(std::make_pair(image, std::move(viii))).first; + } + return it->second; +} + GraphicsTask::GraphicsTask( const std::shared_ptr& renderer, const GraphicsTaskConfig& config @@ -1016,7 +1031,7 @@ GraphicsTask::GraphicsTask( for (size_t i = 0; i < config.resources.size(); ++i) { VkDescriptorType dt; switch (config.resources.at(i).type) { - case L_GRAPHICS_TASK_RESOURCE_TYPE_NDARRAY: + case L_GRAPHICS_TASK_RESOURCE_TYPE_BUFFER: dt = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; break; case L_GRAPHICS_TASK_RESOURCE_TYPE_TEXTURE: @@ -1153,10 +1168,11 @@ GraphicsTask::GraphicsTask( for (size_t i = 0; i < config.resources.size(); ++i) { const GraphicsTaskResource &resource = config.resources.at(i); switch (resource.type) { - case L_GRAPHICS_TASK_RESOURCE_TYPE_NDARRAY: + case L_GRAPHICS_TASK_RESOURCE_TYPE_BUFFER: { const TiVulkanMemoryInteropInfo& vmii = - renderer_->export_ti_memory(resource.ndarray.memory); + renderer_->export_ti_memory(*resource.shadow_buffer); + check_taichi_error(); VkDescriptorBufferInfo dbi {}; dbi.buffer = vmii.buffer; @@ -1175,8 +1191,8 @@ GraphicsTask::GraphicsTask( } case L_GRAPHICS_TASK_RESOURCE_TYPE_TEXTURE: { - TiVulkanImageInteropInfo viii {}; - ti_export_vulkan_image(renderer_->runtime(), resource.texture.image, &viii); + TiVulkanImageInteropInfo viii{}; + renderer_->export_ti_image(*resource.shadow_texture); check_taichi_error(); VkImageViewType ivt; @@ -1441,5 +1457,17 @@ void GraphicsTask::destroy() { texture_views_.clear(); } + +std::shared_ptr GraphicsTaskBuilder::create_shadow_buffer( + const ti::Memory &src, + ShadowBufferUsage usage) { + return std::make_shared(renderer_, src, usage); +} +std::shared_ptr GraphicsTaskBuilder::create_shadow_texture( + const ti::Image &src, + ShadowTextureUsage usage) { + return std::make_shared(renderer_, src, usage); +} + } // namespace aot_demo } // namespace ti diff --git a/framework/src/taichi/aot_demo/interop/cross_device_copy.cpp b/framework/src/taichi/aot_demo/shadow_buffer.cpp similarity index 56% rename from framework/src/taichi/aot_demo/interop/cross_device_copy.cpp rename to framework/src/taichi/aot_demo/shadow_buffer.cpp index b187b8d3..72639b08 100644 --- a/framework/src/taichi/aot_demo/interop/cross_device_copy.cpp +++ b/framework/src/taichi/aot_demo/shadow_buffer.cpp @@ -1,74 +1,40 @@ -#include "taichi/aot_demo/interop/cross_device_copy.hpp" -#include "taichi/aot_demo/interop/common_utils.hpp" +#pragma once +#include "taichi/aot_demo/shadow_buffer.hpp" #include "taichi/aot_demo/renderer.hpp" - namespace ti { namespace aot_demo { -template -void InteropHelper::copy_from_vulkan(GraphicsRuntime& dst_runtime, - ti::NdArray& dst_vulkan_ndarray, - ti::Runtime& src_runtime, - ti::NdArray& src_vulkan_ndarray) { - // Get Dst Vulkan Interop Info - TiVulkanMemoryInteropInfo dst_vulkan_interop_info; - ti_export_vulkan_memory(dst_runtime.runtime(), - dst_vulkan_ndarray.memory().memory(), - &dst_vulkan_interop_info); - - TiVulkanMemoryInteropInfo src_vulkan_interop_info; - ti_export_vulkan_memory(src_runtime, src_vulkan_ndarray.memory().memory(), &src_vulkan_interop_info); - - VkBuffer src_buffer = src_vulkan_interop_info.buffer; - VkBuffer dst_buffer = dst_vulkan_interop_info.buffer; - - VkDeviceSize buffer_size = dst_vulkan_interop_info.size; - - VkDevice vk_device = dst_runtime.renderer_->device_; - VkCommandPool cmd_pool = dst_runtime.renderer_->command_pool_; - VkQueue graphics_queue = dst_runtime.renderer_->queue_; - copyBuffer(vk_device, cmd_pool, graphics_queue, src_buffer, dst_buffer, buffer_size); +ShadowBuffer::ShadowBuffer(const std::shared_ptr &renderer, + const ti::Memory &client_memory, + ShadowBufferUsage usage) + : renderer_(renderer) { + TiMemoryAllocateInfo mai {}; + mai.size = client_memory.size(); +#ifndef ANDROID + mai.export_sharing = TI_TRUE; +#endif // ANDROID + switch (usage) { + case ShadowBufferUsage::VertexBuffer: + mai.usage = TI_MEMORY_USAGE_STORAGE_BIT | TI_MEMORY_USAGE_VERTEX_BIT; + break; + case ShadowBufferUsage::IndexBuffer: + mai.usage = TI_MEMORY_USAGE_STORAGE_BIT | TI_MEMORY_USAGE_INDEX_BIT; + break; + case ShadowBufferUsage::StorageBuffer: + mai.usage = TI_MEMORY_USAGE_STORAGE_BIT; + break; + } + ti::Memory memory = renderer->runtime_.allocate_memory(mai); + + usage_ = usage; + memory_ = std::move(memory); + client_memory_ = ti::Memory(renderer_->client_runtime_, client_memory, + client_memory.size(), false); } -// TiMemory does not expose interface to check whether it's host accessible -// Therefore we'll copy via staging buffer anyway. -template -void InteropHelper::copy_from_cpu(GraphicsRuntime& runtime, - ti::NdArray& vulkan_ndarray, - ti::Runtime& cpu_runtime, - ti::NdArray& cpu_ndarray) { -#ifdef TI_WITH_CPU - // Get Interop Info - TiVulkanMemoryInteropInfo vulkan_interop_info; - ti_export_vulkan_memory(runtime.runtime(), - vulkan_ndarray.memory().memory(), - &vulkan_interop_info); - - TiCpuMemoryInteropInfo cpu_interop_info; - ti_export_cpu_memory(cpu_runtime, cpu_ndarray.memory().memory(), &cpu_interop_info); - - // Create staging buffer - VkDevice vk_device = runtime.renderer_->device_; - VkPhysicalDevice physical_device = runtime.renderer_->physical_device_; - VkBuffer staging_buffer; - VkDeviceMemory staging_buffer_memory; - VkDeviceSize buffer_size = cpu_interop_info.size; - createBuffer(vk_device, physical_device, buffer_size, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, staging_buffer, staging_buffer_memory); - - // Copy CPU data to staging buffer - void* data; - vkMapMemory(vk_device, staging_buffer_memory, 0, buffer_size, 0, &data); - memcpy(data, cpu_interop_info.ptr, (size_t) buffer_size); - vkUnmapMemory(vk_device, staging_buffer_memory); - - // Copy data from staging buffer to vertex buffer - VkCommandPool cmd_pool = runtime.renderer_->command_pool_; - VkQueue graphics_queue = runtime.renderer_->queue_; - copyBuffer(vk_device, cmd_pool, graphics_queue, staging_buffer, vulkan_interop_info.buffer, buffer_size); -#else - throw std::runtime_error("Unable to perform copy_from_cpu() with TI_WITH_CPU=OFF"); -#endif // TI_WITH_CPU +ShadowBuffer::~ShadowBuffer() { + memory_.destroy(); } struct DeviceMemoryHandle { @@ -151,9 +117,30 @@ DeviceMemoryHandle get_device_mem_handle(VkDeviceMemory mem, VkDevice device) { #endif // _WIN32 } -/*---------------------*/ -/* CUDA Implementation */ -/*---------------------*/ + + +void ShadowBuffer::copy_from_vulkan_(const ti::Memory &src) { + ti::Runtime &client_runtime = renderer_->client_runtime(); + + assert(client_runtime.arch() == TI_ARCH_VULKAN); + client_runtime.copy_memory_device_to_device(memory_.slice(), src.slice()); +} + +void ShadowBuffer::copy_from_cpu_(const ti::Memory &src) { +#if TI_WITH_CPU + ti::Runtime &client_runtime = renderer_->client_runtime(); + ti::Runtime &runtime = renderer_->client_runtime(); + + TiCpuMemoryInteropInfo cmii{}; + ti_export_cpu_memory(client_runtime, src.memory(), &cmii); + + ti::Memory staging_buffer = runtime.allocate_memory(memory_.size(), true); + staging_buffer.write(cmii.ptr, cmii.size); + + runtime.copy_memory_device_to_device(memory_.slice(), staging_buffer.slice()); +#endif // TI_WITH_CPU +} + #ifdef TI_WITH_CUDA CUexternalMemory import_cuda_memory_object_from_handle(const DeviceMemoryHandle& handle, @@ -193,38 +180,35 @@ void *map_buffer_onto_external_memory(CUexternalMemory ext_mem, return ptr; } #endif // TI_WITH_CUDA - -template -void InteropHelper::copy_from_cuda(GraphicsRuntime& runtime, - ti::NdArray& vulkan_ndarray, - ti::Runtime& cuda_runtime, - ti::NdArray& cuda_ndarray) { +void ShadowBuffer::copy_from_cuda_(const ti::Memory &src) { #ifdef TI_WITH_CUDA - // Get Interop Info - TiVulkanMemoryInteropInfo vulkan_interop_info; - ti_export_vulkan_memory(runtime.runtime(), - vulkan_ndarray.memory().memory(), - &vulkan_interop_info); - - TiCudaMemoryInteropInfo cuda_interop_info; - ti_export_cuda_memory(cuda_runtime, cuda_ndarray.memory().memory(), &cuda_interop_info); - - // Get binded VkDeviceMemory from VkBuffer - VkDevice vk_device = runtime.renderer_->device_; - VkDeviceMemory vertex_buffer_mem = vulkan_interop_info.memory; - - size_t alloc_offset = vulkan_interop_info.offset; - size_t alloc_size = vulkan_interop_info.size; - size_t mem_size = alloc_offset + alloc_size; - auto handle = get_device_mem_handle(vertex_buffer_mem, vk_device); - CUexternalMemory externalMem = - import_cuda_memory_object_from_handle(handle, mem_size, false); - CUdeviceptr dst_cuda_ptr = reinterpret_cast(map_buffer_onto_external_memory(externalMem, alloc_offset, vulkan_interop_info.size)); - CUdeviceptr src_cuda_ptr = reinterpret_cast(cuda_interop_info.ptr); - - cuMemcpyDtoD_v2(dst_cuda_ptr, src_cuda_ptr, vulkan_interop_info.size); + ti::Runtime &runtime = renderer_->runtime_; + ti::Runtime &cuda_runtime = renderer_->client_runtime(); + // Get Interop Info + TiVulkanMemoryInteropInfo vulkan_interop_info; + ti_export_vulkan_memory(runtime.runtime(), + memory_.memory(), + &vulkan_interop_info); + + TiCudaMemoryInteropInfo cuda_interop_info; + ti_export_cuda_memory(cuda_runtime, src.memory(), &cuda_interop_info); + + // Get binded VkDeviceMemory from VkBuffer + VkDevice vk_device = runtime.renderer_->device_; + VkDeviceMemory vertex_buffer_mem = vulkan_interop_info.memory; + + size_t alloc_offset = vulkan_interop_info.offset; + size_t alloc_size = vulkan_interop_info.size; + size_t mem_size = alloc_offset + alloc_size; + auto handle = get_device_mem_handle(vertex_buffer_mem, vk_device); + CUexternalMemory externalMem = + import_cuda_memory_object_from_handle(handle, mem_size, false); + CUdeviceptr dst_cuda_ptr = reinterpret_cast(map_buffer_onto_external_memory(externalMem, alloc_offset, vulkan_interop_info.size)); + CUdeviceptr src_cuda_ptr = reinterpret_cast(cuda_interop_info.ptr); + + cuMemcpyDtoD_v2(dst_cuda_ptr, src_cuda_ptr, vulkan_interop_info.size); #else - throw std::runtime_error("Unable to perform copy_from_cuda() with TI_WITH_CUDA=OFF"); + assert(false); #endif // TI_WITH_CUDA } @@ -263,13 +247,10 @@ OpenglMemoryObject import_opengl_memory_object_from_handle( } #endif // TI_WITH_OPENGL -template -void InteropHelper::copy_from_opengl(GraphicsRuntime &runtime, - ti::NdArray &vulkan_ndarray, - ti::Runtime &opengl_runtime, - ti::NdArray &opengl_ndarray) -{ +void ShadowBuffer::copy_from_opengl_(const ti::Memory &src) { #ifdef TI_WITH_OPENGL + ti::Runtime &runtime = renderer_->runtime_; + ti::Runtime &opengl_runtime = renderer_->client_runtime(); static bool initialized = false; if (!initialized) { TiOpenglRuntimeInteropInfo orii{}; @@ -283,12 +264,12 @@ void InteropHelper::copy_from_opengl(GraphicsRuntime &runtime, // Get Interop Info TiVulkanMemoryInteropInfo vulkan_interop_info{}; ti_export_vulkan_memory(runtime.runtime(), - vulkan_ndarray.memory().memory(), + memory_.memory(), &vulkan_interop_info); TiOpenglMemoryInteropInfo opengl_interop_info{}; ti_export_opengl_memory(opengl_runtime.runtime(), - opengl_ndarray.memory().memory(), &opengl_interop_info); + src.memory(), &opengl_interop_info); VkDevice vk_device = runtime.renderer_->device_; VkDeviceMemory vertex_buffer_mem = vulkan_interop_info.memory; @@ -332,14 +313,30 @@ void InteropHelper::copy_from_opengl(GraphicsRuntime &runtime, throw std::runtime_error("opengl failed"); } #else - throw std::runtime_error("Unable to perform copy_from_opengl() with TI_WITH_OPENGL=OFF"); + assert(false); #endif // TI_WITH_OPENGL } -template class InteropHelper; -template class InteropHelper; -template class InteropHelper; -template class InteropHelper; - -} +void ShadowBuffer::update() { + switch (renderer_->client_runtime_.arch()) { + case TI_ARCH_VULKAN: + copy_from_vulkan_(client_memory_); + break; + case TI_ARCH_X64: + case TI_ARCH_ARM64: + copy_from_cpu_(client_memory_); + break; + case TI_ARCH_CUDA: + copy_from_cuda_(client_memory_); + break; + case TI_ARCH_OPENGL: + copy_from_opengl_(client_memory_); + break; + default: + assert(false); + } } + + +} // namespace aot_demo +} // namespace ti diff --git a/framework/src/taichi/aot_demo/shadow_texture.cpp b/framework/src/taichi/aot_demo/shadow_texture.cpp new file mode 100644 index 00000000..c48e2cd4 --- /dev/null +++ b/framework/src/taichi/aot_demo/shadow_texture.cpp @@ -0,0 +1,48 @@ +#pragma once +#include "taichi/aot_demo/shadow_texture.hpp" +#include +#include "taichi/aot_demo/renderer.hpp" + +namespace ti { +namespace aot_demo { + +ShadowTexture::ShadowTexture(const std::shared_ptr &renderer, + const ti::Image &client_image, + ShadowTextureUsage usage) : renderer_(renderer) { + TiImageAllocateInfo iai{}; + iai.dimension = client_image.dimension(); + iai.extent = client_image.extent(); + iai.mip_level_count = client_image.mip_level_count(); + iai.format = client_image.format(); + ti::Image image = renderer->runtime_.allocate_image(iai); + + usage_ = usage; + image_ = std::move(image); + client_image_ = + ti::Image(renderer_->client_runtime_, client_image, client_image.dimension(), client_image.extent(), + client_image.mip_level_count(), client_image.format(), false); +} + +ShadowTexture::~ShadowTexture() { + image_.destroy(); +} + +void ShadowTexture::copy_from_vulkan_(const ti::Image &src) { + ti::Runtime &client_runtime = renderer_->client_runtime(); + + assert(client_runtime.arch() == TI_ARCH_VULKAN); + client_runtime.copy_image_device_to_device(image_.slice(), src.slice()); +} + +void ShadowTexture::update() { + switch (renderer_->client_runtime_.arch()) { + case TI_ARCH_VULKAN: + copy_from_vulkan_(client_image_); + break; + default: + assert(false); + } +} + +} // namespace aot_demo +} // namespace ti