From 0c0ff192a2304e114bc9e6557582dfba101360ff Mon Sep 17 00:00:00 2001 From: Olga Malysheva Date: Thu, 31 Oct 2024 18:23:11 +0100 Subject: [PATCH] Update documentation for oneTBB 2022.0.0 (#1544) --- doc/GSG/get_started.rst | 27 +- doc/conf.py | 10 +- doc/index/toctree.rst | 1 - doc/main/reference/reference.rst | 1 + doc/main/reference/rvalue_reduce.rst | 4 +- doc/main/reference/try_put_and_wait.rst | 324 ++++++++++++++++++ .../Exceptions_and_Cancellation.rst | 9 +- doc/main/tbb_userguide/Linux_OS.rst | 8 +- ...C_Dynamic_Memory_Interface_Replacement.rst | 4 +- doc/main/tbb_userguide/Windows_OS_ug.rst | 10 +- 10 files changed, 369 insertions(+), 29 deletions(-) create mode 100644 doc/main/reference/try_put_and_wait.rst diff --git a/doc/GSG/get_started.rst b/doc/GSG/get_started.rst index d437ce89b8..2af04be6b0 100644 --- a/doc/GSG/get_started.rst +++ b/doc/GSG/get_started.rst @@ -8,11 +8,36 @@ It is helpful for new users of parallel programming and experienced developers t It is recommended for you to have a basic knowledge of C++ programming and some experience with parallel programming concepts. +|full_name| is a runtime-based parallel programming model for C++ code that uses tasks. +The template-based runtime library can help you harness the latent performance of multi-core processors. + +oneTBB enables you to simplify parallel programming by breaking computation into parallel running tasks. Within a single process, +parallelism is carried out by mapping tasks to threads. Threads are an operating system mechanism that allows the same or different sets of instructions +to be executed simultaneously. Using threads can make your program work faster and more efficiently. + +Here you can see one of the possible executions of tasks by threads. + +.. figure:: Images/how-oneTBB-works.png + :scale: 70% + :align: center + +Use oneTBB to write scalable applications that: + +* Specify logical parallel structure instead of threads. +* Emphasize data-parallel programming. 
+* Take advantage of concurrent collections and parallel algorithms. + +oneTBB supports nested parallelism and load balancing. It means that you can use the library without worrying about oversubscribing a system, which happens when more tasks are assigned to a system than it can handle efficiently. + +oneTBB is used in different areas, such as scientific simulations, gaming, data analysis, etc. + +It is available as a stand-alone product and as part of the |base_tk|. + To start using oneTBB, follow the next steps: ********************************************* -#. Learn what :ref:`oneTBB is` and see the :ref:`System Requirements`. +#. See the :ref:`System Requirements`. #. :ref:`Install oneTBB`. #. Run your program using oneTBB following the :ref:`Next Steps `. #. Learn how to :ref:`Integrate oneTBB into your project ` using CMake* and pkg-config tool. diff --git a/doc/conf.py b/doc/conf.py index a0ef593b9b..00dfed0e7f 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -140,9 +140,7 @@ } if BUILD_TYPE != 'oneapi' and BUILD_TYPE != 'dita': - html_theme_options = { - "extra_footer": "
© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. No license (express or implied, by estoppel or otherwise) to any intellectual property rights is granted by this document, with the sole exception that code included in this document is licensed subject to the Zero-Clause BSD open source license (OBSD), http://opensource.org/licenses/0BSD.

oneTBB is licensed under Apache License Version 2.0. Refer to the LICENSE file for the full license text and copyright notice.
" - } + html_theme_options["extra_footer"]="
© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. No license (express or implied, by estoppel or otherwise) to any intellectual property rights is granted by this document, with the sole exception that code included in this document is licensed subject to the Zero-Clause BSD open source license (0BSD), http://opensource.org/licenses/0BSD.

oneTBB is licensed under Apache License Version 2.0. Refer to the LICENSE file for the full license text and copyright notice.
" # Add any paths that contain custom static files (such as style sheets) here, @@ -159,11 +157,7 @@ else: html_js_files = ['custom.js'] -html_theme_options = { - "logo": { - "text": "oneTBB Documentation", - } -} +html_theme_options["logo"] = {"text": "oneTBB Documentation"} html_logo = '_static/oneAPI-rgb-rev-100.png' html_favicon = '_static/favicons.png' diff --git a/doc/index/toctree.rst b/doc/index/toctree.rst index fba9aee46c..542a4bb601 100644 --- a/doc/index/toctree.rst +++ b/doc/index/toctree.rst @@ -17,7 +17,6 @@ :maxdepth: 2 /GSG/get_started - /GSG/intro /GSG/system_requirements /GSG/installation /GSG/next_steps diff --git a/doc/main/reference/reference.rst b/doc/main/reference/reference.rst index c8ba0af944..4c293c02c7 100644 --- a/doc/main/reference/reference.rst +++ b/doc/main/reference/reference.rst @@ -50,3 +50,4 @@ The key properties of a preview feature are: concurrent_lru_cache_cls task_group_extensions custom_mutex_chmap + try_put_and_wait diff --git a/doc/main/reference/rvalue_reduce.rst b/doc/main/reference/rvalue_reduce.rst index 69d480d465..7cf66d86b3 100644 --- a/doc/main/reference/rvalue_reduce.rst +++ b/doc/main/reference/rvalue_reduce.rst @@ -33,7 +33,8 @@ or .. cpp:function:: Value Func::operator()(const Range& range, const Value& x) const - Accumulates the result for a subrange, starting with initial value ``x``. The ``Range`` type must meet the `Range requirements _`. + Accumulates the result for a subrange, starting with initial value ``x``. The ``Range`` type must meet the + `Range requirements `_. The ``Value`` type must be the same as a corresponding template parameter for the `parallel_reduce algorithm `_. If both ``rvalue`` and ``lvalue`` forms are provided, the ``rvalue`` is preferred. @@ -55,6 +56,7 @@ Example ******* .. 
code:: cpp + // C++17 #include #include diff --git a/doc/main/reference/try_put_and_wait.rst b/doc/main/reference/try_put_and_wait.rst new file mode 100644 index 0000000000..4e05961f39 --- /dev/null +++ b/doc/main/reference/try_put_and_wait.rst @@ -0,0 +1,324 @@ +.. _try_put_and_wait: + +Waiting for Single Messages in Flow Graph +========================================= + +.. contents:: + :local: + :depth: 1 + +Description +*********** + +This feature adds a new ``try_put_and_wait`` interface to the receiving nodes in the Flow Graph. +This function puts a message as an input into a Flow Graph and waits until all work related to +that message is complete. +``try_put_and_wait`` may reduce latency compared to calling ``graph::wait_for_all`` since +``graph::wait_for_all`` waits for all work, including work that is unrelated to the input message, to complete. + +``node.try_put_and_wait(msg)`` performs ``node.try_put(msg)`` on the node and waits until the work on ``msg`` is completed. +When the call returns, the following conditions hold: + +* Any task initiated by any node in the Flow Graph that involves working with ``msg`` or any other intermediate result + computed from ``msg`` is completed. +* No intermediate results computed from ``msg`` remain in any buffers in the graph. + +.. caution:: + + To prevent ``try_put_and_wait`` calls from waiting indefinitely, avoid using buffering nodes at the end of the Flow Graph since the final result + will not be automatically consumed by the Flow Graph. + +.. caution:: + + The ``multifunction_node`` and ``async_node`` classes are not currently supported by this feature. Including one of these nodes in the + Flow Graph may cause ``try_put_and_wait`` to exit early, even if the computations on the initial input message are + still in progress. + +API +*** + +Header +------ + +..
code:: cpp + + #define TBB_PREVIEW_FLOW_GRAPH_FEATURES // macro option 1 + #define TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT // macro option 2 + #include + +Synopsis +-------- + +.. code:: cpp + + namespace oneapi { + namespace tbb { + template + class continue_node { + public: + bool try_put_and_wait(const continue_msg& input); + }; // class continue_node + + template + class function_node { + public: + bool try_put_and_wait(const Input& input); + }; // class function_node + + template + class overwrite_node { + public: + bool try_put_and_wait(const T& input); + }; // class overwrite_node + + template + class write_once_node { + public: + bool try_put_and_wait(const T& input); + }; // class write_once_node + + template + class buffer_node { + public: + bool try_put_and_wait(const T& input); + }; // class buffer_node + + template + class queue_node { + public: + bool try_put_and_wait(const T& input); + }; // class queue_node + + template > + class priority_queue_node { + public: + bool try_put_and_wait(const T& input); + }; // class priority_queue_node + + template + class sequencer_node { + public: + bool try_put_and_wait(const T& input); + }; // class sequencer_node + + template + class limiter_node { + public: + bool try_put_and_wait(const T& input); + }; // class limiter_node + + template + class broadcast_node { + public: + bool try_put_and_wait(const T& input); + }; // class broadcast_node + + template + class split_node { + public: + bool try_put_and_wait(const TupleType& input); + }; // class split_node + } // namespace tbb + } // namespace oneapi + +Member Functions +---------------- + +.. code:: cpp + + template + bool continue_node::try_put_and_wait(const continue_msg& input) + +**Effects**: Increments the count of input signals received. If the incremented count is equal to the number +of known predecessors, performs the ``body`` function object execution. 
+ +Waits for the completion of the ``input`` in the Flow Graph, meaning all tasks created by each node and +related to ``input`` are executed, and no related objects remain in any buffer within the graph. + +**Returns**: ``true``. + +.. code:: cpp + + template + bool function_node::try_put_and_wait(const Input& input) + +**Effects**: If the concurrency limit allows, executes the user-provided body on the incoming message ``input``. +Otherwise, depending on the ``Policy`` of the node, either queues the incoming message ``input`` or rejects it. + +Waits for the completion of the ``input`` in the Flow Graph, meaning all tasks created by each node and +related to ``input`` are executed, and no related objects remain in any buffer within the graph. + +**Returns**: ``true`` if the input is accepted, ``false`` otherwise. + +.. code:: cpp + + template + bool overwrite_node::try_put_and_wait(const T& input) + +**Effects**: Stores ``input`` in the internal single-item buffer and broadcasts it to all successors. + +Waits for the completion of the ``input`` in the Flow Graph, meaning all tasks created by each node and +related to ``input`` are executed, and no related objects remain in any buffer within the graph. + +**Returns**: ``true``. + +.. caution:: + + Since the input element is not removed from the ``overwrite_node`` once accepted by the successor, + remove it by explicitly calling the ``clear()`` method or by overwriting it with another element to prevent + ``try_put_and_wait`` from waiting indefinitely. + +.. code:: cpp + + template + bool write_once_node::try_put_and_wait(const T& input) + +**Effects**: Stores ``input`` in the internal single-item buffer if it does not contain a valid value already. +If a new value is set, the node broadcasts it to all successors. + +Waits for the completion of the ``input`` in the Flow Graph, meaning all tasks created by each node and +related to ``input`` are executed, and no related objects remain in any buffer within the graph.
+ +**Returns**: ``true`` for the first call after construction or after a call to ``clear()``; ``false`` otherwise. + +.. caution:: + + Since the input element is not removed from the ``write_once_node`` once accepted by the successor, + remove it by explicitly calling the ``clear()`` method to prevent ``try_put_and_wait`` from waiting indefinitely. + +.. code:: cpp + + template + bool buffer_node::try_put_and_wait(const T& input) + +**Effects**: Adds ``input`` to the set of items managed by the node and tries forwarding it to a successor. + +Waits for the completion of the ``input`` in the Flow Graph, meaning all tasks created by each node and +related to ``input`` are executed, and no related objects remain in any buffer within the graph. + +**Returns**: ``true``. + +.. code:: cpp + + template + bool queue_node::try_put_and_wait(const T& input) + +**Effects**: Adds ``input`` to the set of items managed by the node and tries forwarding the least recently added item +to a successor. + +Waits for the completion of the ``input`` in the Flow Graph, meaning all tasks created by each node and +related to ``input`` are executed, and no related objects remain in any buffer within the graph. + +**Returns**: ``true``. + +.. code:: cpp + + template + bool priority_queue_node::try_put_and_wait(const T& input) + +**Effects**: Adds ``input`` to the ``priority_queue_node`` and attempts to forward the item with the highest +priority among all items added to the node but not yet forwarded to the successors. + +Waits for the completion of the ``input`` in the Flow Graph, meaning all tasks created by each node and +related to ``input`` are executed, and no related objects remain in any buffer within the graph. + +**Returns**: ``true``. + +.. code:: cpp + + template + bool sequencer_node::try_put_and_wait(const T& input) + +**Effects**: Adds ``input`` to the ``sequencer_node`` and tries forwarding the next item in sequence to a successor.
+ +Waits for the completion of the ``input`` in the Flow Graph, meaning all tasks created by each node and +related to ``input`` are executed, and no related objects remain in any buffer within the graph. + +**Returns**: ``true``. + +.. code:: cpp + + template + bool limiter_node::try_put_and_wait(const T& input) + +**Effects**: If the broadcast count is below the threshold, broadcasts ``input`` to all successors. + +Waits for the completion of the ``input`` in the Flow Graph, meaning all tasks created by each node and +related to ``input`` are executed, and no related objects remain in any buffer within the graph. + +**Returns**: ``true`` if ``input`` is broadcast; ``false`` otherwise. + +.. code:: cpp + + template + bool broadcast_node::try_put_and_wait(const T& input) + +**Effects**: Broadcasts ``input`` to all successors. + +Waits for the completion of the ``input`` in the Flow Graph, meaning all tasks created by each node and +related to ``input`` are executed, and no related objects remain in any buffer within the graph. + +**Returns**: ``true`` even if the node cannot successfully forward the message to any of its successors. + +.. code:: cpp + + template + bool split_node::try_put_and_wait(const TupleType& input) + +**Effects**: Broadcasts each element in the incoming tuple to the nodes connected to the ``split_node`` output ports. +The element at index ``i`` of ``input`` is broadcast through the output port number ``i``. + +Waits for the completion of the ``input`` in the Flow Graph, meaning all tasks created by each node and +related to ``input`` are executed, and no related objects remain in any buffer within the graph. + +**Returns**: ``true``. + +Example +******* + +..
code:: cpp + + #define TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + #include <oneapi/tbb/flow_graph.h> + #include <oneapi/tbb/parallel_for.h> + + struct f1_body; + struct f2_body; + struct f3_body; + struct f4_body; + + int main() { + using namespace oneapi::tbb; + + flow::graph g; + flow::broadcast_node<int> start_node(g); + + flow::function_node<int, int> f1(g, flow::unlimited, f1_body{}); + flow::function_node<int, int> f2(g, flow::unlimited, f2_body{}); + flow::function_node<int, int> f3(g, flow::unlimited, f3_body{}); + + flow::join_node<std::tuple<int, int>> join(g); + + flow::function_node<std::tuple<int, int>, int> f4(g, flow::serial, f4_body{}); + + flow::make_edge(start_node, f1); + flow::make_edge(f1, f2); + + flow::make_edge(start_node, f3); + + flow::make_edge(f2, flow::input_port<0>(join)); + flow::make_edge(f3, flow::input_port<1>(join)); + + flow::make_edge(join, f4); + + // Submit work into the graph + parallel_for(0, 100, [&](int input) { + start_node.try_put_and_wait(input); + + // Post processing the result of input + }); + } + +Each iteration of ``parallel_for`` submits an input into the Flow Graph. After returning from ``try_put_and_wait(input)``, it is +guaranteed that all of the work related to the completion of ``input`` is done by all of the nodes in the graph. Tasks related to inputs +submitted by other calls are not guaranteed to be completed. diff --git a/doc/main/tbb_userguide/Exceptions_and_Cancellation.rst b/doc/main/tbb_userguide/Exceptions_and_Cancellation.rst index 724b8b6ec9..290f2f2cc3 100644 --- a/doc/main/tbb_userguide/Exceptions_and_Cancellation.rst +++ b/doc/main/tbb_userguide/Exceptions_and_Cancellation.rst @@ -22,14 +22,11 @@ the following steps generally occur: thread that invoked the algorithm. -The exception thrown in step 3 might be the original exception, or might -merely be a summary of type ``captured_exception``. The latter usually -occurs on current systems because propagating exceptions between threads -requires support for the C++ ``std::exception_ptr`` functionality.
As -compilers evolve to support this functionality, future versions of +As compilers evolve to support this functionality, future versions of oneTBB might throw the original exception. So be sure your code can catch either type of exception. The following example demonstrates -exception handling. +exception handling: + :: diff --git a/doc/main/tbb_userguide/Linux_OS.rst b/doc/main/tbb_userguide/Linux_OS.rst index 1d25a04dcd..0f0c245720 100644 --- a/doc/main/tbb_userguide/Linux_OS.rst +++ b/doc/main/tbb_userguide/Linux_OS.rst @@ -25,12 +25,12 @@ structure for Linux\*, relative to ** - | ``LIBRARY_PATH`` | ``LD_LIBRARY_PATH`` -where +Where: * ```` - ``ia32`` or ``intel64`` + + .. note:: Starting with oneTBB 2022.0, 32-bit binaries are supported only by the open-source version of the library. * ```` - ``libtbb``, ``libtbbmalloc``, ``libtbbmalloc_proxy`` or ``libtbbbind`` - * ```` - ``_debug`` or empty - -* ```` - binary version in a form of ``.`` \ No newline at end of file +* ```` - binary version in a form of ``.`` diff --git a/doc/main/tbb_userguide/Windows_C_Dynamic_Memory_Interface_Replacement.rst b/doc/main/tbb_userguide/Windows_C_Dynamic_Memory_Interface_Replacement.rst index f4f78ae567..cd2d2e1a93 100644 --- a/doc/main/tbb_userguide/Windows_C_Dynamic_Memory_Interface_Replacement.rst +++ b/doc/main/tbb_userguide/Windows_C_Dynamic_Memory_Interface_Replacement.rst @@ -44,7 +44,6 @@ To do the replacement use one of the following methods: - Alternatively, add the following parameters to the linker options for the .exe or .dll file that is loaded during application startup. 
- For 32-bit code (note the triple underscore): @@ -52,8 +51,7 @@ To do the replacement use one of the following methods: tbbmalloc_proxy.lib /INCLUDE:"___TBB_malloc_proxy" - - + For 64-bit code (note the double underscore): diff --git a/doc/main/tbb_userguide/Windows_OS_ug.rst b/doc/main/tbb_userguide/Windows_OS_ug.rst index 3fc4a5a223..85fc3306ce 100644 --- a/doc/main/tbb_userguide/Windows_OS_ug.rst +++ b/doc/main/tbb_userguide/Windows_OS_ug.rst @@ -30,12 +30,13 @@ structure for Windows\*, relative to <*tbb_install_dir*>. - Same as corresponding ``.dll`` file. - \ -where +Where: * ```` - ``ia32`` or ``intel64`` -* ```` - ``tbb``, ``tbbmalloc``, ``tbbmalloc_proxy`` or ``tbbbind`` + .. note:: Starting with oneTBB 2022.0, 32-bit binaries are supported only by the open-source version of the library. +* ```` - ``tbb``, ``tbbmalloc``, ``tbbmalloc_proxy`` or ``tbbbind`` * ```` - ``14`` - use for dynamic linkage with the CRT @@ -47,11 +48,10 @@ where - ``_mt`` - use for static linkage with the CRT * ```` - ``_debug`` or empty - * ```` - binary version -The last column shows which environment variables are used by the -Microsoft\* Visual C++\* or Intel® C++ Compiler Classic or Intel® oneAPI DPC++/C++ Compiler to find these +The last column shows which environment variables are used by the +Microsoft\* Visual C++\* or Intel® C++ Compiler Classic or Intel® oneAPI DPC++/C++ Compiler to find these subdirectories. .. CAUTION::