diff --git a/mps_data/job_1.yaml b/mps_data/job_1.yaml new file mode 100644 index 000000000..9afb9252c --- /dev/null +++ b/mps_data/job_1.yaml @@ -0,0 +1,31 @@ +version: 9999 +resources: + - type: node + count: 1 + with: + - type: socket + count: 1 + with: + - type: slot + label: default + exclusive: false + count: 1 + with: + - type: core + count: 1 + exclusive: true + - type: gpu + count: 1 + exclusive: false + +# a comment +attributes: + system: + duration: 120 + cosched: true + c_r: true +tasks: + - command: [ "app" ] + slot: default + count: + per_slot: 1 diff --git a/mps_data/job_1N.yaml b/mps_data/job_1N.yaml new file mode 100644 index 000000000..9de05e4f5 --- /dev/null +++ b/mps_data/job_1N.yaml @@ -0,0 +1,31 @@ +version: 9999 +resources: + - type: node + count: 1 + with: + - type: socket + count: 1 + with: + - type: core + count: 2 + - type: gpu + count: 1 + with: + - type: slot + label: default + count: 1 + with: + - type: gpu_mps + count: 1 + +# a comment +attributes: + system: + duration: 120 + cosched: true + c_r: true +tasks: + - command: [ "app" ] + slot: default + count: + per_slot: 1 diff --git a/mps_data/job_2.yaml b/mps_data/job_2.yaml new file mode 100644 index 000000000..b05ea3538 --- /dev/null +++ b/mps_data/job_2.yaml @@ -0,0 +1,35 @@ +version: 9999 +resources: + - type: node + count: 1 + with: + - type: socket + count: 1 + with: + - type: slot + label: default + exclusive: false + count: 1 + with: + - type: core + count: 2 + exclusive: true + - type: gpu + count: 2 + exclusive: false + with: + - type: gpu_mps + count: 1 + exclusive: true + +# a comment +attributes: + system: + duration: 120 + cosched: true + c_r: true +tasks: + - command: [ "app" ] + slot: default + count: + per_slot: 1 diff --git a/mps_data/job_2N.yaml b/mps_data/job_2N.yaml new file mode 100644 index 000000000..eda2f0652 --- /dev/null +++ b/mps_data/job_2N.yaml @@ -0,0 +1,27 @@ +version: 9999 +resources: + - type: node + count: 4 + with: + - type: socket + count: 1 + with: 
+ - type: slot + label: default + count: 1 + with: + - type: core + count: 4 + - type: gpu + count: 4 +# a comment +attributes: + system: + duration: 720 + cosched: true + c_r: true +tasks: + - command: [ "app" ] + slot: default + count: + per_slot: 1 diff --git a/mps_data/job_4N.yaml b/mps_data/job_4N.yaml new file mode 100644 index 000000000..7db06cc25 --- /dev/null +++ b/mps_data/job_4N.yaml @@ -0,0 +1,27 @@ +version: 9999 +resources: + - type: node + count: 4 + with: + - type: socket + count: 1 + with: + - type: slot + label: default + count: 1 + with: + - type: core + count: 2 + - type: gpu + count: 2 +# a comment +attributes: + system: + duration: 240 + cosched: true + c_r: true +tasks: + - command: [ "app" ] + slot: default + count: + per_slot: 1 diff --git a/mps_data/small.graphml b/mps_data/small.graphml new file mode 100644 index 000000000..89ab6f05d --- /dev/null +++ b/mps_data/small.graphml @@ -0,0 +1,138 @@ + + + + + + + + + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns + http://graphml.graphdrawing.org/xmlns/1.1/graphml.xsd"> + + + + 0 + + + + + + 1 + + + containment + + + + + containment + + + contains + + + in + + + + + 0 + + + 0 + + + 1 + + + + + MULTIPLY + + + + 1 + + + + containment + + + 1 + + + 1 + + + + + + + + + 1 + cluster + small + + + rack + rack + + + node + node + + + socket + socket + + + core + core + + + gpu + gpu + + + gpu_mps + gpu_mps + + + memory + memory + 32 + GB + + + + 1 + + + 1 + 2 + + + 1 + + + 1 + 8 + + + 1 + 2 + + + 1 + 2 + + + 1 + 4 + + + + diff --git a/resource/CMakeLists.txt b/resource/CMakeLists.txt index 625be874c..c623a5be5 100644 --- a/resource/CMakeLists.txt +++ b/resource/CMakeLists.txt @@ -17,6 +17,7 @@ set(RESOURCE_HEADERS policies/dfu_match_multilevel_id_impl.hpp policies/dfu_match_locality.hpp policies/dfu_match_var_aware.hpp + policies/dfu_match_cosched_aware.hpp policies/dfu_match_policy_factory.hpp jobinfo/jobinfo.hpp 
schema/resource_graph.hpp
@@ -52,6 +53,7 @@ set(RESOURCE_HEADERS
 add_library(resource STATIC
     policies/dfu_match_locality.cpp
     policies/dfu_match_var_aware.cpp
+    policies/dfu_match_cosched_aware.cpp
     policies/dfu_match_policy_factory.cpp
     jobinfo/jobinfo.cpp
     schema/resource_data.cpp
diff --git a/resource/evaluators/scoring_api.cpp b/resource/evaluators/scoring_api.cpp
index f7ece7880..7d745f314 100644
--- a/resource/evaluators/scoring_api.cpp
+++ b/resource/evaluators/scoring_api.cpp
@@ -112,6 +112,11 @@ void scoring_api_t::merge (const scoring_api_t &o)
     }
 }
 
+/* Append one child score to the list that is averaged later. */
+void scoring_api_t::add_element_to_child_score (int score)
+{
+    children_score_list.push_back (score);
+}
 void scoring_api_t::resrc_types (subsystem_t s, std::vector &v)
 {
     for (auto &kv : m_ssys_map[s])
@@ -139,7 +144,28 @@ void scoring_api_t::set_avail (unsigned int avail)
 {
     m_avail = avail;
 }
-
+/* Replace the recorded child scores with a copy of source. */
+void scoring_api_t::set_children_score_vector (const std::vector source)
+{
+    children_score_list = source;
+}
+/* Return a copy of the recorded child scores. */
+std::vector scoring_api_t::get_children_score_vector ()
+{
+    return children_score_list;
+}
+/* Return the integer mean of the recorded child scores, or 0 when none
+ * have been recorded.  The element count is converted to a signed type
+ * before dividing: size () is unsigned, so dividing the signed sum by it
+ * directly would convert a negative sum to a huge unsigned value. */
+int64_t scoring_api_t::get_children_avearge_score ()
+{
+    if (children_score_list.empty ())
+        return 0;
+    const int64_t total =
+        std::accumulate (children_score_list.begin (), children_score_list.end (), 0LL);
+    return total / static_cast<int64_t> (children_score_list.size ());
+}
 bool scoring_api_t::is_contained (subsystem_t s, resource_type_t const &r)
 {
     return m_ssys_map[s].contains (r);
diff --git a/resource/evaluators/scoring_api.hpp b/resource/evaluators/scoring_api.hpp
index 824c03f8c..8613496c7 100644
--- a/resource/evaluators/scoring_api.hpp
+++ b/resource/evaluators/scoring_api.hpp
@@ -56,6 +56,11 @@ class scoring_api_t {
     void set_overall_score (int64_t overall);
     unsigned int avail ();
     void set_avail (unsigned int avail);
+    // Add scores to a vector that keeps the children score.
Needed to show preferences above slot also + void add_element_to_child_score(int score); + std::vector get_children_score_vector(); + void set_children_score_vector(const std::vector source); + int64_t get_children_avearge_score(); bool is_contained (subsystem_t s, resource_type_t const &r); template @@ -99,6 +104,7 @@ class scoring_api_t { private: intern::interned_key_vec> m_ssys_map; + std::vector children_score_list; bool m_hier_constrain_now = false; int64_t m_overall_score = -1; unsigned int m_avail = 0; diff --git a/resource/libjobspec/jobspec.cpp b/resource/libjobspec/jobspec.cpp index 34cae0faf..352bb810e 100644 --- a/resource/libjobspec/jobspec.cpp +++ b/resource/libjobspec/jobspec.cpp @@ -256,6 +256,36 @@ Task::Task (const YAML::Node &tasknode) } } +namespace { +std::vector handle_cosched (std::vector &resources, bool cosched, bool c_r) +{ + for (auto &res : resources) { + if (res.type == Flux::resource_model::gpu_rt) { + /* GPU has no child meaning mps not defined*/ + if (cosched) { + res.exclusive = tristate_t::FALSE; + if (c_r) { + res.cosched = true; + res.cosched_count = 0; + } + } + if (res.with.size () == 0) { + std::cout << "adding mps partition in jobspec "< 0) { + handle_cosched (res.with, cosched, c_r); + } + } + return resources; +} +} // namespace + namespace { std::vector parse_yaml_tasks (const YAML::Node &tasks) { @@ -311,6 +341,10 @@ Attributes parse_yaml_attributes (const YAML::Node &attrs) a.system.queue = s.second.as (); } else if (s.first.as () == "cwd") { a.system.cwd = s.second.as (); + } else if (s.first.as () == "cosched") { + a.system.cosched = s.second.as (); + } else if (s.first.as () == "c_r") { + a.system.c_r = s.second.as (); } else if (s.first.as () == "environment") { for (auto &&e : s.second) { a.system.environment[e.first.as ()] = @@ -329,7 +363,15 @@ Attributes parse_yaml_attributes (const YAML::Node &attrs) return a; } } // namespace - +void print_resource_tree (std::vector resources) +{ + for (auto &res : resources) 
{ + std::cout << "Resource type: " << res.type << " count: " << res.count.min + << " with: " << res.with.size () << " exclusive flag " + << static_cast (res.exclusive) << std::endl; + print_resource_tree (res.with); + } +} Jobspec::Jobspec (const YAML::Node &top) { try { @@ -370,7 +412,9 @@ Jobspec::Jobspec (const YAML::Node &top) /* Import resources section */ resources = parse_yaml_resources (top["resources"]); - + /* Handle coscheduling case */ + resources = handle_cosched (resources, attributes.system.cosched, attributes.system.c_r); + /*print_resource_tree (resources);*/ /* Import tasks section */ tasks = parse_yaml_tasks (top["tasks"]); } catch (YAML::Exception &e) { diff --git a/resource/libjobspec/jobspec.hpp b/resource/libjobspec/jobspec.hpp index d8ae7ed82..1c610d484 100644 --- a/resource/libjobspec/jobspec.hpp +++ b/resource/libjobspec/jobspec.hpp @@ -64,7 +64,8 @@ class Resource { // user_data has no library internal usage, it is // entirely for the convenience of external code std::unordered_map user_data; - + bool cosched = false; + unsigned cosched_count = 0; Resource (const YAML::Node &); }; @@ -81,8 +82,11 @@ class Task { struct System { double duration = 0.0f; + bool cosched = false; + bool c_r = false; // Job Support Checkpoint/Restart std::string queue = ""; std::string cwd = ""; + std::unordered_map environment; std::unordered_map optional; std::shared_ptr constraint = nullptr; diff --git a/resource/modules/resource_match.cpp b/resource/modules/resource_match.cpp index f8554555a..034c96beb 100644 --- a/resource/modules/resource_match.cpp +++ b/resource/modules/resource_match.cpp @@ -1488,6 +1488,7 @@ static inline std::string get_status_string (int64_t now, int64_t at) static inline bool is_existent_jobid (const std::shared_ptr &ctx, uint64_t jobid) { + return (ctx->jobs.find (jobid) != ctx->jobs.end ()) ? 
true : false; }
diff --git a/resource/policies/base/dfu_match_cb.cpp b/resource/policies/base/dfu_match_cb.cpp
index f488aae7f..2049b24ba 100644
--- a/resource/policies/base/dfu_match_cb.cpp
+++ b/resource/policies/base/dfu_match_cb.cpp
@@ -69,7 +69,7 @@ int dfu_match_cb_t::dom_finish_vtx (vtx_t u,
                                     subsystem_t subsystem,
                                     const std::vector &resources,
                                     const resource_graph_t &g,
-                                    scoring_api_t &dfu)
+                                    scoring_api_t &dfu,traverser_match_kind_t sm)
 {
     m_trav_level--;
     return 0;
@@ -94,6 +94,9 @@ int dfu_match_cb_t::aux_finish_vtx (vtx_t u,
     m_trav_level--;
     return 0;
 }
+int dfu_match_cb_t::dom_node_emit(vtx_t u,subsystem_t subsytem,const resource_graph_t &g,unsigned int needs ) {
+    return 0;
+}
 
 int dfu_match_cb_t::set_stop_on_k_matches (unsigned int k)
 {
diff --git a/resource/policies/base/dfu_match_cb.hpp b/resource/policies/base/dfu_match_cb.hpp
index 7c96b48cd..c01c15edc 100644
--- a/resource/policies/base/dfu_match_cb.hpp
+++ b/resource/policies/base/dfu_match_cb.hpp
@@ -20,8 +20,10 @@
 #include "resource/schema/resource_graph.hpp"
 #include "resource/evaluators/scoring_api.hpp"
 #include "resource/policies/base/matcher.hpp"
+#include "resource/jobinfo/jobinfo.hpp"
 #include "resource/planner/c/planner.h"
 
+enum class traverser_match_kind_t { RESOURCE_MATCH, SLOT_MATCH, NONE_MATCH, PRISTINE_NONE_MATCH };
 namespace Flux {
 namespace resource_model {
 
@@ -78,6 +80,27 @@ class dfu_match_cb_t : public matcher_data_t, public matcher_util_api_t {
                                   const std::vector &resources,
                                   const resource_graph_t &g);
 
+    /*/*!*/
+    /* * Called back on each postorder visit of the dominant subsystem.*/
+    /* * Must be overridden
by a derived class if this visit event should*/ + /* * be programed.*/ + /* * Should return a score calculated based on the subtree and up walks*/ + /* * using the score API object (dfu). Any score above MATCH_MET*/ + /* * is qualified to be a match.*/ + /* **/ + /* * \param u descriptor of the visiting vertex*/ + /* * \param subsystem subsystem_t object of the dominant subsystem*/ + /* * \param resources vector of resources to be matched*/ + /* * \param g filtered resource graph*/ + /* * \param dfu score interface object -- See utilities/README.md*/ + /* **/ + /* * \return return 0 on success; otherwise -1*/ + /* */ + /*virtual int dom_finish_vtx (vtx_t u,*/ + /* subsystem_t subsystem,*/ + /* const std::vector &resources,*/ + /* const resource_graph_t &g,*/ + /* scoring_api_t &dfu);*/ /*! * Called back on each postorder visit of the dominant subsystem. * Must be overridden by a derived class if this visit event should @@ -91,15 +114,17 @@ class dfu_match_cb_t : public matcher_data_t, public matcher_util_api_t { * \param resources vector of resources to be matched * \param g filtered resource graph * \param dfu score interface object -- See utilities/README.md + * \param sm Type of match, whether this match is a slot or not * * \return return 0 on success; otherwise -1 */ + virtual int dom_finish_vtx (vtx_t u, subsystem_t subsystem, const std::vector &resources, const resource_graph_t &g, - scoring_api_t &dfu); - + scoring_api_t &dfu, + traverser_match_kind_t sm); /*! Called back on each pre-up visit of an auxiliary subsystem. * Must be overridden by a derived class if this visit event should * be programed. 
@@ -155,6 +180,8 @@ class dfu_match_cb_t : public matcher_data_t, public matcher_util_api_t {
      */
     virtual int get_stop_on_k_matches () const;
 
+    virtual int dom_node_emit(vtx_t u,subsystem_t subsytem,const resource_graph_t &g,unsigned int needs );
+
     void incr ();
     void decr ();
     std::string level ();
diff --git a/resource/policies/dfu_match_cosched_aware.cpp b/resource/policies/dfu_match_cosched_aware.cpp
new file mode 100644
index 000000000..08ec10f55
--- /dev/null
+++ b/resource/policies/dfu_match_cosched_aware.cpp
@@ -0,0 +1,182 @@
+/*****************************************************************************\
+ * Copyright 2019 Lawrence Livermore National Security, LLC
+ * (c.f. AUTHORS, NOTICE.LLNS, LICENSE)
+ *
+ * This file is part of the Flux resource manager framework.
+ * For details, see https://github.com/flux-framework.
+ *
+ * SPDX-License-Identifier: LGPL-3.0
+\*****************************************************************************/
+
+#include "policies/base/dfu_match_cb.hpp"
+#include "schema/data_std.hpp"
+extern "C" {
+#if HAVE_CONFIG_H
+#include
+#endif
+}
+
+#include "resource/policies/dfu_match_cosched_aware.hpp"
+
+namespace Flux {
+namespace resource_model {
+
+cosched_aware_t::cosched_aware_t ()
+{
+}
+
+cosched_aware_t::cosched_aware_t (const std::string &name) : dfu_match_cb_t (name)
+{
+}
+
+cosched_aware_t::cosched_aware_t (const cosched_aware_t &o) : dfu_match_cb_t (o)
+{
+}
+
+cosched_aware_t &cosched_aware_t::operator= (const cosched_aware_t &o)
+{
+    dfu_match_cb_t::operator= (o);
+    return *this;
+}
+
+cosched_aware_t::~cosched_aware_t ()
+{
+}
+
+/* For each requested resource, derive the needed count from the qualified
+ * count (calc_count) and accumulate that many via choose_accum_best_k; a
+ * zero count marks the request unmet.  Returns 0 when met, -1 otherwise. */
+int cosched_aware_t::dom_finish_graph (subsystem_t subsystem,
+                                       const std::vector &resources,
+                                       const resource_graph_t &g,
+                                       scoring_api_t &dfu)
+{
+    int score = MATCH_MET;
+    fold::less comp;
+
+    for (auto &resource : resources) {
+        unsigned int qc = dfu.qualified_count (subsystem, resource.type);
+        unsigned int count = calc_count (resource, qc);
+        if (count == 0) {
+            score = MATCH_UNMET;
+            break;
+        }
+        dfu.choose_accum_best_k (subsystem, resource.type, count, comp);
+    }
+    dfu.set_overall_score (score);
+    return (score == MATCH_MET) ? 0 : -1;
+}
+
+/* Call choose_accum_all for every resource type that dfu reports for the
+ * subsystem; always returns 0. */
+int cosched_aware_t::dom_finish_slot (subsystem_t subsystem, scoring_api_t &dfu)
+{
+    std::vector types;
+    dfu.resrc_types (subsystem, types);
+    for (auto &type : types) {
+        dfu.choose_accum_all (subsystem, type);
+    }
+    return 0;
+}
+/**
+ IMPORTANT: Averaging Scores Across Nodes
+
+ Overview:
+ - Currently, scores are being set in two different locations:
+   1. **Match Callback**: Scores are assigned to individual nodes.
+   2. **Traverser**: Scores are assigned to parent nodes.
+
+ Problem:
+ - For GPU_MPS (a child node), we cannot directly set the score for its parent GPU node.
+ - The parent GPU must first consider the state of its child nodes before setting its score.
+ - The current approach involves:
+   - Checking the GPU's state.
+   - Determining how many of its child nodes are free.
+   - Setting the GPU's score based on this information.
+
+ Challenges:
+ 1. **Initializing Scores**:
+   - When should the score for nodes be initialized?
+   - When should the parent node rely on child node scores instead of setting its own?
+ 2. **Consistency**:
+   - Ensuring a coherent relationship between child node scores and parent node scores during
+traversal.
+ 3. Proposed logic:
+   - For child nodes that are not yet traversed, the score should be set to `-1`.
+   - For parent nodes whose scores have already been updated during traversal, skip reassigning
+their scores.
+**/
+
+/* Postorder visit of vertex u: check the child counts requested under the
+ * jobspec resource whose type matches u, then record an overall score for
+ * u.  Returns 0 when the requirements are met, -1 otherwise. */
+int cosched_aware_t::dom_finish_vtx (vtx_t u,
+                                     subsystem_t subsystem,
+                                     const std::vector &resources,
+                                     const resource_graph_t &g,
+                                     scoring_api_t &dfu,
+                                     traverser_match_kind_t sm)
+{
+    int64_t score = MATCH_MET;
+    fold::less comp;
+    /* Default value for worst-performing-class assumed as 9999. */
+    int64_t perf_class = 9999;
+    for (auto &resource : resources) {
+        if (resource.type != g[u].type)
+            continue;
+
+        // jobspec resource type matches with the visiting vertex
+        for (auto &c_resource : resource.with) {
+            // test children resource count requirements
+            unsigned int qc = dfu.qualified_count (subsystem, c_resource.type);
+            unsigned int count = calc_count (c_resource, qc);
+            if (count == 0) {
+                score = MATCH_UNMET;
+                break;
+            }
+            dfu.choose_accum_best_k (subsystem, c_resource.type, count, comp);
+        }
+    }
+    /* Derive the default overall score only after the requirement check, so
+     * an unmet requirement is never recorded with a "met" score. */
+    int64_t overall = (score == MATCH_MET) ? (score + g[u].id + perf_class) : score;
+    for (auto &resource : resources) {
+        if (resource.type != Flux::resource_model::gpu_rt) {
+            continue;
+        }
+        for (auto &c_resource : resource.with) {
+            if (c_resource.type == Flux::resource_model::gpu_mps_rt) {
+                unsigned int qc_1 = dfu.qualified_count (subsystem, c_resource.type);
+                if (qc_1 == 2) {
+                    overall = (score == MATCH_MET) ? (score + g[u].id + 1) : score;
+                } else {
+                    overall = (score == MATCH_MET) ? (score + g[u].id + perf_class) : score;
+                }
+            }
+        }
+        if (g[u].type == Flux::resource_model::gpu_rt)
+            dfu.set_overall_score (overall);
+    }
+    if (dfu.overall_score () == -1)
+        dfu.set_overall_score (overall);
+    if (sm == traverser_match_kind_t::SLOT_MATCH)
+        dfu.set_overall_score (dfu.get_children_avearge_score ());
+
+    decr ();
+    return (score == MATCH_MET) ? 0 : -1;
+}
+
+/* Node-emit hook: this policy takes no action and always returns 0. */
+int cosched_aware_t::dom_node_emit (vtx_t u,
+                                    subsystem_t subsytem,
+                                    const resource_graph_t &g,
+                                    unsigned int needs)
+{
+    return 0;
+}
+} // namespace resource_model
+} // namespace Flux
+
+/*
+ * vi:tabstop=4 shiftwidth=4 expandtab
+ */
diff --git a/resource/policies/dfu_match_cosched_aware.hpp b/resource/policies/dfu_match_cosched_aware.hpp
new file mode 100644
index 000000000..c0cf0a331
--- /dev/null
+++ b/resource/policies/dfu_match_cosched_aware.hpp
@@ -0,0 +1,55 @@
+/*****************************************************************************\
+ * Copyright 2019 Lawrence Livermore National Security, LLC
+ * (c.f.
AUTHORS, NOTICE.LLNS, LICENSE)
+ *
+ * This file is part of the Flux resource manager framework.
+ * For details, see https://github.com/flux-framework.
+ *
+ * SPDX-License-Identifier: LGPL-3.0
+\*****************************************************************************/
+
+#ifndef DFU_MATCH_COSCHED_AWARE_HPP
+#define DFU_MATCH_COSCHED_AWARE_HPP
+
+#include
+#include
+#include
+#include
+#include "resource/policies/base/dfu_match_cb.hpp"
+
+namespace Flux {
+namespace resource_model {
+
+/*! Co-scheduling-aware match policy: overrides the dfu_match_cb_t visit
+ * callbacks; see dfu_match_cosched_aware.cpp for the scoring rules.
+ */
+struct cosched_aware_t : public dfu_match_cb_t {
+    cosched_aware_t ();
+    cosched_aware_t (const std::string &name);
+    cosched_aware_t (const cosched_aware_t &o);
+    cosched_aware_t &operator= (const cosched_aware_t &o);
+    ~cosched_aware_t ();
+
+    int dom_finish_graph (subsystem_t subsystem,
+                          const std::vector &resources,
+                          const resource_graph_t &g,
+                          scoring_api_t &dfu) override;
+    int dom_finish_vtx (vtx_t u,
+                        subsystem_t subsystem,
+                        const std::vector &resources,
+                        const resource_graph_t &g,
+                        scoring_api_t &dfu,
+                        traverser_match_kind_t sm) override;
+
+    int dom_finish_slot (subsystem_t subsystem, scoring_api_t &dfu) override;
+
+    int dom_node_emit (vtx_t u, subsystem_t subsytem, const resource_graph_t &g,unsigned int needs) override;
+};
+} // namespace resource_model
+} // namespace Flux
+
+#endif // DFU_MATCH_COSCHED_AWARE_HPP
+
+/*
+ * vi:tabstop=4 shiftwidth=4 expandtab
+ */
diff --git a/resource/policies/dfu_match_locality.cpp b/resource/policies/dfu_match_locality.cpp
index e929869af..045332995 100644
--- a/resource/policies/dfu_match_locality.cpp
+++ b/resource/policies/dfu_match_locality.cpp
@@ -8,6 +8,7 @@
  * SPDX-License-Identifier: LGPL-3.0
 \*****************************************************************************/
 
+#include "policies/base/dfu_match_cb.hpp"
 extern "C" {
 #if HAVE_CONFIG_H
 #include
@@ -82,7 +83,7 @@ int greater_interval_first_t::dom_finish_vtx (vtx_t u, subsystem_t subsystem, const
std::vector &resources, const resource_graph_t &g, - scoring_api_t &dfu) + scoring_api_t &dfu,traverser_match_kind_t sm) { int64_t score = MATCH_MET; int64_t overall; diff --git a/resource/policies/dfu_match_locality.hpp b/resource/policies/dfu_match_locality.hpp index 6284a89f9..6cac82247 100644 --- a/resource/policies/dfu_match_locality.hpp +++ b/resource/policies/dfu_match_locality.hpp @@ -41,7 +41,7 @@ struct greater_interval_first_t : public dfu_match_cb_t { subsystem_t subsystem, const std::vector &resources, const resource_graph_t &g, - scoring_api_t &dfu); + scoring_api_t &dfu,traverser_match_kind_t sm); int dom_finish_slot (subsystem_t subsystem, scoring_api_t &dfu); }; diff --git a/resource/policies/dfu_match_multilevel_id.hpp b/resource/policies/dfu_match_multilevel_id.hpp index e08d6fb3b..db148f803 100644 --- a/resource/policies/dfu_match_multilevel_id.hpp +++ b/resource/policies/dfu_match_multilevel_id.hpp @@ -64,7 +64,7 @@ class multilevel_id_t : public dfu_match_cb_t { subsystem_t subsystem, const std::vector &resources, const resource_graph_t &g, - scoring_api_t &dfu) override; + scoring_api_t &dfu,traverser_match_kind_t sm) override; /*! 
Please see its overriding method within * dfu_match_cb_t@base/dfu_match_cb.hpp diff --git a/resource/policies/dfu_match_multilevel_id_impl.hpp b/resource/policies/dfu_match_multilevel_id_impl.hpp index bf064f77f..2ff4e03f7 100644 --- a/resource/policies/dfu_match_multilevel_id_impl.hpp +++ b/resource/policies/dfu_match_multilevel_id_impl.hpp @@ -12,6 +12,7 @@ #define DFU_MATCH_MULTILEVEL_ID_IMPL_HPP #include +#include "policies/base/dfu_match_cb.hpp" #include "resource/policies/dfu_match_multilevel_id.hpp" namespace Flux { @@ -151,7 +152,8 @@ int multilevel_id_t::dom_finish_vtx (vtx_t u, subsystem_t subsystem, const std::vector &resources, const resource_graph_t &g, - scoring_api_t &dfu) + scoring_api_t &dfu, + traverser_match_kind_t sm) { int64_t score = MATCH_MET; int64_t overall; diff --git a/resource/policies/dfu_match_policy_factory.cpp b/resource/policies/dfu_match_policy_factory.cpp index 8e7a4f645..3273a2604 100644 --- a/resource/policies/dfu_match_policy_factory.cpp +++ b/resource/policies/dfu_match_policy_factory.cpp @@ -26,7 +26,7 @@ bool known_match_policy (const std::string &policy) if (policy != FIRST_MATCH && policy != FIRST_NODEX_MATCH && policy != HIGH_ID_FIRST && policy != LOW_ID_FIRST && policy != LOW_NODE_FIRST && policy != HIGH_NODE_FIRST && policy != LOW_NODEX_FIRST && policy != HIGH_NODEX_FIRST && policy != LOCALITY_AWARE - && policy != VAR_AWARE) + && policy != VAR_AWARE && policy != COSCHED_AWARE) rc = false; return rc; @@ -65,6 +65,9 @@ std::shared_ptr create_match_cb (const std::string &policy) matcher = std::make_shared (); } else if (policy == VAR_AWARE) { matcher = std::make_shared (); + }else if(policy == COSCHED_AWARE){ + matcher = std::make_shared (); + } } catch (std::bad_alloc &e) { errno = ENOMEM; diff --git a/resource/policies/dfu_match_policy_factory.hpp b/resource/policies/dfu_match_policy_factory.hpp index 5fbbed0f8..e31199271 100644 --- a/resource/policies/dfu_match_policy_factory.hpp +++ 
b/resource/policies/dfu_match_policy_factory.hpp @@ -20,6 +20,7 @@ #include "resource/policies/dfu_match_var_aware.hpp" #include "resource/policies/dfu_match_multilevel_id.hpp" #include "resource/policies/dfu_match_multilevel_id_impl.hpp" +#include "resource/policies/dfu_match_cosched_aware.hpp" namespace Flux { namespace resource_model { @@ -34,6 +35,7 @@ const std::string LOW_NODEX_FIRST = "lonodex"; const std::string HIGH_NODEX_FIRST = "hinodex"; const std::string LOCALITY_AWARE = "locality"; const std::string VAR_AWARE = "variation"; +const std::string COSCHED_AWARE = "cosched"; bool known_match_policy (const std::string &policy); diff --git a/resource/policies/dfu_match_var_aware.cpp b/resource/policies/dfu_match_var_aware.cpp index 72a9ce2d1..194e321a0 100644 --- a/resource/policies/dfu_match_var_aware.cpp +++ b/resource/policies/dfu_match_var_aware.cpp @@ -8,6 +8,7 @@ * SPDX-License-Identifier: LGPL-3.0 \*****************************************************************************/ +#include "policies/base/dfu_match_cb.hpp" extern "C" { #if HAVE_CONFIG_H #include @@ -75,7 +76,8 @@ int var_aware_t::dom_finish_vtx (vtx_t u, subsystem_t subsystem, const std::vector &resources, const resource_graph_t &g, - scoring_api_t &dfu) + scoring_api_t &dfu, + traverser_match_kind_t sm) { int64_t score = MATCH_MET; int64_t overall; diff --git a/resource/policies/dfu_match_var_aware.hpp b/resource/policies/dfu_match_var_aware.hpp index caa2c2db4..857f34ba2 100644 --- a/resource/policies/dfu_match_var_aware.hpp +++ b/resource/policies/dfu_match_var_aware.hpp @@ -38,7 +38,7 @@ struct var_aware_t : public dfu_match_cb_t { subsystem_t subsystem, const std::vector &resources, const resource_graph_t &g, - scoring_api_t &dfu); + scoring_api_t &dfu,traverser_match_kind_t sm); int dom_finish_slot (subsystem_t subsystem, scoring_api_t &dfu); }; diff --git a/resource/schema/data_std.cpp b/resource/schema/data_std.cpp index 8407ac653..a1db5ae3c 100644 --- a/resource/schema/data_std.cpp 
+++ b/resource/schema/data_std.cpp @@ -19,6 +19,7 @@ resource_type_t cluster_rt{"cluster"}; resource_type_t core_rt{"core"}; resource_type_t socket_rt{"socket"}; resource_type_t gpu_rt{"gpu"}; +resource_type_t gpu_mps_rt{"gpu_mps"}; resource_type_t node_rt{"node"}; resource_type_t rack_rt{"rack"}; resource_type_t slot_rt{"slot"}; diff --git a/resource/schema/data_std.hpp b/resource/schema/data_std.hpp index 6bbca0fcf..1b9be9ba0 100644 --- a/resource/schema/data_std.hpp +++ b/resource/schema/data_std.hpp @@ -44,6 +44,7 @@ extern resource_type_t node_rt; extern resource_type_t socket_rt; extern resource_type_t gpu_rt; extern resource_type_t core_rt; +extern resource_type_t gpu_mps_rt; template using subsystem_key_vec = intern::interned_key_vec; diff --git a/resource/schema/resource_data.hpp b/resource/schema/resource_data.hpp index 172ff0a1a..6b2bfef51 100644 --- a/resource/schema/resource_data.hpp +++ b/resource/schema/resource_data.hpp @@ -47,7 +47,7 @@ struct resource_pool_t : public resource_t { int64_t uniq_id; unsigned int size = 0; std::string unit; - + unsigned mps_data = 0; schedule_t schedule; //!< schedule data pool_infra_t idata; //!< scheduling infrastructure data status_t status = status_t::UP; diff --git a/resource/traversers/dfu.cpp b/resource/traversers/dfu.cpp index f331c3123..cba8edd14 100644 --- a/resource/traversers/dfu.cpp +++ b/resource/traversers/dfu.cpp @@ -149,11 +149,12 @@ int dfu_traverser_t::schedule (Jobspec::Jobspec &jobspec, int saved_errno = errno; planner_multi_t *p = NULL; subsystem_t dom = get_match_cb ()->dom_subsystem (); - + std::cout << "checking schedule possible" << std::endl; // precheck to see if enough resources are available for this to be feasible - if ((rc = request_feasible (meta, op, root, dfv)) < 0) + if ((rc = request_feasible (meta, op, root, dfv)) < 0) { + std::cout << "not feasible" << std::endl; goto out; - + } if ((rc = detail::dfu_impl_t::select (jobspec, root, meta, x)) == 0) { m_total_preorder = 
detail::dfu_impl_t::get_preorder_count (); m_total_postorder = detail::dfu_impl_t::get_postorder_count (); @@ -217,6 +218,7 @@ int dfu_traverser_t::schedule (Jobspec::Jobspec &jobspec, break; } case match_op_t::MATCH_ALLOCATE: + std::cout << "Busy"< graph_end) { detail::dfu_impl_t::reset_exclusive_resource_types (exclusive_types); + std::cout << "schedule time greater then graph end time" << std::endl; errno = EINVAL; return -1; } diff --git a/resource/traversers/dfu_impl.cpp b/resource/traversers/dfu_impl.cpp index 0c6683932..125d6a172 100644 --- a/resource/traversers/dfu_impl.cpp +++ b/resource/traversers/dfu_impl.cpp @@ -8,6 +8,7 @@ * SPDX-License-Identifier: LGPL-3.0 \*****************************************************************************/ +#include "policies/base/dfu_match_cb.hpp" extern "C" { #if HAVE_CONFIG_H #include "config.h" @@ -117,12 +118,12 @@ int dfu_impl_t::by_excl (const jobmeta_t &meta, // If a non-exclusive resource request is explicitly given on a // resource that lies under slot, this spec is invalid. 
- if (exclusive_in && resource.exclusive == Jobspec::tristate_t::FALSE) { - errno = EINVAL; - m_err_msg += "by_excl: exclusivity conflicts at jobspec="; - m_err_msg += resource.label + " : vertex=" + (*m_graph)[u].name; - goto done; - } + /*if (exclusive_in && resource.exclusive == Jobspec::tristate_t::FALSE) {*/ + /* errno = EINVAL;*/ + /* m_err_msg += "by_excl: exclusivity conflicts at jobspec=";*/ + /* m_err_msg += resource.label + " : vertex=" + (*m_graph)[u].name;*/ + /* goto done;*/ + /*}*/ // If a resource request is under slot or an explicit exclusivity is // requested, we check the validity of the visiting vertex using @@ -657,7 +658,7 @@ int dfu_impl_t::dom_slot (const jobmeta_t &meta, scoring_api_t &dfu) { int rc; - bool x_inout = true; + bool x_inout = *excl; scoring_api_t dfu_slot; unsigned int qual_num_slots = 0; std::vector edg_group_vector; @@ -688,7 +689,7 @@ int dfu_impl_t::dom_slot (const jobmeta_t &meta, } eval_edg_t ev_edg ((*egroup_i).edges[0].count, (*egroup_i).edges[0].count, - 1, + x_inout, (*egroup_i).edges[0].edge); score += (*egroup_i).score; edg_group.edges.push_back (ev_edg); @@ -697,9 +698,13 @@ int dfu_impl_t::dom_slot (const jobmeta_t &meta, } edg_group.score = score; edg_group.count = 1; - edg_group.exclusive = 1; + /*edg_group.exclusive = static_cast(*excl);*/ + edg_group.exclusive = x_inout; edg_group_vector.push_back (edg_group); } + // Passing on the slot dfu weights to parents for preference and initializing the current dfu with the average of its children + dfu.set_overall_score(dfu_slot.get_children_avearge_score()); + dfu.set_children_score_vector(dfu_slot.get_children_score_vector()); for (auto &edg_group : edg_group_vector) dfu.add (dom, slot_rt, edg_group); @@ -734,15 +739,16 @@ int dfu_impl_t::dom_dfv (const jobmeta_t &meta, || (m_match->dom_discover_vtx (u, dom, resources, *m_graph) != 0)) goto done; (*m_graph)[u].idata.colors[dom] = m_color.gray (); + /*std::cout << "dom_dfv " << (*m_graph)[u].name << " sm " <<
static_cast (sm) << std::endl;*/ if (sm == match_kind_t::SLOT_MATCH) dom_slot (meta, u, next, nslots, check_pres, &x_inout, dfu); else dom_exp (meta, u, next, check_pres, &x_inout, dfu); *excl = x_in; (*m_graph)[u].idata.colors[dom] = m_color.black (); - p = (*m_graph)[u].schedule.plans; if ((avail = planner_avail_resources_during (p, at, duration)) == 0) { + std::cout << "node " << (*m_graph)[u].name << "planner bailed out" << std::endl; goto done; } else if (avail == -1) { m_err_msg += "dom_dfv: planner_avail_resources_during returned -1.\n"; @@ -750,12 +756,17 @@ int dfu_impl_t::dom_dfv (const jobmeta_t &meta, m_err_msg += ".\n"; goto done; } - if (m_match->dom_finish_vtx (u, dom, resources, *m_graph, dfu) != 0) + if (m_match->dom_finish_vtx (u, dom, resources, *m_graph, dfu, (traverser_match_kind_t)sm) != 0) goto done; if ((rc = resolve (dfu, to_parent)) != 0) goto done; + to_parent.add_element_to_child_score (dfu.overall_score ()); + int64_t average; + average = to_parent.get_children_avearge_score (); to_parent.set_avail (avail); - to_parent.set_overall_score (dfu.overall_score ()); + to_parent.set_overall_score (average); + /*std::cout << "Node " << (*m_graph)[u].name << " score is " << dfu.overall_score() << std::endl;*/ + /*std::cout << "to parent score is " << to_parent.overall_score() << std::endl;*/ for (auto &resource : resources) { if ((resource.type == (*m_graph)[u].type) && (!resource.label.empty ())) { diff --git a/resource/traversers/dfu_impl_update.cpp b/resource/traversers/dfu_impl_update.cpp index 8ee0b4ba6..dba7dbe99 100644 --- a/resource/traversers/dfu_impl_update.cpp +++ b/resource/traversers/dfu_impl_update.cpp @@ -259,6 +259,11 @@ int dfu_impl_t::upd_sched (vtx_t u, goto done; } if (n > 0) { + if(m_match->dom_node_emit(u,s,*m_graph,needs)==-1){ + + m_err_msg += __FUNCTION__; + m_err_msg += ": matcher dom_node_emit returned -1.\n"; + } if ((rc = emit_vtx (u, writers, needs, excl)) == -1) { m_err_msg += __FUNCTION__; m_err_msg += ": emit_vtx 
returned -1.\n"; diff --git a/resource/utilities/command.cpp b/resource/utilities/command.cpp index 782d0dd0f..992a20852 100644 --- a/resource/utilities/command.cpp +++ b/resource/utilities/command.cpp @@ -198,6 +198,7 @@ static void print_schedule_info (std::shared_ptr &ctx, out << "INFO:" << " =============================" << std::endl; st = (at == 0) ? job_lifecycle_t::ALLOCATED : job_lifecycle_t::RESERVED; + std::cout << "jobspec" << jobspec_fn << std::endl; ctx->jobs[jobid] = std::make_shared (jobid, st, at, jobspec_fn, "", elapse); if (at == 0) ctx->allocations[jobid] = jobid; diff --git a/resource/utilities/resource-query.cpp b/resource/utilities/resource-query.cpp index 6d6b6dff7..4a0c066cf 100644 --- a/resource/utilities/resource-query.cpp +++ b/resource/utilities/resource-query.cpp @@ -141,6 +141,7 @@ To see cli commands, type in "help" in the cli: i.e., locality: Select contiguous resources first in their ID space variation: Allocate resources based on performance classes. (perf_class must be set using set-property). + cosched: Make scheduler aware of GPU MPS configurations. (default=first). -F, --match-format=