flux-framework · kulnaman · Dec 5, 2024 · Dec 5, 2024 · Dec 5, 2024 · Dec 5, 2024
diff --git a/mps_data/job_1.yaml b/mps_data/job_1.yaml
@@ -0,0 +1,31 @@
+version: 9999
+resources:
+  - type: node
+    count: 1
+    with:
+        - type: socket
+          count: 1
+          with:
+              - type: slot
+                label: default
+                exclusive: false
+                count: 1
+                with: 
+                    - type: core
+                      count: 1
+                      exclusive: true
+                    - type: gpu
+                      count: 1
+                      exclusive: false
+
+# a comment
+attributes:
+  system:
+    duration: 120
+    cosched: true
+    c_r: true
+tasks:
+  - command: [ "app" ]
+    slot: default
+    count:
+      per_slot: 1
diff --git a/mps_data/job_1N.yaml b/mps_data/job_1N.yaml
@@ -0,0 +1,31 @@
+version: 9999
+resources:
+  - type: node
+    count: 1
+    with:
+        - type: socket
+          count: 1
+          with:
+              - type: core
+                count: 2
+              - type: gpu
+                count: 1
+                with:
+                    - type: slot
+                      label: default
+                      count: 1
+                      with:
+                          - type: gpu_mps
+                            count: 1
+
+# a comment
+attributes:
+  system:
+    duration: 120
+    cosched: true
+    c_r: true
+tasks:
+  - command: [ "app" ]
+    slot: default
+    count:
+      per_slot: 1
diff --git a/mps_data/job_2.yaml b/mps_data/job_2.yaml
@@ -0,0 +1,35 @@
+version: 9999
+resources:
+  - type: node
+    count: 1
+    with:
+        - type: socket
+          count: 1
+          with:
+              - type: slot
+                label: default
+                exclusive: false
+                count: 1
+                with: 
+                    - type: core
+                      count: 2
+                      exclusive: true
+                    - type: gpu
+                      count: 2
+                      exclusive: false
+                      with:
+                          - type: gpu_mps
+                            count: 1
+                            exclusive: true
+
+# a comment
+attributes:
+  system:
+    duration: 120
+    cosched: true
+    c_r: true
+tasks:
+  - command: [ "app" ]
+    slot: default
+    count:
+      per_slot: 1
diff --git a/mps_data/job_2N.yaml b/mps_data/job_2N.yaml
@@ -0,0 +1,27 @@
+version: 9999
+resources:
+  - type: node
+    count: 4
+    with:
+        - type: socket
+          count: 1
+          with:
+            - type: slot
+              label: default
+              count: 1
+              with:
+                - type: core
+                  count: 4
+                - type: gpu
+                  count: 4
+# a comment
+attributes:
+  system:
+    duration: 720
+    cosched: true
+    c_r: true
+tasks:
+  - command: [ "app" ]
+    slot: default
+    count:
+      per_slot: 1
diff --git a/mps_data/job_4N.yaml b/mps_data/job_4N.yaml
@@ -0,0 +1,27 @@
+version: 9999
+resources:
+  - type: node
+    count: 4
+    with:
+        - type: socket
+          count: 1
+          with:
+            - type: slot
+              label: default
+              count: 1
+              with:
+                - type: core
+                  count: 2
+                - type: gpu
+                  count: 2
+# a comment
+attributes:
+  system:
+    duration: 240 
+    cosched: true
+    c_r: true
+tasks:
+  - command: [ "app" ]
+    slot: default
+    count:
+      per_slot: 1
diff --git a/mps_data/small.graphml b/mps_data/small.graphml
@@ -0,0 +1,138 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!-- 1 subsystem, cluster[1]->rack[1]->node[2]->socket[1]->core[8], gpu[2]->gpu_mps[2] -->
+
+<!-- Memory pool is modeled as 4 x 32GB                              -->
+
+
+<graphml xmlns="http://graphml.graphdrawing.org/xmlns"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns
+        http://graphml.graphdrawing.org/xmlns/1.1/graphml.xsd">
+
+    <!-- resource pool vertex generation spec attributes -->
+    <key id="root" for="node" attr.name="root" attr.type="int">
+        <default>0</default>
+    </key>
+    <key id="type" for="node" attr.name="type" attr.type="string"/>
+    <key id="basename" for="node" attr.name="basename" attr.type="string"/>
+    <key id="unit" for="node" attr.name="unit" attr.type="string"/>
+    <key id="size" for="node" attr.name="size" attr.type="long">
+        <default>1</default>
+    </key>
+    <key id="subsystem" for="node" attr.name="subsystem" attr.type="string">
+        <default>containment</default>
+    </key>
+
+    <!-- resource relationship generation attributes     -->
+    <key id="e_subsystem" for="edge" attr.name="e_subsystem" attr.type="string">
+        <default>containment</default>
+    </key>
+    <key id="relation" for="edge" attr.name="relation" attr.type="string">
+        <default>contains</default>
+    </key>
+    <key id="rrelation" for="edge" attr.name="rrelation" attr.type="string">
+        <default>in</default>
+    </key>
+
+    <!-- id generation method                             -->
+    <key id="id_scope" for="edge" attr.name="id_scope" attr.type="int">
+        <default>0</default>
+    </key>
+    <key id="id_start" for="edge" attr.name="id_start" attr.type="int">
+        <default>0</default>
+    </key>
+    <key id="id_stride" for="edge" attr.name="id_stride" attr.type="int">
+        <default>1</default>
+    </key>
+
+    <!-- resource gen method: multiply or associate-in   -->
+    <key id="gen_method" for="edge" attr.name="gen_method" attr.type="string">
+        <default>MULTIPLY</default>
+    </key>
+    <!-- argument (scaling factor) for multiply method   -->
+    <key id="multi_scale" for="edge" attr.name="multi_scale" attr.type="int">
+        <default>1</default>
+    </key>
+    <!-- 3 arguments for associate-in method             -->
+    <key id="as_tgt_subsystem" for="edge" attr.name="as_tgt_subsystem"
+             attr.type="string">
+        <default>containment</default>
+    </key>
+    <key id="as_tgt_uplvl" for="edge" attr.name="as_tgt_uplvl" attr.type="int">
+        <default>1</default>
+    </key>
+    <key id="as_src_uplvl" for="edge" attr.name="as_src_uplvl" attr.type="int">
+        <default>1</default>
+    </key>
+
+
+    <!-- generation recipe for the small cluster         -->
+    <graph id="small" edgedefault="directed">
+
+        <!-- containment subsystem generation recipe    -->
+        <node id="cluster">
+            <data key="root">1</data>
+            <data key="type">cluster</data>
+            <data key="basename">small</data>
+        </node>
+        <node id="rack">
+            <data key="type">rack</data>
+            <data key="basename">rack</data>
+        </node>
+        <node id="node">
+            <data key="type">node</data>
+            <data key="basename">node</data>
+        </node>
+        <node id="socket">
+            <data key="type">socket</data>
+            <data key="basename">socket</data>
+        </node>
+        <node id="core">
+            <data key="type">core</data>
+            <data key="basename">core</data>
+        </node>
+        <node id="gpu">
+            <data key="type">gpu</data>
+            <data key="basename">gpu</data>
+        </node>
+        <node id="gpu_mps">
+            <data key="type">gpu_mps</data>
+            <data key="basename">gpu_mps</data>
+        </node>
+        <node id="memory">
+            <data key="type">memory</data>
+            <data key="basename">memory</data>
+            <data key="size">32</data>
+            <data key="unit">GB</data>
+        </node>
+
+        <edge id="cluster2rack" source="cluster" target="rack">
+            <data key="multi_scale">1</data>
+        </edge>
+        <edge id="rack2node" source="rack" target="node">
+            <data key="id_scope">1</data>
+            <data key="multi_scale">2</data>
+          </edge>
+        <edge id="node2socket" source="node" target="socket">
+            <data key="multi_scale">1</data>
+        </edge>
+        <edge id="socket2core" source="socket" target="core">
+            <data key="id_scope">1</data>
+            <data key="multi_scale">8</data>
+        </edge>
+        <edge id="socket2gpu" source="socket" target="gpu">
+            <data key="id_scope">1</data>
+            <data key="multi_scale">2</data>
+        </edge>
+        <edge id="gpu2gpu_mps" source="gpu" target="gpu_mps">
+            <data key="id_scope">1</data>
+            <data key="multi_scale">2</data>
+        </edge>
+        <edge id="socket2memory" source="socket" target="memory">
+            <data key="id_scope">1</data>
+            <data key="multi_scale">4</data>
+        </edge>
+    </graph>
+</graphml>
+
diff --git a/resource/CMakeLists.txt b/resource/CMakeLists.txt
@@ -17,6 +17,7 @@ set(RESOURCE_HEADERS
     policies/dfu_match_multilevel_id_impl.hpp
     policies/dfu_match_locality.hpp
     policies/dfu_match_var_aware.hpp
+    policies/dfu_match_cosched_aware.hpp
     policies/dfu_match_policy_factory.hpp
     jobinfo/jobinfo.hpp
     schema/resource_graph.hpp
@@ -52,6 +53,7 @@ set(RESOURCE_HEADERS
 add_library(resource STATIC
     policies/dfu_match_locality.cpp
     policies/dfu_match_var_aware.cpp
+    policies/dfu_match_cosched_aware.cpp
     policies/dfu_match_policy_factory.cpp
     jobinfo/jobinfo.cpp
     schema/resource_data.cpp

diff --git a/resource/evaluators/scoring_api.cpp b/resource/evaluators/scoring_api.cpp
@@ -112,6 +112,10 @@ void scoring_api_t::merge (const scoring_api_t &o)
     }
 }
 
+void scoring_api_t::add_element_to_child_score (int score)
+{
+    children_score_list.push_back (score);  // appended to the int64_t list, so the int widens
+}
 void scoring_api_t::resrc_types (subsystem_t s, std::vector<resource_type_t> &v)
 {
     for (auto &kv : m_ssys_map[s])
@@ -139,7 +143,22 @@ void scoring_api_t::set_avail (unsigned int avail)
 {
     m_avail = avail;
 }
-
+void scoring_api_t::set_children_score_vector (std::vector<int64_t> source)
+{
+    children_score_list.swap (source);  // adopt the by-value argument; no second deep copy
+}
+std::vector<int64_t> scoring_api_t::get_children_score_vector ()
+{
+    return children_score_list;  // by-value return: the caller receives a copy of the list
+}
+int64_t scoring_api_t::get_children_avearge_score ()
+{
+    // Integer mean of child scores; divisor is signed so a negative sum is not made unsigned.
+    if (children_score_list.empty ())
+        return 0;  // no child scores recorded
+    return std::accumulate (children_score_list.begin (), children_score_list.end (), int64_t{0})
+           / static_cast<int64_t> (children_score_list.size ());
+}
 bool scoring_api_t::is_contained (subsystem_t s, resource_type_t const &r)
 {
     return m_ssys_map[s].contains (r);

diff --git a/resource/evaluators/scoring_api.hpp b/resource/evaluators/scoring_api.hpp
@@ -56,6 +56,11 @@ class scoring_api_t {
     void set_overall_score (int64_t overall);
     unsigned int avail ();
     void set_avail (unsigned int avail);
+    // Per-child score list; lets a preference be expressed above the slot level as well.
+    void add_element_to_child_score (int score);  // append one child's score
+    std::vector<int64_t> get_children_score_vector ();  // copy of the list
+    void set_children_score_vector (const std::vector<int64_t> source);  // replace the list
+    int64_t get_children_avearge_score ();  // integer mean of the list; 0 when it is empty
     bool is_contained (subsystem_t s, resource_type_t const &r);
 
     template<class compare_op = fold::greater, class binary_op = fold::plus>
@@ -99,6 +104,7 @@ class scoring_api_t {
 
    private:
     intern::interned_key_vec<subsystem_t, std::map<resource_type_t, detail::evals_t>> m_ssys_map;
+    std::vector<int64_t> children_score_list;  // scores added via add_element_to_child_score ()
     bool m_hier_constrain_now = false;
     int64_t m_overall_score = -1;
     unsigned int m_avail = 0;