diff --git a/mps_data/job_1.yaml b/mps_data/job_1.yaml
new file mode 100644
index 000000000..9afb9252c
--- /dev/null
+++ b/mps_data/job_1.yaml
@@ -0,0 +1,31 @@
+version: 9999
+resources:
+ - type: node
+ count: 1
+ with:
+ - type: socket
+ count: 1
+ with:
+ - type: slot
+ label: default
+ exclusive: false
+ count: 1
+ with:
+ - type: core
+ count: 1
+ exclusive: true
+ - type: gpu
+ count: 1
+ exclusive: false
+
+# a comment
+attributes:
+ system:
+ duration: 120
+ cosched: true
+ c_r: true
+tasks:
+ - command: [ "app" ]
+ slot: default
+ count:
+ per_slot: 1
diff --git a/mps_data/job_1N.yaml b/mps_data/job_1N.yaml
new file mode 100644
index 000000000..9de05e4f5
--- /dev/null
+++ b/mps_data/job_1N.yaml
@@ -0,0 +1,31 @@
+version: 9999
+resources:
+ - type: node
+ count: 1
+ with:
+ - type: socket
+ count: 1
+ with:
+ - type: core
+ count: 2
+ - type: gpu
+ count: 1
+ with:
+ - type: slot
+ label: default
+ count: 1
+ with:
+ - type: gpu_mps
+ count: 1
+
+# a comment
+attributes:
+ system:
+ duration: 120
+ cosched: true
+ c_r: true
+tasks:
+ - command: [ "app" ]
+ slot: default
+ count:
+ per_slot: 1
diff --git a/mps_data/job_2.yaml b/mps_data/job_2.yaml
new file mode 100644
index 000000000..b05ea3538
--- /dev/null
+++ b/mps_data/job_2.yaml
@@ -0,0 +1,35 @@
+version: 9999
+resources:
+ - type: node
+ count: 1
+ with:
+ - type: socket
+ count: 1
+ with:
+ - type: slot
+ label: default
+ exclusive: false
+ count: 1
+ with:
+ - type: core
+ count: 2
+ exclusive: true
+ - type: gpu
+ count: 2
+ exclusive: false
+ with:
+ - type: gpu_mps
+ count: 1
+ exclusive: true
+
+# a comment
+attributes:
+ system:
+ duration: 120
+ cosched: true
+ c_r: true
+tasks:
+ - command: [ "app" ]
+ slot: default
+ count:
+ per_slot: 1
diff --git a/mps_data/job_2N.yaml b/mps_data/job_2N.yaml
new file mode 100644
index 000000000..eda2f0652
--- /dev/null
+++ b/mps_data/job_2N.yaml
@@ -0,0 +1,27 @@
+version: 9999
+resources:
+ - type: node
+ count: 4
+ with:
+ - type: socket
+ count: 1
+ with:
+ - type: slot
+ label: default
+ count: 1
+ with:
+ - type: core
+ count: 4
+ - type: gpu
+ count: 4
+# a comment
+attributes:
+ system:
+ duration: 720
+ cosched: true
+ c_r: true
+tasks:
+ - command: [ "app" ]
+ slot: default
+ count:
+ per_slot: 1
diff --git a/mps_data/job_4N.yaml b/mps_data/job_4N.yaml
new file mode 100644
index 000000000..7db06cc25
--- /dev/null
+++ b/mps_data/job_4N.yaml
@@ -0,0 +1,27 @@
+version: 9999
+resources:
+ - type: node
+ count: 4
+ with:
+ - type: socket
+ count: 1
+ with:
+ - type: slot
+ label: default
+ count: 1
+ with:
+ - type: core
+ count: 2
+ - type: gpu
+ count: 2
+# a comment
+attributes:
+ system:
+ duration: 240
+ cosched: true
+ c_r: true
+tasks:
+ - command: [ "app" ]
+ slot: default
+ count:
+ per_slot: 1
diff --git a/mps_data/small.graphml b/mps_data/small.graphml
new file mode 100644
index 000000000..89ab6f05d
--- /dev/null
+++ b/mps_data/small.graphml
@@ -0,0 +1,138 @@
+
+
+
+
+
+
+
+
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns
+ http://graphml.graphdrawing.org/xmlns/1.1/graphml.xsd">
+
+
+
+ 0
+
+
+
+
+
+ 1
+
+
+ containment
+
+
+
+
+ containment
+
+
+ contains
+
+
+ in
+
+
+
+
+ 0
+
+
+ 0
+
+
+ 1
+
+
+
+
+ MULTIPLY
+
+
+
+ 1
+
+
+
+ containment
+
+
+ 1
+
+
+ 1
+
+
+
+
+
+
+
+
+ 1
+ cluster
+ small
+
+
+ rack
+ rack
+
+
+ node
+ node
+
+
+ socket
+ socket
+
+
+ core
+ core
+
+
+ gpu
+ gpu
+
+
+ gpu_mps
+ gpu_mps
+
+
+ memory
+ memory
+ 32
+ GB
+
+
+
+ 1
+
+
+ 1
+ 2
+
+
+ 1
+
+
+ 1
+ 8
+
+
+ 1
+ 2
+
+
+ 1
+ 2
+
+
+ 1
+ 4
+
+
+
+
diff --git a/resource/CMakeLists.txt b/resource/CMakeLists.txt
index 625be874c..c623a5be5 100644
--- a/resource/CMakeLists.txt
+++ b/resource/CMakeLists.txt
@@ -17,6 +17,7 @@ set(RESOURCE_HEADERS
policies/dfu_match_multilevel_id_impl.hpp
policies/dfu_match_locality.hpp
policies/dfu_match_var_aware.hpp
+ policies/dfu_match_cosched_aware.hpp
policies/dfu_match_policy_factory.hpp
jobinfo/jobinfo.hpp
schema/resource_graph.hpp
@@ -52,6 +53,7 @@ set(RESOURCE_HEADERS
add_library(resource STATIC
policies/dfu_match_locality.cpp
policies/dfu_match_var_aware.cpp
+ policies/dfu_match_cosched_aware.cpp
policies/dfu_match_policy_factory.cpp
jobinfo/jobinfo.cpp
schema/resource_data.cpp
diff --git a/resource/evaluators/scoring_api.cpp b/resource/evaluators/scoring_api.cpp
index f7ece7880..7d745f314 100644
--- a/resource/evaluators/scoring_api.cpp
+++ b/resource/evaluators/scoring_api.cpp
@@ -112,6 +112,10 @@ void scoring_api_t::merge (const scoring_api_t &o)
}
}
+/* Record one more child score at the end of children_score_list. */
+void scoring_api_t::add_element_to_child_score (int score)
+{
+    children_score_list.emplace_back (score);
+}
void scoring_api_t::resrc_types (subsystem_t s, std::vector &v)
{
for (auto &kv : m_ssys_map[s])
@@ -139,7 +143,22 @@ void scoring_api_t::set_avail (unsigned int avail)
{
m_avail = avail;
}
-
+/* Replace the stored child scores with a copy of source.
+ * NOTE(review): source is taken by const value, so the vector is copied once
+ * for the call and once more by the assignment below; a const reference would
+ * drop one copy, but the declaration in scoring_api.hpp would have to change
+ * together with this definition.
+ */
+void scoring_api_t::set_children_score_vector (const std::vector source)
+{
+ children_score_list = source;
+}
+/* Return the stored child scores by value, i.e. the caller receives a copy
+ * of children_score_list.
+ */
+std::vector scoring_api_t::get_children_score_vector ()
+{
+ return children_score_list;
+}
+/* Return the integer mean of the stored child scores (the quotient is
+ * truncated toward zero), or 0 when no child score has been recorded.
+ */
+int64_t scoring_api_t::get_children_avearge_score ()
+{
+    if (children_score_list.empty ())
+        return 0;
+    const int64_t total = static_cast<int64_t> (
+        std::accumulate (children_score_list.begin (), children_score_list.end (), 0LL));
+    // size () is unsigned: dividing the signed sum by it directly would first
+    // convert a negative total to a huge unsigned value, so the division is
+    // carried out entirely in signed arithmetic.
+    return total / static_cast<int64_t> (children_score_list.size ());
+}
bool scoring_api_t::is_contained (subsystem_t s, resource_type_t const &r)
{
return m_ssys_map[s].contains (r);
diff --git a/resource/evaluators/scoring_api.hpp b/resource/evaluators/scoring_api.hpp
index 824c03f8c..8613496c7 100644
--- a/resource/evaluators/scoring_api.hpp
+++ b/resource/evaluators/scoring_api.hpp
@@ -56,6 +56,11 @@ class scoring_api_t {
void set_overall_score (int64_t overall);
unsigned int avail ();
void set_avail (unsigned int avail);
+ // Child-score bookkeeping: scores recorded for children are kept in a
+ // vector so that a preference can also be expressed above the slot level.
+ // Append one child score to the vector.
+ void add_element_to_child_score(int score);
+ // Return a copy of the recorded child scores.
+ std::vector get_children_score_vector();
+ // Replace the recorded child scores with a copy of source.
+ void set_children_score_vector(const std::vector source);
+ // Integer mean of the recorded child scores; 0 when none are recorded.
+ // NOTE(review): "avearge" is a misspelling of "average"; a rename has to
+ // update the definition and every caller together.
+ int64_t get_children_avearge_score();
bool is_contained (subsystem_t s, resource_type_t const &r);
template
@@ -99,6 +104,7 @@ class scoring_api_t {
private:
intern::interned_key_vec> m_ssys_map;
+ // Child scores appended by add_element_to_child_score () or replaced
+ // wholesale by set_children_score_vector ().
+ // NOTE(review): the neighbouring members use an m_ prefix; this one does not.
+ std::vector children_score_list;
bool m_hier_constrain_now = false;
int64_t m_overall_score = -1;
unsigned int m_avail = 0;
diff --git a/resource/libjobspec/jobspec.cpp b/resource/libjobspec/jobspec.cpp
index 34cae0faf..352bb810e 100644
--- a/resource/libjobspec/jobspec.cpp
+++ b/resource/libjobspec/jobspec.cpp
@@ -256,6 +256,36 @@ Task::Task (const YAML::Node &tasknode)
}
}
+namespace {
+/* Walk the resource request tree and adjust every GPU request according to
+ * the job's co-scheduling attributes.
+ *
+ * resources - request tree to process; it is modified in place.
+ * cosched   - when true, each gpu_rt request is marked non-exclusive.
+ * c_r       - when true together with cosched, each gpu_rt request also gets
+ *             its cosched flag set and its cosched_count reset to 0.
+ *
+ * Returns a copy of the (already modified) resources vector.
+ *
+ * NOTE(review): the statement that handles a GPU request without children is
+ * truncated in this view of the patch, so what it adds to the request cannot
+ * be confirmed from here.
+ */
+std::vector handle_cosched (std::vector &resources, bool cosched, bool c_r)
+{
+ for (auto &res : resources) {
+ if (res.type == Flux::resource_model::gpu_rt) {
+ if (cosched) {
+ /* Co-scheduling requested: the GPU request must not be exclusive */
+ res.exclusive = tristate_t::FALSE;
+ if (c_r) {
+ /* The c_r attribute is set as well: flag the request and
+ * start its cosched_count from zero */
+ res.cosched = true;
+ res.cosched_count = 0;
+ }
+ }
+ /* A GPU request with no children means no mps partition was defined */
+ if (res.with.size () == 0) {
+ std::cout << "adding mps partition in jobspec "< 0) {
+ /* Recurse into the children; they are updated through the reference,
+ * so the returned copy is not needed here */
+ handle_cosched (res.with, cosched, c_r);
+ }
+ }
+ return resources;
+}
+} // namespace
+
namespace {
std::vector parse_yaml_tasks (const YAML::Node &tasks)
{
@@ -311,6 +341,10 @@ Attributes parse_yaml_attributes (const YAML::Node &attrs)
a.system.queue = s.second.as ();
} else if (s.first.as () == "cwd") {
a.system.cwd = s.second.as ();
+ } else if (s.first.as () == "cosched") {
+ a.system.cosched = s.second.as ();
+ } else if (s.first.as () == "c_r") {
+ a.system.c_r = s.second.as ();
} else if (s.first.as () == "environment") {
for (auto &&e : s.second) {
a.system.environment[e.first.as ()] =
@@ -329,7 +363,15 @@ Attributes parse_yaml_attributes (const YAML::Node &attrs)
return a;
}
} // namespace
-
+/* Debug helper: print every resource request in the tree to stdout, parents
+ * before their children. Each line shows the type, the minimum count, the
+ * number of direct children and the exclusive flag.
+ *
+ * NOTE(review): resources is passed by value, so each recursion level copies
+ * the subtree it is given. The function is defined outside the anonymous
+ * namespace and therefore has external linkage, while the only call site
+ * visible in this patch is commented out.
+ */
+void print_resource_tree (std::vector resources)
+{
+ for (auto &res : resources) {
+ std::cout << "Resource type: " << res.type << " count: " << res.count.min
+ << " with: " << res.with.size () << " exclusive flag "
+ << static_cast (res.exclusive) << std::endl;
+ /* Descend into the children of this request */
+ print_resource_tree (res.with);
+ }
+}
Jobspec::Jobspec (const YAML::Node &top)
{
try {
@@ -370,7 +412,9 @@ Jobspec::Jobspec (const YAML::Node &top)
/* Import resources section */
resources = parse_yaml_resources (top["resources"]);
-
+ /* Handle coscheduling case */
+ resources = handle_cosched (resources, attributes.system.cosched, attributes.system.c_r);
+ /*print_resource_tree (resources);*/
/* Import tasks section */
tasks = parse_yaml_tasks (top["tasks"]);
} catch (YAML::Exception &e) {
diff --git a/resource/libjobspec/jobspec.hpp b/resource/libjobspec/jobspec.hpp
index d8ae7ed82..1c610d484 100644
--- a/resource/libjobspec/jobspec.hpp
+++ b/resource/libjobspec/jobspec.hpp
@@ -64,7 +64,8 @@ class Resource {
// user_data has no library internal usage, it is
// entirely for the convenience of external code
std::unordered_map user_data;
-
+ // Co-scheduling marker and counter for this resource request. In this
+ // patch they are written only by handle_cosched () in jobspec.cpp, which
+ // sets cosched to true and cosched_count to 0 on gpu requests when both
+ // the cosched and c_r job attributes are set.
+ // NOTE(review): what cosched_count counts is not visible here - confirm
+ // against the code that reads it.
+ bool cosched = false;
+ unsigned cosched_count = 0;
Resource (const YAML::Node &);
};
@@ -81,8 +82,11 @@ class Task {
struct System {
double duration = 0.0f;
+ // Filled from the "cosched" key of attributes.system by
+ // parse_yaml_attributes (); stays false when the key is absent.
+ bool cosched = false;
+ // Filled from the "c_r" key of attributes.system in the same way.
+ bool c_r = false; // Job Support Checkpoint/Restart
std::string queue = "";
std::string cwd = "";
+
std::unordered_map environment;
std::unordered_map optional;
std::shared_ptr constraint = nullptr;
diff --git a/resource/modules/resource_match.cpp b/resource/modules/resource_match.cpp
index f8554555a..034c96beb 100644
--- a/resource/modules/resource_match.cpp
+++ b/resource/modules/resource_match.cpp
@@ -1488,6 +1488,7 @@ static inline std::string get_status_string (int64_t now, int64_t at)
static inline bool is_existent_jobid (const std::shared_ptr &ctx, uint64_t jobid)
{
+
return (ctx->jobs.find (jobid) != ctx->jobs.end ()) ? true : false;
}
diff --git a/resource/policies/base/.nfsd28e711e9b34e1580000020a b/resource/policies/base/.nfsd28e711e9b34e1580000020a
new file mode 100644
index 000000000..a4da64b24
--- /dev/null
+++ b/resource/policies/base/.nfsd28e711e9b34e1580000020a
@@ -0,0 +1,200 @@
+/*****************************************************************************\
+ * Copyright 2014 Lawrence Livermore National Security, LLC
+ * (c.f. AUTHORS, NOTICE.LLNS, LICENSE)
+ *
+ * This file is part of the Flux resource manager framework.
+ * For details, see https://github.com/flux-framework.
+ *
+ * SPDX-License-Identifier: LGPL-3.0
+\*****************************************************************************/
+
+#ifndef DFU_MATCH_CB_HPP
+#define DFU_MATCH_CB_HPP
+
+#include
+#include