X-Git-Url: http://bilbo.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/d6eb772e45cc853fc204bb5aebeb411cdfa7c929..ace6ec5d4b81b85275732c9ba244d358ddc30107:/docs/source/Tutorial_DAG.rst diff --git a/docs/source/Tutorial_DAG.rst b/docs/source/Tutorial_DAG.rst index e807cc4f29..4bddc09040 100644 --- a/docs/source/Tutorial_DAG.rst +++ b/docs/source/Tutorial_DAG.rst @@ -3,7 +3,7 @@ Simulating DAG ============== -This tutorial presents the basics to understand how DAG are represented in Simgrid and how to simulate their workflow. +This tutorial presents the basics to understand how DAG are represented in SimGrid and how to simulate their workflow. Definition of a DAG ------------------- @@ -26,7 +26,7 @@ Set of edges representing precedence constraints between :ref:`Activities ` and load the platform. -.. code-block:: cpp - - simgrid::s4u::Engine e(&argc, argv); - e.load_platform(argv[1]); +.. literalinclude:: ../../examples/cpp/dag-tuto/s4u-dag-tuto.cpp + :language: cpp + :lines: 12-13 Retrieve pointers to some hosts. -.. code-block:: cpp - - simgrid::s4u::Host* tremblay = e.host_by_name("Tremblay"); - simgrid::s4u::Host* jupiter = e.host_by_name("Jupiter"); +.. literalinclude:: ../../examples/cpp/dag-tuto/s4u-dag-tuto.cpp + :language: cpp + :lines: 15-16 Initiate the activities. -.. code-block:: cpp - - simgrid::s4u::ExecPtr c1 = simgrid::s4u::Exec::init(); - simgrid::s4u::ExecPtr c2 = simgrid::s4u::Exec::init(); - simgrid::s4u::ExecPtr c3 = simgrid::s4u::Exec::init(); - simgrid::s4u::CommPtr t1 = simgrid::s4u::Comm::sendto_init(); +.. literalinclude:: ../../examples/cpp/dag-tuto/s4u-dag-tuto.cpp + :language: cpp + :lines: 18-21 Give names to thoses activities. -.. code-block:: cpp - - c1->set_name("c1"); - c2->set_name("c2"); - c3->set_name("c3"); - t1->set_name("t1"); +.. literalinclude:: ../../examples/cpp/dag-tuto/s4u-dag-tuto.cpp + :language: cpp + :lines: 23-26 Set the amount of work for each activity. -.. code-block:: cpp - - c1->set_flops_amount(1e9); - c2->set_flops_amount(5e9); - c3->set_flops_amount(2e9); - t1->set_payload_size(5e8); +.. literalinclude:: ../../examples/cpp/dag-tuto/s4u-dag-tuto.cpp + :language: cpp + :lines: 28-31 Define the dependencies between the activities. -.. code-block:: cpp - - c1->add_successor(t1); - t1->add_successor(c3); - c2->add_successor(c3); +.. literalinclude:: ../../examples/cpp/dag-tuto/s4u-dag-tuto.cpp + :language: cpp + :lines: 33-35 Set the location of each Exec activity and source and destination for the Comm activity. -.. code-block:: cpp - - c1->set_host(tremblay); - c2->set_host(jupiter); - c3->set_host(jupiter); - t1->set_source(tremblay); - t1->set_destination(jupiter); +.. literalinclude:: ../../examples/cpp/dag-tuto/s4u-dag-tuto.cpp + :language: cpp + :lines: 37-41 Start the executions of Activities without dependencies. -.. code-block:: cpp - - c1->start(); - c2->start(); +.. literalinclude:: ../../examples/cpp/dag-tuto/s4u-dag-tuto.cpp + :language: cpp + :lines: 43-44 Add a callback to monitor the activities. -.. code-block:: cpp - - Activity::on_completion_cb([](simgrid::s4u::Activity const& activity) { - XBT_INFO("Activity '%s' is complete (start time: %f, finish time: %f)", activity.get_cname(), activity.get_start_time(), - activity.get_finish_time()); - }); +.. literalinclude:: ../../examples/cpp/dag-tuto/s4u-dag-tuto.cpp + :language: cpp + :lines: 46-49 Finally, run the simulation. -.. code-block:: cpp - - e.run(); +.. literalinclude:: ../../examples/cpp/dag-tuto/s4u-dag-tuto.cpp + :language: cpp + :lines: 51 The execution of this code should give you the following output: -.. code-block:: bash - - [10.194200] [main/INFO] Activity 'c1' is complete (start time: 0.000000, finish time: 10.194200) - [65.534235] [main/INFO] Activity 'c2' is complete (start time: 0.000000, finish time: 65.534235) - [85.283378] [main/INFO] Activity 't1' is complete (start time: 10.194200, finish time: 85.283378) - [111.497072] [main/INFO] Activity 'c3' is complete (start time: 85.283378, finish time: 111.497072) +.. literalinclude:: ../../examples/cpp/dag-tuto/s4u-dag-tuto.tesh + :language: none + :lines: 4- Lab 2: Import a DAG from a file ---------------- +------------------------------- -In this lab we present how to import a DAG into you Simgrid simulation, either using a DOT file, a JSON file, or a DAX file. +In this lab we present how to import a DAG into you SimGrid simulation, either using a DOT file, a JSON file, or a DAX file. The files presented in this lab describe the following DAG: -.. image:: /tuto_dag/img/dag2.svg +.. image:: /img/dag2.svg :align: center From a DOT file @@ -192,227 +166,245 @@ A DOT file describes a workflow in accordance with the graphviz format. The following DOT file describes the workflow presented at the beginning of this lab: -.. code-block:: xml +.. literalinclude:: ../../examples/cpp/dag-from-dot-simple/dag.dot + :language: dot - digraph G { - c1 [size="1e9"]; - c2 [size="5e9"]; - c3 [size="2e9"]; +It can be imported as a vector of Activities into SimGrid using :cpp:func:`simgrid::s4u::create_DAG_from_DOT`. Then, you have to assign hosts to your Activities. - root->c1 [size="2e8"]; - root->c2 [size="1e8"]; - c1->c3 [size="5e8"]; - c2->c3 [size="-1"]; - c3->end [size="2e8"]; - } +.. literalinclude:: ../../examples/cpp/dag-from-dot-simple/s4u-dag-from-dot-simple.cpp + :language: cpp -It can be imported as a vector of Activities into Simgrid using :cpp:func:`create_DAG_from_DOT(const std::string& filename)`. Then, you have to assign hosts to your Activities. +The execution of this code should give you the following output: -.. code-block:: cpp +.. literalinclude:: ../../examples/cpp/dag-from-dot-simple/s4u-dag-from-dot-simple.tesh + :language: none + :lines: 4- - #include "simgrid/s4u.hpp" +From a JSON file +................ - XBT_LOG_NEW_DEFAULT_CATEGORY(main, "Messages specific for this s4u example"); +A JSON file describes a workflow in accordance with the `wfformat `_ . - int main(int argc, char* argv[]) { - simgrid::s4u::Engine e(&argc, argv); - e.load_platform(argv[1]); +The following JSON file describes the workflow presented at the beginning of this lab: - std::vector dag = simgrid::s4u::create_DAG_from_dot(argv[2]); +.. literalinclude:: ../../examples/cpp/dag-from-json-simple/dag.json + :language: json - simgrid::s4u::Host* tremblay = e.host_by_name("Tremblay"); - simgrid::s4u::Host* jupiter = e.host_by_name("Jupiter"); - simgrid::s4u::Host* fafard = e.host_by_name("Fafard"); +It can be imported as a vector of Activities into SimGrid using :cpp:func:`simgrid::s4u::create_DAG_from_json`. - dynamic_cast(dag[0].get())->set_host(fafard); - dynamic_cast(dag[1].get())->set_host(tremblay); - dynamic_cast(dag[2].get())->set_host(jupiter); - dynamic_cast(dag[3].get())->set_host(jupiter); - dynamic_cast(dag[8].get())->set_host(jupiter); - - for (const auto& a : dag) { - if (auto* comm = dynamic_cast(a.get())) { - auto pred = dynamic_cast((*comm->get_dependencies().begin()).get()); - auto succ = dynamic_cast(comm->get_successors().front().get()); - comm->set_source(pred->get_host())->set_destination(succ->get_host()); - } - } +.. literalinclude:: ../../examples/cpp/dag-from-json-simple/s4u-dag-from-json-simple.cpp + :language: cpp - simgrid::s4u::Activity::on_completion_cb([](simgrid::s4u::Activity const& activity) { - XBT_INFO("Activity '%s' is complete (start time: %f, finish time: %f)", activity.get_cname(), activity.get_start_time(), - activity.get_finish_time()); - }); +The execution of this code should give you the following output: - e.run(); - return 0; - } +.. literalinclude:: ../../examples/cpp/dag-from-json-simple/s4u-dag-from-json-simple.tesh + :language: none + :lines: 4- -The execution of this code should give you the following output: +From a DAX file [deprecated] +............................ -.. code-block:: bash +A DAX file describes a workflow in accordance with the `Pegasus `_ format. - [0.000000] [main/INFO] Activity 'root' is complete (start time: 0.000000, finish time: 0.000000) - [33.394394] [main/INFO] Activity 'root->c2' is complete (start time: 0.000000, finish time: 33.394394) - [39.832311] [main/INFO] Activity 'root->c1' is complete (start time: 0.000000, finish time: 39.832311) - [50.026511] [main/INFO] Activity 'c1' is complete (start time: 39.832311, finish time: 50.026511) - [98.928629] [main/INFO] Activity 'c2' is complete (start time: 33.394394, finish time: 98.928629) - [125.115689] [main/INFO] Activity 'c1->c3' is complete (start time: 50.026511, finish time: 125.115689) - [151.329383] [main/INFO] Activity 'c3' is complete (start time: 125.115689, finish time: 151.329383) - [151.743605] [main/INFO] Activity 'c3->end' is complete (start time: 151.329383, finish time: 151.743605) - [151.743605] [main/INFO] Activity 'end' is complete (start time: 151.743605, finish time: 151.743605) +The following DAX file describes the workflow presented at the beginning of this lab: -From a JSON file -................ +.. literalinclude:: ../../examples/cpp/dag-from-dax-simple/dag.xml + :language: xml -A JSON file describes a workflow in accordance with the `wfformat `_ . +It can be imported as a vector of Activities into SimGrid using :cpp:func:`simgrid::s4u::create_DAG_from_DAX`. -The following JSON file describes the workflow presented at the beginning of this lab: +.. literalinclude:: ../../examples/cpp/dag-from-dax-simple/s4u-dag-from-dax-simple.cpp + :language: cpp -.. code-block:: JSON +Lab 3: Scheduling with the Min-Min algorithm +-------------------------------------------- - { - "name": "simple_json", - "schemaVersion": "1.0", - "workflow": { - "makespan": 0, - "executedAt": "2023-03-09T00:00:00-00:00", - "tasks": [ - { - "name": "c1", - "type": "compute", - "parents": [], - "runtime": 1e9, - "machine": "Tremblay" - }, - { - "name": "t1", - "type": "transfer", - "parents": ["c1"], - "bytesWritten": 5e8, - "machine": "Jupiter" - }, - { - "name": "c2", - "type": "compute", - "parents": [], - "runtime": 5e9, - "machine": "Jupiter" - }, - { - "name": "c3", - "type": "compute", - "parents": ["t1","c2"], - "runtime": 2e9, - "machine": "Jupiter" - } - ], - "machines": [ - {"nodeName": "Tremblay"}, - {"nodeName": "Jupiter"} - ] - } - } +In this lab we present how to schedule activities imported from a DAX file using the +`Min-Min algorithm `_. + +The source code for this lab can be found `here `_. -It can be imported as a vector of Activities into Simgrid using :cpp:func:`create_DAG_from_json(const std::string& filename)`. +For code readability we first create the `sg4` namespace. .. code-block:: cpp - #include "simgrid/s4u.hpp" + namespace sg4 = simgrid::s4u; - XBT_LOG_NEW_DEFAULT_CATEGORY(main, "Messages specific for this s4u example"); +The core mechanism of the algorithm lies in three functions. +They respectively serve the purpose of finding tasks to schedule, +finding the best host to execute them and properly scheduling them. - int main(int argc, char* argv[]) { - simgrid::s4u::Engine e(&argc, argv); - e.load_platform(argv[1]); +Find Tasks to Schedule +...................... - std::vector dag = simgrid::s4u::create_DAG_from_json(argv[2]); +The role of this function is to retrieve tasks that are ready to be scheduled, i.e, that have their dependencies solved. - simgrid::s4u::Activity::on_completion_cb([](simgrid::s4u::Activity const& activity) { - XBT_INFO("Activity '%s' is complete (start time: %f, finish time: %f)", activity.get_cname(), activity.get_start_time(), - activity.get_finish_time()); - }); +.. literalinclude:: ../../examples/cpp/dag-scheduling/s4u-dag-scheduling.cpp + :language: cpp + :lines: 15-38 - e.run(); - return 0; - } +Find the Best Placement +....................... -The execution of this code should give you the following output: +Once we have a task ready to be scheduled, we need to find the best placement for it. +This is done by evaluating the earliest finish time among all hosts. +It depends on the duration of the data transfers of the parents of this task to this host. -.. code-block:: bash +.. literalinclude:: ../../examples/cpp/dag-scheduling/s4u-dag-scheduling.cpp + :language: cpp + :lines: 40-91 - [10.194200] [main/INFO] Activity 'c1' is complete (start time: 0.000000, finish time: 10.194200) - [65.534235] [main/INFO] Activity 'c2' is complete (start time: 0.000000, finish time: 65.534235) - [85.283378] [main/INFO] Activity 't1' is complete (start time: 10.194200, finish time: 85.283378) - [111.497072] [main/INFO] Activity 'c3' is complete (start time: 85.283378, finish time: 111.497072) +Schedule a Task +............... -From a DAX file [deprecated] -............................ +When the best host has been found, the task is scheduled on it: -A DAX file describes a workflow in accordance with the `Pegasus `_ format. +* it sets the host of the task to schedule +* it stores the finish time of this task on the host +* it sets the destination of parents communication +* it sets the source of any child communication. -The following DAX file describes the workflow presented at the beginning of this lab: +.. literalinclude:: ../../examples/cpp/dag-scheduling/s4u-dag-scheduling.cpp + :language: cpp + :lines: 93-113 + +Mixing it all Together +...................... -.. code-block:: xml - - - - - - - - - - - - - - - - - - - - -It can be imported as a vector of Activities into Simgrid using :cpp:func:`create_DAG_from_DAX(std::string)`. +Now that we have the key components of the algorithm let's merge them inside the main function. .. code-block:: cpp - #include "simgrid/s4u.hpp" + int main(int argc, char** argv) + { + ... + +First, we initialize the SimGrid Engine. - XBT_LOG_NEW_DEFAULT_CATEGORY(main, "Messages specific for this s4u example"); +.. code-block:: cpp - int main(int argc, char* argv[]) { - simgrid::s4u::Engine e(&argc, argv); - e.load_platform(argv[1]); + sg4::Engine e(&argc, argv); - std::vector dag = simgrid::s4u::create_DAG_from_DAX(argv[2]); +The Min-Min algorithm schedules unscheduled tasks. +To keep track of them we make use of the method :cpp:func:`simgrid::s4u::Engine::track_vetoed_activities`. - simgrid::s4u::Host* tremblay = e.host_by_name("Tremblay"); - simgrid::s4u::Host* jupiter = e.host_by_name("Jupiter"); - simgrid::s4u::Host* fafard = e.host_by_name("Fafard"); +.. code-block:: cpp - dynamic_cast(dag[0].get())->set_host(fafard); - dynamic_cast(dag[1].get())->set_host(tremblay); - dynamic_cast(dag[2].get())->set_host(jupiter); - dynamic_cast(dag[3].get())->set_host(jupiter); - dynamic_cast(dag[8].get())->set_host(jupiter); - - for (const auto& a : dag) { - if (auto* comm = dynamic_cast(a.get())) { - auto pred = dynamic_cast((*comm->get_dependencies().begin()).get()); - auto succ = dynamic_cast(comm->get_successors().front().get()); - comm->set_source(pred->get_host())->set_destination(succ->get_host()); - } + std::set vetoed; + e.track_vetoed_activities(&vetoed); + +We add the following callback that will be triggered at the end of execution activities. +This callback stores the finish time of the execution, +to use it as a start time for any subsequent communications. + +.. code-block:: cpp + + sg4::Activity::on_completion_cb([](sg4::Activity const& activity) { + // when an Exec completes, we need to set the potential start time of all its ouput comms + const auto* exec = dynamic_cast(&activity); + if (exec == nullptr) // Only Execs are concerned here + return; + for (const auto& succ : exec->get_successors()) { + auto* comm = dynamic_cast(succ.get()); + if (comm != nullptr) { + auto* finish_time = new double(exec->get_finish_time()); + // We use the user data field to store the finish time of the predecessor of the comm, i.e., its potential start + // time + comm->set_data(finish_time); } + } + }); + +We load the platform and force sequential execution on hosts. - simgrid::s4u::Activity::on_completion_cb([](simgrid::s4u::Activity const& activity) { - XBT_INFO("Activity '%s' is complete (start time: %f, finish time: %f)", activity.get_cname(), activity.get_start_time(), - activity.get_finish_time()); - }); +.. code-block:: cpp + + e.load_platform(argv[1]); + + /* Mark all hosts as sequential, as it ought to be in such a scheduling example. + * + * It means that the hosts can only compute one thing at a given time. If an execution already takes place on a given + * host, any subsequently started execution will be queued until after the first execution terminates */ + for (auto const& host : e.get_all_hosts()) { + host->set_concurrency_limit(1); + host->set_data(new double(0.0)); + } + +The tasks are imported from a DAX file. + +.. code-block:: cpp + /* load the DAX file */ + auto dax = sg4::create_DAG_from_DAX(argv[2]); + +We look for the best host for the root task and schedule it. +We then advance the simulation to unlock next schedulable tasks. + +.. code-block:: cpp + + /* Schedule the root first */ + double finish_time; + auto* root = static_cast(dax.front().get()); + auto host = get_best_host(root, &finish_time); + schedule_on(root, host); + e.run(); + +Then, we get to the major loop of the algorithm. +This loop goes on until all tasks have been scheduled and executed. +It starts by finding ready tasks using `get_ready_tasks`. +It iteratively looks for the task that will finish first among ready tasks using `get_best_host`, and place it using `schedule_on`. +When no more tasks can be placed, we advance the simulation. + +.. code-block:: cpp + + while (not vetoed.empty()) { + XBT_DEBUG("Start new scheduling round"); + /* Get the set of ready tasks */ + auto ready_tasks = get_ready_tasks(dax); + vetoed.clear(); + + if (ready_tasks.empty()) { + /* there is no ready exec, let advance the simulation */ e.run(); - return 0; - } + continue; + } + /* For each ready exec: + * get the host that minimizes the completion time. + * select the exec that has the minimum completion time on its best host. + */ + double min_finish_time = std::numeric_limits::max(); + sg4::Exec* selected_task = nullptr; + sg4::Host* selected_host = nullptr; + + for (auto exec : ready_tasks) { + XBT_DEBUG("%s is ready", exec->get_cname()); + double finish_time; + host = get_best_host(exec, &finish_time); + if (finish_time < min_finish_time) { + min_finish_time = finish_time; + selected_task = exec; + selected_host = host; + } + } + + XBT_INFO("Schedule %s on %s", selected_task->get_cname(), selected_host->get_cname()); + schedule_on(selected_task, selected_host, min_finish_time); + + ready_tasks.clear(); + e.run(); + } + +Finally, we clean up the memory. + +.. code-block:: cpp + + /* Cleanup memory */ + for (auto const& h : e.get_all_hosts()) + delete h->get_data(); + + + + + +