1 /* Copyright (c) 2010-2022. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
6 #include <simgrid/kernel/routing/NetPoint.hpp>
7 #include <simgrid/kernel/routing/NetZoneImpl.hpp>
8 #include <simgrid/s4u/Actor.hpp>
9 #include <simgrid/s4u/Comm.hpp>
10 #include <simgrid/s4u/Engine.hpp>
11 #include <simgrid/s4u/Exec.hpp>
12 #include <simgrid/s4u/Host.hpp>
13 #include <simgrid/s4u/VirtualMachine.hpp>
14 #include <xbt/graph.h>
16 #include "src/instr/instr_private.hpp"
17 #include "src/kernel/resource/CpuImpl.hpp"
18 #include "src/kernel/resource/NetworkModel.hpp"
19 #include "src/surf/surf_interface.hpp"
23 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(instr_routing, instr, "Tracing platform hierarchy");
25 std::string instr_pid(simgrid::s4u::Actor const& proc)
27 return std::string(proc.get_name()) + "-" + std::to_string(proc.get_pid());
30 static simgrid::instr::Container* lowestCommonAncestor(const simgrid::instr::Container* a1,
31 const simgrid::instr::Container* a2)
33 // this is only an optimization (since most of a1 and a2 share the same parent)
34 if (a1->get_parent() == a2->get_parent())
35 return a1->get_parent();
37 // create an array with all ancestors of a1
38 std::vector<simgrid::instr::Container*> ancestors_a1;
39 for (auto* p = a1->get_parent(); p != nullptr; p = p->get_parent())
40 ancestors_a1.push_back(p);
42 // create an array with all ancestors of a2
43 std::vector<simgrid::instr::Container*> ancestors_a2;
44 for (auto* p = a2->get_parent(); p != nullptr; p = p->get_parent())
45 ancestors_a2.push_back(p);
47 // find the lowest ancestor
48 simgrid::instr::Container* p = nullptr;
49 int i = static_cast<int>(ancestors_a1.size()) - 1;
50 int j = static_cast<int>(ancestors_a2.size()) - 1;
51 while (i >= 0 && j >= 0) {
52 simgrid::instr::Container* a1p = ancestors_a1.at(i);
53 if (a1p != ancestors_a2.at(j))
62 static void linkContainers(simgrid::instr::Container* src, simgrid::instr::Container* dst,
63 std::set<std::string, std::less<>>* filter)
66 if (src->get_name() == "__loopback__" || dst->get_name() == "__loopback__") {
67 XBT_DEBUG(" linkContainers: ignoring loopback link");
72 simgrid::instr::Container* parent = lowestCommonAncestor(src, dst);
73 xbt_assert(parent, "common parent unknown, this is a tracing problem");
75 // check if we already register this pair (we only need one direction)
76 std::string aux1 = src->get_name() + dst->get_name();
77 std::string aux2 = dst->get_name() + src->get_name();
78 if (filter->find(aux1) != filter->end()) {
79 XBT_DEBUG(" linkContainers: already registered %s <-> %s (1)", src->get_cname(), dst->get_cname());
82 if (filter->find(aux2) != filter->end()) {
83 XBT_DEBUG(" linkContainers: already registered %s <-> %s (2)", dst->get_cname(), src->get_cname());
87 // ok, not found, register it
92 std::string link_typename = parent->get_type()->get_name() + "-" + src->get_type()->get_name() +
93 std::to_string(src->get_type()->get_id()) + "-" + dst->get_type()->get_name() +
94 std::to_string(dst->get_type()->get_id());
95 simgrid::instr::LinkType* link =
96 parent->get_type()->by_name_or_create(link_typename, src->get_type(), dst->get_type());
97 link->set_calling_container(parent);
100 static long long counter = 0;
102 std::string key = std::to_string(counter);
105 link->start_event(src, "topology", key);
106 link->end_event(dst, "topology", key);
108 XBT_DEBUG(" linkContainers %s <-> %s", src->get_cname(), dst->get_cname());
111 static void recursiveGraphExtraction(const simgrid::s4u::NetZone* netzone, const simgrid::instr::Container* container,
112 std::set<std::string, std::less<>>* filter)
114 if (not TRACE_platform_topology()) {
115 XBT_DEBUG("Graph extraction disabled by user.");
118 XBT_DEBUG("Graph extraction for NetZone = %s", netzone->get_cname());
120 // bottom-up recursion
121 for (auto const& nz_son : netzone->get_children()) {
122 const simgrid::instr::Container* child_container = container->get_child_by_name(nz_son->get_name());
123 recursiveGraphExtraction(nz_son, child_container, filter);
126 auto* graph = xbt_graph_new_graph(0, nullptr);
127 std::map<std::string, xbt_node_t, std::less<>> nodes;
128 std::map<std::string, xbt_edge_t, std::less<>> edges;
130 netzone->get_impl()->get_graph(graph, &nodes, &edges);
131 for (auto const& [_, edge] : edges) {
132 linkContainers(simgrid::instr::Container::by_name(static_cast<const char*>(edge->src->data)),
133 simgrid::instr::Container::by_name(static_cast<const char*>(edge->dst->data)), filter);
135 xbt_graph_free_graph(graph, xbt_free_f, xbt_free_f, nullptr);
139 * user categories support
141 static void recursiveNewVariableType(const std::string& new_typename, const std::string& color,
142 simgrid::instr::Type* root)
144 if (root->get_name() == "HOST" || root->get_name() == "VM")
145 root->by_name_or_create(std::string("p") + new_typename, color);
147 if (root->get_name() == "LINK")
148 root->by_name_or_create(std::string("b") + new_typename, color);
150 for (auto const& [_, child] : root->get_children()) {
151 recursiveNewVariableType(new_typename, color, child.get());
155 void instr_new_variable_type(const std::string& new_typename, const std::string& color)
157 recursiveNewVariableType(new_typename, color, simgrid::instr::Container::get_root()->get_type());
160 static void recursiveNewUserVariableType(const std::string& parent_type, const std::string& new_typename,
161 const std::string& color, simgrid::instr::Type* root)
163 if (root->get_name() == parent_type) {
164 root->by_name_or_create(new_typename, color);
166 for (auto const& [_, child] : root->get_children())
167 recursiveNewUserVariableType(parent_type, new_typename, color, child.get());
170 void instr_new_user_variable_type(const std::string& parent_type, const std::string& new_typename,
171 const std::string& color)
173 recursiveNewUserVariableType(parent_type, new_typename, color, simgrid::instr::Container::get_root()->get_type());
176 static void recursiveNewUserStateType(const std::string& parent_type, const std::string& new_typename,
177 simgrid::instr::Type* root)
179 if (root->get_name() == parent_type)
180 root->by_name_or_create<simgrid::instr::StateType>(new_typename);
182 for (auto const& [_, child] : root->get_children())
183 recursiveNewUserStateType(parent_type, new_typename, child.get());
186 void instr_new_user_state_type(const std::string& parent_type, const std::string& new_typename)
188 recursiveNewUserStateType(parent_type, new_typename, simgrid::instr::Container::get_root()->get_type());
191 static void recursiveNewValueForUserStateType(const std::string& type_name, const char* val, const std::string& color,
192 simgrid::instr::Type* root)
194 if (root->get_name() == type_name)
195 static_cast<simgrid::instr::StateType*>(root)->add_entity_value(val, color);
197 for (auto const& [_, child] : root->get_children())
198 recursiveNewValueForUserStateType(type_name, val, color, child.get());
201 void instr_new_value_for_user_state_type(const std::string& type_name, const char* value, const std::string& color)
203 recursiveNewValueForUserStateType(type_name, value, color, simgrid::instr::Container::get_root()->get_type());
206 namespace simgrid::instr {
208 /** @brief Creates a file with the topology of the platform file used for the simulator.
210 * The graph topology will have the following properties: all hosts, links and routers of the platform file are mapped
211 * to graph nodes; routes are mapped to edges. The platform's zones are not represented in the output.
213 void platform_graph_export_graphviz(const std::string& output_filename)
215 auto* g = xbt_graph_new_graph(0, nullptr);
216 std::map<std::string, xbt_node_t, std::less<>> nodes;
217 std::map<std::string, xbt_edge_t, std::less<>> edges;
218 s4u::Engine::get_instance()->get_netzone_root()->extract_xbt_graph(g, &nodes, &edges);
221 fs.open(output_filename, std::ofstream::out);
222 xbt_assert(not fs.fail(), "Failed to open %s", output_filename.c_str());
225 fs << "digraph test {\n";
227 fs << "graph test {\n";
229 fs << " graph [overlap=scale]\n";
231 fs << " node [shape=box, style=filled]\n";
232 fs << " node [width=.3, height=.3, style=filled, color=skyblue]\n\n";
234 for (auto const& [node, _] : nodes)
235 fs << " \"" << node << "\";\n";
237 for (auto const& [_, edge] : edges) {
238 const char* src_s = static_cast<char*>(edge->src->data);
239 const char* dst_s = static_cast<char*>(edge->dst->data);
241 fs << " \"" << src_s << "\" -> \"" << dst_s << "\";\n";
243 fs << " \"" << src_s << "\" -- \"" << dst_s << "\";\n";
248 xbt_graph_free_graph(g, xbt_free_f, xbt_free_f, nullptr);
251 void platform_graph_export_csv(const std::string& output_filename)
253 auto* g = xbt_graph_new_graph(0, nullptr);
254 std::map<std::string, xbt_node_t, std::less<>> nodes;
255 std::map<std::string, xbt_edge_t, std::less<>> edges;
256 s4u::Engine::get_instance()->get_netzone_root()->extract_xbt_graph(g, &nodes, &edges);
259 fs.open(output_filename, std::ofstream::out);
260 xbt_assert(not fs.fail(), "Failed to open %s", output_filename.c_str());
262 fs << "src,dst" << std::endl;
263 for (auto const& [_, edge] : edges) {
264 const char* src_s = static_cast<char*>(edge->src->data);
265 const char* dst_s = static_cast<char*>(edge->dst->data);
266 fs << src_s << "," << dst_s << "\n";
269 xbt_graph_free_graph(g, xbt_free_f, xbt_free_f, nullptr);
273 static std::vector<NetZoneContainer*> currentContainer; /* push and pop, used only in creation */
274 static void on_netzone_creation(s4u::NetZone const& netzone)
276 std::string id = netzone.get_name();
277 if (Container::get_root() == nullptr) {
278 auto* root = new NetZoneContainer(id, 0, nullptr);
279 xbt_assert(Container::get_root() == root);
281 if (TRACE_smpi_is_enabled()) {
282 auto* mpi = root->get_type()->by_name_or_create<ContainerType>("MPI");
283 if (not TRACE_smpi_is_grouped())
284 mpi->by_name_or_create<StateType>("MPI_STATE");
285 root->get_type()->by_name_or_create("MPI_LINK", mpi, mpi);
286 root->get_type()->by_name_or_create("MIGRATE_LINK", mpi, mpi);
287 mpi->by_name_or_create<StateType>("MIGRATE_STATE");
290 if (TRACE_needs_platform()) {
291 currentContainer.push_back(root);
296 if (TRACE_needs_platform()) {
297 auto level = static_cast<unsigned>(currentContainer.size());
298 auto* container = new NetZoneContainer(id, level, currentContainer.back());
299 currentContainer.push_back(container);
303 static void on_link_creation(s4u::Link const& link)
305 if (currentContainer.empty()) // No ongoing parsing. Are you creating the loopback?
308 auto* container = new Container(link.get_name(), "LINK", currentContainer.back());
310 if ((TRACE_categorized() || TRACE_uncategorized() || TRACE_platform()) && (not TRACE_disable_link())) {
311 VariableType* bandwidth = container->get_type()->by_name_or_create("bandwidth", "");
312 bandwidth->set_calling_container(container);
313 bandwidth->set_event(0, link.get_bandwidth());
314 VariableType* latency = container->get_type()->by_name_or_create("latency", "");
315 latency->set_calling_container(container);
316 latency->set_event(0, link.get_latency());
319 if (TRACE_uncategorized()) {
320 container->get_type()->by_name_or_create("bandwidth_used", "0.5 0.5 0.5");
324 static void on_host_creation(s4u::Host const& host)
326 if (Container::by_name_or_null(host.get_name())) // This host already exists, do nothing
329 Container* container = new HostContainer(host, currentContainer.back());
330 const Container* root = Container::get_root();
332 if ((TRACE_categorized() || TRACE_uncategorized() || TRACE_platform()) && (not TRACE_disable_speed())) {
333 VariableType* speed = container->get_type()->by_name_or_create("speed", "");
334 speed->set_calling_container(container);
335 speed->set_event(0, host.get_speed());
337 VariableType* cores = container->get_type()->by_name_or_create("core_count", "");
338 cores->set_calling_container(container);
339 cores->set_event(0, host.get_core_count());
342 if (TRACE_uncategorized())
343 container->get_type()->by_name_or_create("speed_used", "0.5 0.5 0.5");
345 if (TRACE_smpi_is_enabled() && TRACE_smpi_is_grouped()) {
346 auto* mpi = container->get_type()->by_name_or_create<ContainerType>("MPI");
347 mpi->by_name_or_create<StateType>("MPI_STATE");
348 root->get_type()->by_name_or_create("MIGRATE_LINK", mpi, mpi);
349 mpi->by_name_or_create<StateType>("MIGRATE_STATE");
353 static void on_action_state_change(kernel::resource::Action const& action,
354 kernel::resource::Action::State /* previous */)
356 auto n = static_cast<unsigned>(action.get_variable()->get_number_of_constraint());
358 for (unsigned i = 0; i < n; i++) {
359 double value = action.get_rate() * action.get_variable()->get_constraint_weight(i);
360 /* Beware of composite actions: ptasks put links and cpus together. Extra pb: we cannot dynamic_cast from void* */
361 kernel::resource::Resource* resource = action.get_variable()->get_constraint(i)->get_id();
362 if (const auto* cpu = dynamic_cast<kernel::resource::CpuImpl*>(resource))
363 resource_set_utilization("HOST", "speed_used", cpu->get_cname(), action.get_category(), value,
364 action.get_last_update(), simgrid_get_clock() - action.get_last_update());
366 if (const auto* link = dynamic_cast<kernel::resource::StandardLinkImpl*>(resource))
367 resource_set_utilization("LINK", "bandwidth_used", link->get_cname(), action.get_category(), value,
368 action.get_last_update(), simgrid_get_clock() - action.get_last_update());
372 static void on_platform_created()
374 currentContainer.clear();
375 std::set<std::string, std::less<>> filter;
376 XBT_DEBUG("Starting graph extraction.");
377 recursiveGraphExtraction(s4u::Engine::get_instance()->get_netzone_root(), Container::get_root(), &filter);
378 XBT_DEBUG("Graph extraction finished.");
382 static void on_actor_creation(s4u::Actor const& actor)
384 const Container* root = Container::get_root();
385 Container* container = Container::by_name(actor.get_host()->get_name());
386 std::string container_name = instr_pid(actor);
388 container->create_child(container_name, "ACTOR");
389 auto* actor_type = container->get_type()->by_name_or_create<ContainerType>("ACTOR");
390 auto* state = actor_type->by_name_or_create<StateType>("ACTOR_STATE");
391 state->add_entity_value("suspend", "1 0 1");
392 state->add_entity_value("sleep", "1 1 0");
393 state->add_entity_value("receive", "1 0 0");
394 state->add_entity_value("send", "0 0 1");
395 state->add_entity_value("execute", "0 1 1");
396 root->get_type()->by_name_or_create("ACTOR_LINK", actor_type, actor_type);
398 actor.on_exit([container_name](bool failed) {
400 // kill means that this actor no longer exists, let's destroy it
401 Container::by_name(container_name)->remove_from_parent();
405 static void on_actor_host_change(s4u::Actor const& actor, s4u::Host const& /*previous_location*/)
407 static long long int counter = 0;
408 Container* container = Container::by_name(instr_pid(actor));
409 LinkType* link = Container::get_root()->get_link("ACTOR_LINK");
412 link->start_event(container, "M", std::to_string(counter));
413 // destroy existing container of this process
414 container->remove_from_parent();
415 // create new container on the new_host location
416 Container::by_name(actor.get_host()->get_name())->create_child(instr_pid(actor), "ACTOR");
418 link->end_event(Container::by_name(instr_pid(actor)), "M", std::to_string(counter));
422 static void on_vm_creation(s4u::Host const& host)
424 const Container* container = new HostContainer(host, currentContainer.back());
425 const Container* root = Container::get_root();
426 auto* vm = container->get_type()->by_name_or_create<ContainerType>("VM");
427 auto* state = vm->by_name_or_create<StateType>("VM_STATE");
428 state->add_entity_value("suspend", "1 0 1");
429 state->add_entity_value("sleep", "1 1 0");
430 state->add_entity_value("receive", "1 0 0");
431 state->add_entity_value("send", "0 0 1");
432 state->add_entity_value("execute", "0 1 1");
433 root->get_type()->by_name_or_create("VM_LINK", vm, vm);
434 root->get_type()->by_name_or_create("VM_ACTOR_LINK", vm, vm);
437 void define_callbacks()
439 // always need the callbacks to zones (we need only the root zone), to create the rootContainer and the rootType
441 if (TRACE_needs_platform()) {
442 s4u::Engine::on_platform_created_cb(on_platform_created);
443 s4u::Host::on_creation_cb(on_host_creation);
444 s4u::Host::on_speed_change_cb([](s4u::Host const& host) {
445 Container::by_name(host.get_name())
446 ->get_variable("speed")
447 ->set_event(simgrid_get_clock(), host.get_core_count() * host.get_available_speed());
449 s4u::Link::on_creation_cb(on_link_creation);
450 s4u::Link::on_bandwidth_change_cb([](s4u::Link const& link) {
451 Container::by_name(link.get_name())
452 ->get_variable("bandwidth")
453 ->set_event(simgrid_get_clock(), sg_bandwidth_factor * link.get_bandwidth());
455 s4u::NetZone::on_seal_cb([](s4u::NetZone const& /*netzone*/) { currentContainer.pop_back(); });
456 kernel::routing::NetPoint::on_creation.connect([](kernel::routing::NetPoint const& netpoint) {
457 if (netpoint.is_router())
458 new RouterContainer(netpoint.get_name(), currentContainer.back());
462 s4u::NetZone::on_creation_cb(on_netzone_creation);
464 kernel::resource::CpuAction::on_state_change.connect(on_action_state_change);
465 s4u::Link::on_communication_state_change_cb(on_action_state_change);
467 if (TRACE_actor_is_enabled()) {
468 s4u::Actor::on_creation_cb(on_actor_creation);
469 s4u::Actor::on_destruction_cb([](s4u::Actor const& actor) {
470 auto container = Container::by_name_or_null(instr_pid(actor));
471 if (container != nullptr)
472 container->remove_from_parent();
474 s4u::Actor::on_suspend_cb([](s4u::Actor const& actor) {
475 Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("suspend");
477 s4u::Actor::on_resume_cb(
478 [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
479 s4u::Actor::on_sleep_cb([](s4u::Actor const& actor) {
480 Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("sleep");
482 s4u::Actor::on_wake_up_cb(
483 [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
484 s4u::Exec::on_start_cb([](s4u::Exec const&) {
485 Container::by_name(instr_pid(*s4u::Actor::self()))->get_state("ACTOR_STATE")->push_event("execute");
487 s4u::Activity::on_completion_cb([](const s4u::Activity&) {
488 Container::by_name(instr_pid(*s4u::Actor::self()))->get_state("ACTOR_STATE")->pop_event();
490 s4u::Comm::on_send_cb([](s4u::Comm const&) {
491 Container::by_name(instr_pid(*s4u::Actor::self()))->get_state("ACTOR_STATE")->push_event("send");
493 s4u::Comm::on_recv_cb([](s4u::Comm const&) {
494 Container::by_name(instr_pid(*s4u::Actor::self()))->get_state("ACTOR_STATE")->push_event("receive");
496 s4u::Actor::on_host_change_cb(on_actor_host_change);
499 if (TRACE_smpi_is_enabled() && TRACE_smpi_is_computing()) {
500 s4u::Exec::on_start_cb([](s4u::Exec const& exec) {
501 Container::by_name(std::string("rank-") + std::to_string(s4u::Actor::self()->get_pid()))
502 ->get_state("MPI_STATE")
503 ->push_event("computing", new CpuTIData("compute", exec.get_cost()));
505 s4u::Activity::on_completion_cb([](const s4u::Activity&) {
506 Container::by_name(std::string("rank-") + std::to_string(s4u::Actor::self()->get_pid()))
507 ->get_state("MPI_STATE")
512 if (TRACE_vm_is_enabled()) {
513 s4u::Host::on_creation_cb(on_vm_creation);
514 s4u::VirtualMachine::on_start_cb([](s4u::VirtualMachine const& vm) {
515 Container::by_name(vm.get_name())->get_state("VM_STATE")->push_event("start");
517 s4u::VirtualMachine::on_started_cb(
518 [](s4u::VirtualMachine const& vm) { Container::by_name(vm.get_name())->get_state("VM_STATE")->pop_event(); });
519 s4u::VirtualMachine::on_suspend_cb([](s4u::VirtualMachine const& vm) {
520 Container::by_name(vm.get_name())->get_state("VM_STATE")->push_event("suspend");
522 s4u::VirtualMachine::on_resume_cb(
523 [](s4u::VirtualMachine const& vm) { Container::by_name(vm.get_name())->get_state("VM_STATE")->pop_event(); });
524 s4u::Host::on_destruction_cb(
525 [](s4u::Host const& host) { Container::by_name(host.get_name())->remove_from_parent(); });
528 } // namespace simgrid::instr