(multiplicative factor applied when updating the amount of work remaining).
- Example: examples/cpp/exec-cpu-factors
+Python:
+ - Added support for programmatic platform creation in Python.
+ - Several new methods were added to the Python interface. For more details, see
+ the corresponding documentation.
+ - New python examples:
+ - Clusters: examples/python/clusters-multicpu
+ - Disk: examples/python/io-degradation
+ - CPU: examples/python/exec-cpu-nonlinear
+ - Link: examples/python/network-nonlinear
+
S4U:
- New: s4u::Disk::set_sharing_policy() and s4u::Host::set_sharing_policy().
Allows the configuration of non-linear resource sharing for hosts and
* New section "Release Notes" documenting recent and current developments.
* New section "Modeling I/O: the realistic way" presenting how to properly
model disks in SimGrid.
+ * Improvements in API Reference for C++ and Python interfaces.
Fixed bugs (FG#.. -> FramaGit bugs; FG!.. -> FG merge requests)
(FG: issues on Framagit; GF: issues on GForge; GH: issues on GitHub)
include examples/cpp/clusters-multicpu/s4u-clusters-multicpu.tesh
include examples/cpp/comm-dependent/s4u-comm-dependent.cpp
include examples/cpp/comm-dependent/s4u-comm-dependent.tesh
+include examples/cpp/comm-failure/s4u-comm-failure.cpp
+include examples/cpp/comm-failure/s4u-comm-failure.tesh
include examples/cpp/comm-host2host/s4u-comm-host2host.cpp
include examples/cpp/comm-host2host/s4u-comm-host2host.tesh
include examples/cpp/comm-pingpong/s4u-comm-pingpong.cpp
include examples/cpp/exec-dependent/s4u-exec-dependent.tesh
include examples/cpp/exec-dvfs/s4u-exec-dvfs.cpp
include examples/cpp/exec-dvfs/s4u-exec-dvfs.tesh
+include examples/cpp/exec-failure/s4u-exec-failure.cpp
+include examples/cpp/exec-failure/s4u-exec-failure.tesh
include examples/cpp/exec-ptask-multicore/s4u-exec-ptask-multicore.cpp
include examples/cpp/exec-ptask-multicore/s4u-exec-ptask-multicore.tesh
include examples/cpp/exec-ptask/s4u-exec-ptask.cpp
include examples/python/exec-async/exec-async.tesh
include examples/python/exec-basic/exec-basic.py
include examples/python/exec-basic/exec-basic.tesh
+include examples/python/exec-cpu-nonlinear/exec-cpu-nonlinear.py
+include examples/python/exec-cpu-nonlinear/exec-cpu-nonlinear.tesh
include examples/python/exec-dvfs/exec-dvfs.py
include examples/python/exec-dvfs/exec-dvfs.tesh
include examples/python/exec-remote/exec-remote.py
include examples/python/exec-remote/exec-remote.tesh
+include examples/python/io-degradation/io-degradation.py
+include examples/python/io-degradation/io-degradation.tesh
include examples/python/network-nonlinear/network-nonlinear.py
include examples/python/network-nonlinear/network-nonlinear.tesh
include examples/smpi/NAS/DGraph.c
.. group-tab:: Python
.. automethod:: simgrid.Mailbox.get
+ .. automethod:: simgrid.Mailbox.get_async
.. group-tab:: C
idiom on disks because SimGrid does not allow (yet) to create nor
destroy resources once the simulation is started.
+ .. doxygenfunction:: simgrid::s4u::Disk::seal()
+
+ .. group-tab:: Python
+
+ .. code:: Python
+
+ from simgrid import Disk
+
+ .. automethod:: simgrid.Disk.seal
+
Querying info
-------------
.. doxygenfunction:: simgrid::s4u::Disk::get_read_bandwidth() const
.. doxygenfunction:: simgrid::s4u::Disk::get_write_bandwidth() const
.. doxygenfunction:: simgrid::s4u::Disk::set_property(const std::string &, const std::string &value)
+ .. doxygenfunction:: simgrid::s4u::Disk::set_sharing_policy(Operation op, SharingPolicy policy, const s4u::NonLinearResourceCb& cb = {})
+
+ .. group-tab:: Python
+
+ .. autoattribute:: simgrid.Disk.name
+ .. automethod:: simgrid.Disk.set_sharing_policy
I/O operations
--------------
.. doxygenfunction:: simgrid::s4u::Disk::write(sg_size_t size) const
.. doxygenfunction:: simgrid::s4u::Disk::write_async(sg_size_t size) const
+ .. group-tab:: Python
+
+ .. automethod:: simgrid.Disk.read
+ .. automethod:: simgrid.Disk.read_async
+ .. automethod:: simgrid.Disk.write
+ .. automethod:: simgrid.Disk.write_async
+
Signals
-------
destroy resources once the simulation is started.
.. doxygenfunction:: simgrid::s4u::Host::destroy()
+ .. doxygenfunction:: simgrid::s4u::Host::seal()
.. group-tab:: Python
from simgrid import Host
+ .. automethod:: simgrid.Host.seal
+
.. group-tab:: C
.. code:: C
.. doxygenfunction:: sg_host_list()
.. doxygenfunction:: sg_hosts_as_dynar()
+Modifying characteristics
+-------------------------
+
+.. tabs::
+
+ .. group-tab:: C++
+
+ .. doxygenfunction:: simgrid::s4u::Host::set_core_count(int core_count)
+ .. doxygenfunction:: simgrid::s4u::Host::set_coordinates(const std::string& coords)
+ .. doxygenfunction:: simgrid::s4u::Host::set_sharing_policy(SharingPolicy policy, const s4u::NonLinearResourceCb& cb = {})
+
+ .. group-tab:: Python
+
+ .. automethod:: simgrid.Host.set_core_count
+ .. automethod:: simgrid.Host.set_coordinates
+ .. automethod:: simgrid.Host.set_sharing_policy
+
Querying info
-------------
.. doxygenfunction:: simgrid::s4u::Host::get_disks() const
.. doxygenfunction:: simgrid::s4u::Host::remove_disk(const std::string &disk_name)
+ .. group-tab:: Python
+
+ .. automethod:: simgrid.Host.get_disks
+
.. group-tab:: C
.. doxygenfunction:: sg_host_get_actor_list(const_sg_host_t host, xbt_dynar_t whereto)
.. doxygenfunction:: simgrid::s4u::Host::get_netpoint() const
.. doxygenfunction:: simgrid::s4u::Host::route_to(const Host *dest, std::vector< Link * > &links, double *latency) const
.. doxygenfunction:: simgrid::s4u::Host::route_to(const Host *dest, std::vector< kernel::resource::LinkImpl * > &links, double *latency) const
+ .. doxygenfunction:: simgrid::s4u::Host::create_disk(const std::string& name, double read_bandwidth, double write_bandwidth)
+ .. doxygenfunction:: simgrid::s4u::Host::create_disk(const std::string& name, const std::string& read_bandwidth, const std::string& write_bandwidth)
+
+ .. group-tab:: Python
+
+ .. automethod:: simgrid.Host.get_netpoint
+ .. automethod:: simgrid.Host.create_disk
.. group-tab:: C
idiom on hosts because SimGrid does not allow (yet) to create nor
destroy resources once the simulation is started.
+ .. doxygenfunction:: simgrid::s4u::Link::seal()
+
.. group-tab:: Python
.. code:: Python
from simgrid import Link
+ .. automethod:: simgrid.Link.seal
+
.. group-tab:: C
.. code:: C
.. doxygenfunction:: simgrid::s4u::Link::by_name(const std::string &name)
.. doxygenfunction:: simgrid::s4u::Link::by_name_or_null(const std::string &name)
+ .. group-tab:: Python
+
+ .. autoattribute:: simgrid.Link.name
+
.. group-tab:: C
.. doxygenfunction:: sg_link_by_name(const char *name)
.. doxygenfunction:: simgrid::s4u::Link::set_bandwidth(double value)
.. doxygenfunction:: simgrid::s4u::Link::set_latency(double value)
+ .. doxygenfunction:: simgrid::s4u::Link::set_latency(const std::string& value)
+ .. doxygenfunction:: simgrid::s4u::Link::set_concurrency_limit(int limit)
+ .. doxygenfunction:: simgrid::s4u::Link::set_sharing_policy(SharingPolicy policy, const NonLinearResourceCb& cb = {})
+
+ .. group-tab:: Python
+
+ .. automethod:: simgrid.Link.set_latency
+ .. automethod:: simgrid.Link.set_concurrency_limit
+ .. automethod:: simgrid.Link.set_sharing_policy
.. group-tab:: C
.. doxygenfunction:: simgrid::s4u::Link::set_host_wifi_rate
+ .. group-tab:: Python
+
+ .. automethod:: simgrid.Link.set_host_wifi_rate
+
Signals
-------
idiom on network zones because SimGrid does not allow (yet) to create nor
destroy resources once the simulation is started.
+ .. doxygenfunction:: simgrid::s4u::NetZone::seal()
+
+ .. group-tab:: Python
+
+ .. code:: Python
+
+ from simgrid import NetZone
+
+ .. autoclass:: simgrid.NetZone
+ .. automethod:: simgrid.NetZone.seal
+
.. group-tab:: C
.. code:: C
.. doxygenfunction:: simgrid::s4u::NetZone::get_cname() const
.. doxygenfunction:: simgrid::s4u::NetZone::get_name() const
+ .. doxygenfunction:: simgrid::s4u::NetZone::get_netpoint()
+
+ .. group-tab:: Python
+
+ .. autoattribute:: simgrid.NetZone.name
+ .. automethod:: simgrid.NetZone.get_netpoint
.. group-tab:: C
.. doxygenfunction:: simgrid::s4u::NetZone::get_property(const std::string &key) const
.. doxygenfunction:: simgrid::s4u::NetZone::set_property(const std::string &key, const std::string &value)
+ .. group-tab:: Python
+
+ .. automethod:: simgrid.NetZone.set_property
+
.. group-tab:: C
.. doxygenfunction:: sg_zone_get_property_value(const_sg_netzone_t as, const char *name)
.. doxygenfunction:: simgrid::s4u::NetZone::add_bypass_route
.. doxygenfunction:: simgrid::s4u::NetZone::get_children() const
.. doxygenfunction:: simgrid::s4u::NetZone::get_parent() const
+ .. doxygenfunction:: simgrid::s4u::NetZone::set_parent(const NetZone* parent)
+
+ .. group-tab:: Python
+
+ .. automethod:: simgrid.NetZone.add_route
+ .. automethod:: simgrid.NetZone.set_parent
.. group-tab:: C
.. doxygenvariable:: simgrid::s4u::NetZone::on_creation
.. doxygenvariable:: simgrid::s4u::NetZone::on_seal
+Creating resources
+------------------
+
+Zones
+^^^^^
+.. tabs::
+
+ .. group-tab:: C++
+
+ .. doxygenfunction:: simgrid::s4u::create_full_zone(const std::string& name)
+ .. doxygenfunction:: simgrid::s4u::create_empty_zone(const std::string& name)
+ .. doxygenfunction:: simgrid::s4u::create_star_zone(const std::string& name)
+ .. doxygenfunction:: simgrid::s4u::create_dijkstra_zone(const std::string& name, bool cache)
+ .. doxygenfunction:: simgrid::s4u::create_floyd_zone(const std::string& name)
+ .. doxygenfunction:: simgrid::s4u::create_vivaldi_zone(const std::string& name)
+ .. doxygenfunction:: simgrid::s4u::create_wifi_zone(const std::string& name)
+ .. doxygenfunction:: simgrid::s4u::create_torus_zone(const std::string& name, const NetZone* parent, const std::vector<unsigned int>& dimensions, const ClusterCallbacks& set_callbacks, double bandwidth, double latency, Link::SharingPolicy sharing_policy)
+ .. doxygenfunction:: simgrid::s4u::create_fatTree_zone(const std::string& name, const NetZone* parent, const FatTreeParams& parameters, const ClusterCallbacks& set_callbacks, double bandwidth, double latency, Link::SharingPolicy sharing_policy)
+ .. doxygenfunction:: simgrid::s4u::create_dragonfly_zone(const std::string& name, const NetZone* parent, const DragonflyParams& parameters, const ClusterCallbacks& set_callbacks, double bandwidth, double latency, Link::SharingPolicy sharing_policy)
+
+ .. group-tab:: Python
+
+ .. automethod:: simgrid.NetZone.create_full_zone
+ .. automethod:: simgrid.NetZone.create_empty_zone
+ .. automethod:: simgrid.NetZone.create_star_zone
+ .. automethod:: simgrid.NetZone.create_dijkstra_zone
+ .. automethod:: simgrid.NetZone.create_floyd_zone
+ .. automethod:: simgrid.NetZone.create_vivaldi_zone
+ .. automethod:: simgrid.NetZone.create_wifi_zone
+ .. automethod:: simgrid.NetZone.create_torus_zone
+ .. automethod:: simgrid.NetZone.create_fatTree_zone
+ .. automethod:: simgrid.NetZone.create_dragonfly_zone
+
+Hosts
+^^^^^
+
+.. tabs::
+
+ .. group-tab:: C++
+
+ .. doxygenfunction:: simgrid::s4u::NetZone::create_host(const std::string& name, const std::vector<double>& speed_per_pstate)
+ .. doxygenfunction:: simgrid::s4u::NetZone::create_host(const std::string& name, double speed)
+ .. doxygenfunction:: simgrid::s4u::NetZone::create_host(const std::string& name, const std::vector<std::string>& speed_per_pstate)
+ .. doxygenfunction:: simgrid::s4u::NetZone::create_host(const std::string& name, const std::string& speed)
+
+ .. group-tab:: Python
+
+ .. automethod:: simgrid.NetZone.create_host
+
+Links
+^^^^^
+
+.. tabs::
+
+ .. group-tab:: C++
+
+ .. doxygenfunction:: simgrid::s4u::NetZone::create_link(const std::string& name, const std::vector<double>& bandwidths)
+ .. doxygenfunction:: simgrid::s4u::NetZone::create_link(const std::string& name, double bandwidth)
+ .. doxygenfunction:: simgrid::s4u::NetZone::create_link(const std::string& name, const std::vector<std::string>& bandwidths)
+ .. doxygenfunction:: simgrid::s4u::NetZone::create_link(const std::string& name, const std::string& bandwidth)
+ .. doxygenfunction:: simgrid::s4u::NetZone::create_split_duplex_link(const std::string& name, const std::string& bandwidth)
+ .. doxygenfunction:: simgrid::s4u::NetZone::create_split_duplex_link(const std::string& name, double bandwidth)
+
+ .. group-tab:: Python
+
+ .. automethod:: simgrid.NetZone.create_link
+ .. automethod:: simgrid.NetZone.create_split_duplex_link
+
+Router
+^^^^^^
+
+.. tabs::
+
+ .. group-tab:: C++
+
+ .. doxygenfunction:: simgrid::s4u::NetZone::create_router(const std::string& name)
+
+ .. group-tab:: Python
+
+ .. automethod:: simgrid.NetZone.create_router
+
.. _API_s4u_VirtualMachine:
=======================
.. _API_s4u_Activity:
+==========
+Activities
+==========
+
==============
class Activity
==============
.. doxygenfunction:: simgrid::s4u::Io::test
.. doxygenfunction:: simgrid::s4u::Io::wait
.. doxygenfunction:: simgrid::s4u::Io::wait_for
+ .. doxygenfunction:: simgrid::s4u::Io::wait_any
+ .. doxygenfunction:: simgrid::s4u::Io::wait_any_for
+
+ .. group-tab:: Python
+
+ .. automethod:: simgrid.Io.test
+ .. automethod:: simgrid.Io.wait
+ .. automethod:: simgrid.Io.wait_any_for
+ .. automethod:: simgrid.Io.wait_any
.. _API_s4u_Synchronizations:
FROM debian:10.10-slim
-RUN echo " \
- deb [check-valid-until=no, trusted=yes] http://snapshot.debian.org/archive/debian/20210707T150931Z/ buster main contrib non-free \n\
- deb-src [check-valid-until=no, trusted=yes] http://snapshot.debian.org/archive/debian/20210707T150931Z/ buster main contrib non-free \n\
- deb [check-valid-until=no, trusted=yes] http://snapshot.debian.org/archive/debian-security/20210707T150931Z/ buster/updates main contrib non-free \n\
- deb-src [check-valid-until=no, trusted=yes] http://snapshot.debian.org/archive/debian-security/20210707T150931Z/ buster/updates main contrib non-free" > /etc/apt/sources.list
-
-RUN apt-get -o Acquire::Check-Valid-Until=false update;
-
+RUN printf '%s\n' \
+ "deb [check-valid-until=no, trusted=yes] http://snapshot.debian.org/archive/debian/20210707T150931Z/ buster main contrib non-free" \
+ "deb-src [check-valid-until=no, trusted=yes] http://snapshot.debian.org/archive/debian/20210707T150931Z/ buster main contrib non-free" \
+ "deb [check-valid-until=no, trusted=yes] http://snapshot.debian.org/archive/debian-security/20210707T150931Z/ buster/updates main contrib non-free" \
+ "deb-src [check-valid-until=no, trusted=yes] http://snapshot.debian.org/archive/debian-security/20210707T150931Z/ buster/updates main contrib non-free" > /etc/apt/sources.list \
+&& \
+ apt-get -o Acquire::Check-Valid-Until=false update \
+&& \
+ apt-get install -y --no-install-recommends \
# emacs
-RUN apt-get install -y \
emacs \
ess \
- elpa-htmlize
-
+ elpa-htmlize \
# R
-RUN apt-get install -y \
r-base-core \
r-cran-ggplot2 \
r-cran-dplyr \
r-cran-plyr \
r-cran-jsonlite \
- r-cran-gridextra
-
+ r-cran-gridextra \
# simgrid dependencies
-RUN apt install -y \
g++ \
gcc \
git \
libboost-dev \
libboost-all-dev \
cmake \
- dpkg-dev
-
-RUN apt-get install -y \
- curl
-
-RUN apt-get clean \
+ dpkg-dev \
+# misc tools
+ curl \
+&& \
+ apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# install ox-rst to convert org to rst
static void create_disk(sg4::Host* host, std::mt19937& gen, const std::string& disk_name,
const boost::property_tree::ptree& pt)
{
-
double read_bw = pt.get_child("read_bw").begin()->second.get_value<double>() * 1e6;
double write_bw = pt.get_child("write_bw").begin()->second.get_value<double>() * 1e6;
auto* disk = host->create_disk(disk_name, read_bw, write_bw);
actor-lifetime actor-migrate actor-suspend actor-yield actor-stacksize
app-bittorrent app-chainsend app-token-ring
comm-pingpong comm-ready comm-serialize comm-suspend comm-wait comm-waitany comm-waitall comm-waituntil
- comm-dependent comm-host2host
+ comm-dependent comm-host2host comm-failure
cloud-capping cloud-migration cloud-simple
dht-chord dht-kademlia
energy-exec energy-boot energy-link energy-vm energy-exec-ptask energy-wifi
engine-filtering
exec-async exec-basic exec-dvfs exec-remote exec-waitany exec-waitfor exec-dependent exec-unassigned
- exec-ptask-multicore exec-cpu-nonlinear exec-cpu-factors
+ exec-ptask-multicore exec-cpu-nonlinear exec-cpu-factors exec-failure
maestro-set
mc-bugged1 mc-bugged2 mc-electric-fence mc-failing-assert
network-wifi
--- /dev/null
+/* Copyright (c) 2021. The SimGrid Team. All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/* This example shows how to react to a failed communication, which occurs when a link is turned off
+ * while a communication is using it.
+ *
+ * The sender initiates two asynchronous sends, one to each receiver, and blocks on both of them
+ * using simgrid::s4u::Comm::wait_any(). Meanwhile, the LinkKiller actor turns off the link used by the
+ * first communication after 10 seconds, so wait_any() raises a simgrid::NetworkFailureException.
+ *
+ * The sender then looks at the state of each communication to find out which one failed, and finally
+ * waits for the remaining communication to terminate.
+ */
+
+#include <simgrid/s4u.hpp>
+
+XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_comm_failure, "Messages specific for this s4u example");
+namespace sg4 = simgrid::s4u;
+
+/* Actor that starts two asynchronous sends (one per mailbox) and shows how to cope with the failure of one of them. */
+class Sender {
+  std::string mailbox1_name;
+  std::string mailbox2_name;
+
+public:
+  /* The names are taken by const reference so that each of them is copied only once, into its member. */
+  Sender(const std::string& mailbox1_name, const std::string& mailbox2_name)
+      : mailbox1_name(mailbox1_name), mailbox2_name(mailbox2_name)
+  {
+  }
+
+  /* Actor body. It is const because it only reads the stored mailbox names. */
+  void operator()() const
+  {
+    auto* mailbox1 = sg4::Mailbox::by_name(mailbox1_name);
+    auto* mailbox2 = sg4::Mailbox::by_name(mailbox2_name);
+
+    XBT_INFO("Initiating asynchronous send to %s", mailbox1->get_cname());
+    auto comm1 = mailbox1->put_async((void*)666, 5);
+    XBT_INFO("Initiating asynchronous send to %s", mailbox2->get_cname());
+    auto comm2 = mailbox2->put_async((void*)666, 2);
+
+    XBT_INFO("Calling wait_any..");
+    std::vector<sg4::CommPtr> pending_comms;
+    pending_comms.push_back(comm1);
+    pending_comms.push_back(comm2);
+    try {
+      long index = sg4::Comm::wait_any(pending_comms);
+      XBT_INFO("Wait any returned index %ld (comm to %s)", index, pending_comms.at(index)->get_mailbox()->get_cname());
+    } catch (const simgrid::NetworkFailureException&) {
+      /* The exception object is not inspected here: the state of each comm is checked below instead */
+      XBT_INFO("Sender has experienced a network failure exception, so it knows that something went wrong");
+      XBT_INFO("Now it needs to figure out which of the two comms failed by looking at their state");
+    }
+
+    XBT_INFO("Comm to %s has state: %s", comm1->get_mailbox()->get_cname(), comm1->get_state_str());
+    XBT_INFO("Comm to %s has state: %s", comm2->get_mailbox()->get_cname(), comm2->get_state_str());
+
+    try {
+      comm1->wait();
+    } catch (const simgrid::NetworkFailureException& e) {
+      XBT_INFO("Waiting on a FAILED comm raises an exception: '%s'", e.what());
+    }
+    XBT_INFO("Wait for remaining comm, just to be nice");
+    /* Drop comm1 (the first element) so that only comm2 is waited for */
+    pending_comms.erase(pending_comms.begin());
+    sg4::Comm::wait_any(pending_comms);
+  }
+};
+
+/* Actor that posts one blocking receive on its mailbox and reports whether it succeeded or hit a network failure. */
+class Receiver {
+  std::string mailbox_name;
+
+public:
+  /* The name is taken by const reference so that it is copied only once, into the member. */
+  explicit Receiver(const std::string& mailbox_name) : mailbox_name(mailbox_name) {}
+
+  /* Actor body. It is const because it only reads the stored mailbox name. */
+  void operator()() const
+  {
+    auto* mailbox = sg4::Mailbox::by_name(mailbox_name);
+    XBT_INFO("Receiver posting a receive...");
+    try {
+      mailbox->get<void*>();
+      XBT_INFO("Receiver has received successfully!");
+    } catch (const simgrid::NetworkFailureException&) {
+      /* The exception content is not needed, only the fact that the receive failed */
+      XBT_INFO("Receiver has experience a network failure exception");
+    }
+  }
+};
+
+/* Actor that sleeps for 10 seconds and then turns off the link whose name it was given. */
+class LinkKiller {
+  std::string link_name;
+
+public:
+  /* The name is taken by const reference so that it is copied only once, into the member. */
+  explicit LinkKiller(const std::string& link_name) : link_name(link_name) {}
+
+  /* Actor body. It is const because it only reads the stored link name. */
+  void operator()() const
+  {
+    auto* link_to_kill = sg4::Link::by_name(link_name);
+    XBT_INFO("LinkKiller sleeping 10 seconds...");
+    sg4::this_actor::sleep_for(10.0);
+    XBT_INFO("LinkKiller turning off link %s", link_to_kill->get_cname());
+    link_to_kill->turn_off();
+    XBT_INFO("LinkKiller killed. exiting");
+  }
+};
+
+/* Builds a three-host platform programmatically, deploys the actors and runs the simulation. */
+int main(int argc, char** argv)
+{
+  sg4::Engine e(&argc, argv);
+
+  /* Platform: one full zone containing three hosts */
+  auto* root_zone     = sg4::create_full_zone("AS0");
+  auto* sender_host   = root_zone->create_host("Host1", "1f");
+  auto* receiver_one  = root_zone->create_host("Host2", "1f");
+  auto* receiver_two  = root_zone->create_host("Host3", "1f");
+
+  /* One dedicated link from the sender to each receiver */
+  sg4::LinkInRoute route_link2{root_zone->create_link("linkto2", "1bps")->seal()};
+  sg4::LinkInRoute route_link3{root_zone->create_link("linkto3", "1bps")->seal()};
+
+  root_zone->add_route(sender_host->get_netpoint(), receiver_one->get_netpoint(), nullptr, nullptr, {route_link2},
+                       false);
+  root_zone->add_route(sender_host->get_netpoint(), receiver_two->get_netpoint(), nullptr, nullptr, {route_link3},
+                       false);
+  root_zone->seal();
+
+  /* Deployment: the creation order is kept as is, every actor but the sender is a daemon */
+  sg4::Actor::create("Sender", sender_host, Sender("mailbox2", "mailbox3"));
+  sg4::Actor::create("Receiver", receiver_one, Receiver("mailbox2"))->daemonize();
+  sg4::Actor::create("Receiver", receiver_two, Receiver("mailbox3"))->daemonize();
+  sg4::Actor::create("LinkKiller", sender_host, LinkKiller("linkto2"))->daemonize();
+
+  e.run();
+
+  return 0;
+}
--- /dev/null
+#!/usr/bin/env tesh
+
+$ ${bindir:=.}/s4u-comm-failure "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n"
+> [ 0.000000] (4:LinkKiller@Host1) LinkKiller sleeping 10 seconds...
+> [ 0.000000] (2:Receiver@Host2) Receiver posting a receive...
+> [ 0.000000] (3:Receiver@Host3) Receiver posting a receive...
+> [ 0.000000] (1:Sender@Host1) Initiating asynchronous send to mailbox2
+> [ 0.000000] (1:Sender@Host1) Initiating asynchronous send to mailbox3
+> [ 0.000000] (1:Sender@Host1) Calling wait_any..
+> [ 10.000000] (4:LinkKiller@Host1) LinkKiller turning off link linkto2
+> [ 10.000000] (4:LinkKiller@Host1) LinkKiller killed. exiting
+> [ 10.000000] (2:Receiver@Host2) Receiver has experience a network failure exception
+> [ 10.000000] (1:Sender@Host1) Sender has experienced a network failure exception, so it knows that something went wrong
+> [ 10.000000] (1:Sender@Host1) Now it needs to figure out which of the two comms failed by looking at their state
+> [ 10.000000] (1:Sender@Host1) Comm to mailbox2 has state: FAILED
+> [ 10.000000] (1:Sender@Host1) Comm to mailbox3 has state: STARTED
+> [ 10.000000] (1:Sender@Host1) Waiting on a FAILED comm raises an exception: 'Cannot wait for a failed communication'
+> [ 10.000000] (1:Sender@Host1) Wait for remaining comm, just to be nice
+> [ 16.494845] (3:Receiver@Host3) Receiver has received successfully!
--- /dev/null
+/* Copyright (c) 2021. The SimGrid Team. All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/* This example shows how to react to a failed execution, which occurs when the host running it is turned off.
+ *
+ * The dispatcher starts two asynchronous executions, one on each host, and blocks on both of them
+ * using simgrid::s4u::Exec::wait_any(). Meanwhile, the HostKiller actor turns off the second host after
+ * 10 seconds, so wait_any() raises a simgrid::HostFailureException.
+ *
+ * The dispatcher then looks at the state of each execution to find out which one failed, and finally
+ * waits for the remaining execution to terminate.
+ */
+
+#include <simgrid/s4u.hpp>
+
+XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_exec_failure, "Messages specific for this s4u example");
+namespace sg4 = simgrid::s4u;
+
+/* Starts one asynchronous execution on each of the two given hosts, waits for any of them, and shows how to
+ * handle the failure of one execution before waiting for the other one. */
+static void dispatcher(sg4::Host* host1, sg4::Host* host2)
+{
+  std::vector<sg4::ExecPtr> pending_execs;
+  XBT_INFO("Initiating asynchronous exec on %s", host1->get_cname());
+  auto exec1 = sg4::this_actor::exec_init(20)->set_host(host1);
+  pending_execs.push_back(exec1);
+  exec1->start();
+  XBT_INFO("Initiating asynchronous exec on %s", host2->get_cname());
+  auto exec2 = sg4::this_actor::exec_init(20)->set_host(host2);
+  pending_execs.push_back(exec2);
+  exec2->start();
+
+  XBT_INFO("Calling wait_any..");
+  try {
+    long index = sg4::Exec::wait_any(pending_execs);
+    XBT_INFO("Wait any returned index %ld (exec on %s)", index, pending_execs.at(index)->get_host()->get_cname());
+  } catch (const simgrid::HostFailureException&) {
+    /* The exception object is not inspected here: the state of each exec is checked below instead */
+    XBT_INFO("Dispatcher has experienced a host failure exception, so it knows that something went wrong");
+    XBT_INFO("Now it needs to figure out which of the two execs failed by looking at their state");
+  }
+
+  XBT_INFO("Exec on %s has state: %s", pending_execs[0]->get_host()->get_cname(), pending_execs[0]->get_state_str());
+  XBT_INFO("Exec on %s has state: %s", pending_execs[1]->get_host()->get_cname(), pending_execs[1]->get_state_str());
+
+  try {
+    pending_execs[1]->wait();
+  } catch (const simgrid::HostFailureException& e) {
+    XBT_INFO("Waiting on a FAILED exec raises an exception: '%s'", e.what());
+  }
+  /* Drop the exec on host2 (the last element) so that only the exec on host1 is waited for */
+  pending_execs.pop_back();
+  XBT_INFO("Wait for remaining exec, just to be nice");
+  sg4::Exec::wait_any(pending_execs);
+  XBT_INFO("Dispatcher ends");
+}
+
+/* Sleeps for 10 seconds, then turns off the host given as parameter. */
+static void host_killer(sg4::Host* to_kill)
+{
+  const double delay = 10.0; // simulated time to wait before turning the host off
+
+  XBT_INFO("HostKiller sleeping 10 seconds...");
+  sg4::this_actor::sleep_for(delay);
+
+  const char* victim_name = to_kill->get_cname();
+  XBT_INFO("HostKiller turning off host %s", victim_name);
+  to_kill->turn_off();
+
+  XBT_INFO("HostKiller ends");
+}
+
+/* Builds a two-host platform programmatically, deploys the dispatcher and the host killer, and runs the simulation. */
+int main(int argc, char** argv)
+{
+  sg4::Engine e(&argc, argv);
+
+  /* Platform: one full zone containing two hosts */
+  auto* root_zone   = sg4::create_full_zone("AS0");
+  auto* first_host  = root_zone->create_host("Host1", "1f");
+  auto* second_host = root_zone->create_host("Host2", "1f");
+  root_zone->seal();
+
+  /* Both actors run on the first host; the killer is a daemon and targets the second host */
+  sg4::Actor::create("Dispatcher", first_host, dispatcher, first_host, second_host);
+  sg4::Actor::create("HostKiller", first_host, host_killer, second_host)->daemonize();
+
+  e.run();
+
+  return 0;
+}
--- /dev/null
+#!/usr/bin/env tesh
+
+$ ${bindir:=.}/s4u-exec-failure "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n"
+> [ 0.000000] (1:Dispatcher@Host1) Initiating asynchronous exec on Host1
+> [ 0.000000] (2:HostKiller@Host1) HostKiller sleeping 10 seconds...
+> [ 0.000000] (1:Dispatcher@Host1) Initiating asynchronous exec on Host2
+> [ 0.000000] (1:Dispatcher@Host1) Calling wait_any..
+> [ 10.000000] (2:HostKiller@Host1) HostKiller turning off host Host2
+> [ 10.000000] (1:Dispatcher@Host1) Dispatcher has experienced a host failure exception, so it knows that something went wrong
+> [ 10.000000] (1:Dispatcher@Host1) Now it needs to figure out which of the two execs failed by looking at their state
+> [ 10.000000] (1:Dispatcher@Host1) Exec on Host1 has state: STARTED
+> [ 10.000000] (1:Dispatcher@Host1) Exec on Host2 has state: FAILED
+> [ 10.000000] (1:Dispatcher@Host1) Waiting on a FAILED exec raises an exception: 'Cannot wait for a failed exec'
+> [ 10.000000] (1:Dispatcher@Host1) Wait for remaining exec, just to be nice
+> [ 10.000000] (2:HostKiller@Host1) HostKiller ends
+> [ 20.000000] (1:Dispatcher@Host1) Dispatcher ends
exec->wait();
XBT_INFO("Done!");
+
+ XBT_INFO("And now, even harder. Start a remote activity on Ginette and turn off the host after 0.5 sec");
+ exec = simgrid::s4u::this_actor::exec_init(48.492e6)->set_host(ginette);
+ exec->start();
+
+ simgrid::s4u::this_actor::sleep_for(0.5);
+ ginette->turn_off();
+ try {
+ exec->wait();
+ } catch (const simgrid::HostFailureException& e) {
+ XBT_INFO("Execution failed on %s", ginette->get_cname());
+ }
+ XBT_INFO("Done!");
}
int main(int argc, char* argv[])
> [ 1.500000] (1:test@Fafard) Loads before the move: Boivin=0; Fafard=0; Ginette=48492000
> [ 1.600000] (1:test@Fafard) Loads after the move: Boivin=98095000; Fafard=0; Ginette=0
> [ 2.000000] (1:test@Fafard) Done!
+> [ 2.000000] (1:test@Fafard) And now, even harder. Start a remote activity on Ginette and turn off the host after 0.5 sec
+> [ 2.500000] (1:test@Fafard) Execution failed on Ginette
+> [ 2.500000] (1:test@Fafard) Done!
foreach(example actor-create actor-daemon actor-join actor-kill actor-migrate actor-suspend actor-yield actor-lifetime
comm-wait comm-waitall comm-waitany
exec-async exec-basic exec-dvfs exec-remote
- network-nonlinear clusters-multicpu)
+ network-nonlinear clusters-multicpu io-degradation exec-cpu-nonlinear)
set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/${example}/${example}.tesh)
set(examples_src ${examples_src} ${CMAKE_CURRENT_SOURCE_DIR}/${example}/${example}.py)
#####################################################################################################
-def create_hostzone(zone: simgrid.NetZone, coord: typing.List[int], id: int) -> typing.Tuple[simgrid.NetPoint, simgrid.NetPoint]:
+def create_hostzone(zone: simgrid.NetZone, coord: typing.List[int], ident: int) -> typing.Tuple[simgrid.NetPoint, simgrid.NetPoint]:
"""
Callback to set a cluster leaf/element
:param zone: Cluster netzone being created (useful to create the hosts/links inside it)
:param coord: Coordinates in the cluster
- :param id: Internal identifier in the torus (for information)
+ :param ident: Internal identifier in the torus (for information)
:return netpoint, gateway: the netpoint to the StarZone and CPU0 as gateway
"""
num_cpus = 8 # Number of CPUs in the zone
link_bw = "100GBps" # Link bw connecting the CPU
link_lat = "1ns" # Link latency
- hostname = "host" + str(id)
+ hostname = "host" + str(ident)
# create the StarZone
host_zone = simgrid.NetZone.create_star_zone(hostname)
# setting my Torus parent zone
#####################################################################################################
-def create_limiter(zone: simgrid.NetZone, coord: typing.List[int], id: int) -> simgrid.Link:
+def create_limiter(zone: simgrid.NetZone, coord: typing.List[int], ident: int) -> simgrid.Link:
"""
Callback to create limiter link (1Gbs) for each netpoint
The coord parameter depends on the cluster being created:
- Torus: Direct translation of the Torus' dimensions, e.g. (0, 0, 0) for a 3-D Torus
- - Fat-Tree: A pair (level in the tree, id), e.g. (0, 0) for first leaf in the tree and (1,0) for the first switch at
+ - Fat-Tree: A pair (level in the tree, ident), e.g. (0, 0) for first leaf in the tree and (1,0) for the first switch at
level 1.
- Dragonfly: a tuple (group, chassis, blades/routers, nodes), e.g. (0, 0, 0, 0) for first node in the cluster. To
identify the router inside a (group, chassis, blade), we use MAX_UINT in the last parameter (e.g. 0, 0, 0,
:param zone: Torus netzone being created (useful to create the hosts/links inside it)
:param coord: Coordinates in the cluster
- :param id: Internal identifier in the torus (for information)
+ :param ident: Internal identifier in the torus (for information)
:return: Limiter link
"""
- return zone.create_link("limiter-" + str(id), [1e9]).seal()
+ return zone.create_link("limiter-" + str(ident), [1e9]).seal()
def create_torus_cluster():
#####################################################################################################
-def create_fatTree_cluster():
+def create_fat_tree_cluster():
"""
Creates a Fat-Tree cluster
if platform == "torus":
create_torus_cluster()
elif platform == "fatTree":
- create_fatTree_cluster()
+ create_fat_tree_cluster()
elif platform == "dragonfly":
create_dragonfly_cluster()
else:
--- /dev/null
+# Copyright (c) 2006-2021. The SimGrid Team. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the license (GNU LGPL) which comes with this package.
+
+# This example shows how to simulate a non-linear resource sharing for
+# CPUs.
+
+
+from simgrid import Actor, Engine, NetZone, Host, this_actor
+import sys
+import functools
+
+
+def runner():
+ computation_amount = this_actor.get_host().speed
+ n_task = 10
+
+ this_actor.info("Execute %d tasks of %g flops, should take %d second in a CPU without degradation. It will take the double here." % (
+ n_task, computation_amount, n_task))
+ tasks = [this_actor.exec_init(computation_amount).start()
+ for _ in range(n_task)]
+
+ this_actor.info("Waiting for all tasks to be done!")
+ for task in tasks:
+ task.wait()
+
+ this_actor.info("Finished executing. Goodbye now!")
+
+
+def cpu_nonlinear(host: Host, capacity: float, n: int) -> float:
+ """ Non-linear resource sharing for CPU """
+ # emulates a degradation in CPU according to the number of tasks
+ # totally unrealistic but for learning purposes
+ capacity = capacity / 2 if n > 1 else capacity
+ this_actor.info("Host %s, %d concurrent tasks, new capacity %f" %
+ (host.name, n, capacity))
+ return capacity
+
+
+def load_platform():
+ """ Create a simple 1-host platform """
+ zone = NetZone.create_empty_zone("Zone1")
+ runner_host = zone.create_host("runner", 1e6)
+ runner_host.set_sharing_policy(
+ Host.SharingPolicy.NONLINEAR, functools.partial(cpu_nonlinear, runner_host))
+ runner_host.seal()
+ zone.seal()
+
+ # create actor runner
+ Actor.create("runner", runner_host, runner)
+
+
+if __name__ == '__main__':
+ e = Engine(sys.argv)
+
+ # create platform
+ load_platform()
+
+ # runs the simulation
+ e.run()
+
+ # Explicitly delete the Engine object to avoid a segfault during the cleanup phase.
+ # During Engine destruction, the std::function linked to the non-linear callback is cleaned up.
+ # If we leave that cleanup to happen on its own, it fails during destruction because the Python
+ # main program has already freed its variables.
+ del(e)
--- /dev/null
+#!/usr/bin/env tesh
+
+$ ${pythoncmd:=python3} ${PYTHON_TOOL_OPTIONS:=} ${srcdir:=.}/exec-cpu-nonlinear.py "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n"
+> [ 0.000000] (1:runner@runner) Execute 10 tasks of 1e+06 flops, should take 10 second in a CPU without degradation. It will take the double here.
+> [ 0.000000] (1:runner@runner) Waiting for all tasks to be done!
+> [ 0.000000] (0:maestro@) Host runner, 10 concurrent tasks, new capacity 500000.000000
+> [ 20.000000] (1:runner@runner) Finished executing. Goodbye now!
--- /dev/null
+# Copyright (c) 2006-2021. The SimGrid Team. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the license (GNU LGPL) which comes with this package.
+
+# This example shows how to simulate a non-linear resource sharing for disk
+# operations.
+#
+# It is inspired by the paper
+# "Adding Storage Simulation Capacities to the SimGrid Toolkit: Concepts, Models, and API"
+# Available at : https://hal.inria.fr/hal-01197128/document
+#
+# It shows how to simulate concurrent operations degrading overall performance of IO
+# operations (specifically the effects presented in Fig. 8 of the paper).
+
+
+from simgrid import Actor, Engine, NetZone, Host, Disk, this_actor
+import sys
+import functools
+
+
+def estimate_bw(disk: Disk, n_flows: int, read: bool):
+ """ Calculates the bandwidth for disk doing async operations """
+ size = 100000
+ cur_time = Engine.get_clock()
+ activities = [disk.read_async(size) if read else disk.write_async(
+ size) for _ in range(n_flows)]
+
+ for act in activities:
+ act.wait()
+
+ elapsed_time = Engine.get_clock() - cur_time
+ estimated_bw = float(size * n_flows) / elapsed_time
+ this_actor.info("Disk: %s, concurrent %s: %d, estimated bandwidth: %f" % (
+ disk.name, "read" if read else "write", n_flows, estimated_bw))
+
+
+def host():
+ # Estimating bw for each disk and considering concurrent flows
+ for n in range(1, 15, 2):
+ for disk in Host.current().get_disks():
+ estimate_bw(disk, n, True)
+ estimate_bw(disk, n, False)
+
+
+def ssd_dynamic_sharing(disk: Disk, op: str, capacity: float, n: int) -> float:
+ """
+ Non-linear resource callback for SSD disks
+
+ In this case, we have measurements for some resource sharing and directly use them to return the
+ correct value
+ :param disk: Disk on which the operation is happening (bound by the user through functools.partial)
+ :param op: read or write operation (bound by the user through functools.partial)
+ :param capacity: Resource current capacity in SimGrid
+ :param n: Number of activities sharing this resource
+ """
+ # measurements for SSD disks
+ speed = {
+ "write": {1: 131.},
+ "read": {1: 152., 2: 161., 3: 184., 4: 197., 5: 207., 6: 215., 7: 220., 8: 224., 9: 227., 10: 231., 11: 233., 12: 235., 13: 237., 14: 238., 15: 239.}
+ }
+
+ # use the measured bandwidth when one exists for n flows; otherwise return the capacity unchanged
+ if (n in speed[op]):
+ capacity = speed[op][n]
+
+ return capacity
+
+
+def sata_dynamic_sharing(disk: Disk, capacity: float, n: int) -> float:
+ """
+ Non-linear resource callback for SATA disks
+
+ In this case, the degradation for read operations is linear and we have a formula that represents it.
+
+ :param disk: Disk on which the operation is happening (bound by the user through functools.partial)
+ :param capacity: Resource current capacity in SimGrid
+ :param n: Number of activities sharing this resource
+ :return: New disk capacity
+ """
+ return 68.3 - 1.7 * n
+
+
+def create_ssd_disk(host: Host, disk_name: str):
+ """ Creates an SSD disk, setting the appropriate callback for non-linear resource sharing """
+ disk = host.create_disk(disk_name, "240MBps", "170MBps")
+ disk.set_sharing_policy(Disk.Operation.READ, Disk.SharingPolicy.NONLINEAR,
+ functools.partial(ssd_dynamic_sharing, disk, "read"))
+ disk.set_sharing_policy(Disk.Operation.WRITE, Disk.SharingPolicy.NONLINEAR,
+ functools.partial(ssd_dynamic_sharing, disk, "write"))
+ disk.set_sharing_policy(Disk.Operation.READWRITE,
+ Disk.SharingPolicy.LINEAR)
+
+
+def create_sata_disk(host: Host, disk_name: str):
+ """ Same for a SATA disk, only read operation follows a non-linear resource sharing """
+ disk = host.create_disk(disk_name, "68MBps", "50MBps")
+ disk.set_sharing_policy(Disk.Operation.READ, Disk.SharingPolicy.NONLINEAR,
+ functools.partial(sata_dynamic_sharing, disk))
+ # this is the default behavior, stated explicitly only to make it clearer
+ disk.set_sharing_policy(Disk.Operation.WRITE, Disk.SharingPolicy.LINEAR)
+ disk.set_sharing_policy(Disk.Operation.READWRITE,
+ Disk.SharingPolicy.LINEAR)
+
+
+if __name__ == '__main__':
+ e = Engine(sys.argv)
+ # simple platform containing 1 host and 2 disks
+ zone = NetZone.create_full_zone("bob_zone")
+ bob = zone.create_host("bob", 1e6)
+ create_ssd_disk(bob, "Edel (SSD)")
+ create_sata_disk(bob, "Griffon (SATA II)")
+ zone.seal()
+
+ Actor.create("runner", bob, host)
+
+ e.run()
+ this_actor.info("Simulated time: %g" % Engine.get_clock())
+
+ # Explicitly delete the Engine object to avoid a segfault during the cleanup phase.
+ # During Engine destruction, the std::function linked to the non-linear callback is cleaned up.
+ # If we leave that cleanup to happen on its own, it fails during destruction because the Python
+ # main program has already freed its variables.
+ del(e)
--- /dev/null
+#!/usr/bin/env tesh
+
+$ ${pythoncmd:=python3} ${PYTHON_TOOL_OPTIONS:=} ${srcdir:=.}/io-degradation.py "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n"
+> [657.894737] (1:runner@bob) Disk: Edel (SSD), concurrent read: 1, estimated bandwidth: 152.000000
+> [1421.253515] (1:runner@bob) Disk: Edel (SSD), concurrent write: 1, estimated bandwidth: 131.000000
+> [2922.755017] (1:runner@bob) Disk: Griffon (SATA II), concurrent read: 1, estimated bandwidth: 66.600000
+> [2922.757017] (1:runner@bob) Disk: Griffon (SATA II), concurrent write: 1, estimated bandwidth: 50000000.001182
+> [4553.191800] (1:runner@bob) Disk: Edel (SSD), concurrent read: 3, estimated bandwidth: 184.000000
+> [4553.193564] (1:runner@bob) Disk: Edel (SSD), concurrent write: 3, estimated bandwidth: 170000000.022058
+> [9300.029007] (1:runner@bob) Disk: Griffon (SATA II), concurrent read: 3, estimated bandwidth: 63.200000
+> [9300.035007] (1:runner@bob) Disk: Griffon (SATA II), concurrent write: 3, estimated bandwidth: 50000000.004972
+> [11715.493945] (1:runner@bob) Disk: Edel (SSD), concurrent read: 5, estimated bandwidth: 207.000000
+> [11715.496886] (1:runner@bob) Disk: Edel (SSD), concurrent write: 5, estimated bandwidth: 170000000.039581
+> [20076.700899] (1:runner@bob) Disk: Griffon (SATA II), concurrent read: 5, estimated bandwidth: 59.800000
+> [20076.710899] (1:runner@bob) Disk: Griffon (SATA II), concurrent write: 5, estimated bandwidth: 50000000.008004
+> [23258.529081] (1:runner@bob) Disk: Edel (SSD), concurrent read: 7, estimated bandwidth: 220.000000
+> [23258.533199] (1:runner@bob) Disk: Edel (SSD), concurrent write: 7, estimated bandwidth: 170000000.009542
+> [35669.880716] (1:runner@bob) Disk: Griffon (SATA II), concurrent read: 7, estimated bandwidth: 56.400000
+> [35669.894716] (1:runner@bob) Disk: Griffon (SATA II), concurrent write: 7, estimated bandwidth: 49999999.989814
+> [39634.652426] (1:runner@bob) Disk: Edel (SSD), concurrent read: 9, estimated bandwidth: 227.000000
+> [39634.657720] (1:runner@bob) Disk: Edel (SSD), concurrent write: 9, estimated bandwidth: 169999999.992853
+> [56615.789795] (1:runner@bob) Disk: Griffon (SATA II), concurrent read: 9, estimated bandwidth: 53.000000
+> [56615.807795] (1:runner@bob) Disk: Griffon (SATA II), concurrent write: 9, estimated bandwidth: 50000000.010025
+> [61336.837838] (1:runner@bob) Disk: Edel (SSD), concurrent read: 11, estimated bandwidth: 233.000000
+> [61336.844309] (1:runner@bob) Disk: Edel (SSD), concurrent write: 11, estimated bandwidth: 170000000.077813
+> [83514.263663] (1:runner@bob) Disk: Griffon (SATA II), concurrent read: 11, estimated bandwidth: 49.600000
+> [83514.285663] (1:runner@bob) Disk: Griffon (SATA II), concurrent write: 11, estimated bandwidth: 50000000.006350
+> [88999.517731] (1:runner@bob) Disk: Edel (SSD), concurrent read: 13, estimated bandwidth: 237.000000
+> [88999.525378] (1:runner@bob) Disk: Edel (SSD), concurrent write: 13, estimated bandwidth: 169999999.974881
+> [117138.053517] (1:runner@bob) Disk: Griffon (SATA II), concurrent read: 13, estimated bandwidth: 46.200000
+> [117138.079517] (1:runner@bob) Disk: Griffon (SATA II), concurrent write: 13, estimated bandwidth: 50000000.003806
+> [117138.079517] (0:maestro@) Simulated time: 117138
pending_comms = []
this_actor.info("Wait for %d messages asynchronously" % self.msg_count)
- for i in range(self.msg_count):
+ for _ in range(self.msg_count):
comm, data = mbox.get_async()
pending_comms.append(comm)
pending_msgs.append(data)
public:
// enum class State { ... }
- XBT_DECLARE_ENUM_CLASS(State, INITED, STARTING, STARTED, CANCELED, FINISHED);
+ XBT_DECLARE_ENUM_CLASS(State, INITED, STARTING, STARTED, FAILED, CANCELED, FINISHED);
protected:
Activity() = default;
static void set_config(const std::string& name, double value);
static void set_config(const std::string& name, const std::string& value);
+ Engine* set_default_comm_data_copy_callback(void (*callback)(kernel::activity::CommImpl*, void*, size_t));
/** Callback fired when the platform is created (ie, the xml file parsed),
* right before the actual simulation starts. */
static xbt::signal<void()> on_platform_created;
/****************************** Communication *********************************/
#ifdef __cplusplus
-XBT_PUBLIC void SIMIX_comm_set_copy_data_callback(void (*callback)(simgrid::kernel::activity::CommImpl*, void*,
- size_t));
+XBT_ATTRIB_DEPRECATED_v333("Please use Engine::set_default_comm_data_copy_callback()") XBT_PUBLIC
+ void SIMIX_comm_set_copy_data_callback(void (*callback)(simgrid::kernel::activity::CommImpl*, void*, size_t));
+
XBT_PUBLIC void SIMIX_comm_copy_pointer_callback(simgrid::kernel::activity::CommImpl* comm, void* buff,
size_t buff_size);
XBT_PUBLIC void SIMIX_comm_copy_buffer_callback(simgrid::kernel::activity::CommImpl* comm, void* buff,
void _XBT_LOGV_CTOR(catName)(void) \
{ \
XBT_LOG_EXTERNAL_CATEGORY(catName); \
- if (!_XBT_LOGV(catName).initialized) { \
- _xbt_log_cat_init(&_XBT_LOGV(catName), xbt_log_priority_uninitialized); \
- } \
+ (void)_xbt_log_cat_init(&_XBT_LOGV(catName), xbt_log_priority_uninitialized); \
} \
XBT_EXPORT_NO_IMPORT s_xbt_log_category_t _XBT_LOGV(catName) = { \
&_XBT_LOGV(parent), \
# Disable some rules on some files
-sonar.issue.ignore.multicriteria=j1,j2,j3,j4,j5,jni1,jni2,c1,c2a,c2b,c3,c4,c5a,c5b,c6a,c6b,c7,c8a,c8b,c9,c10a,c10b,c10c,cex1a,cex1b,cex2a,cex2b,cex3,cex4,f1,p1,s1,s2,s3,s4,s5,s6
+sonar.issue.ignore.multicriteria=j1,j2,j3,j4,j5,jni1,jni2,c1,c2a,c2b,c3,c4,c5a,c5b,c6a,c6b,c7,c8a,c8b,c9,c10a,c10b,c10c,cex1a,cex1b,cex2a,cex2b,cex3,cex4,f1,p1,s1,s2,s3,s4,s5
# The Object.finalize() method should not be overridden
# But we need to clean the native memory with JNI
sonar.issue.ignore.multicriteria.s5.ruleKey=cpp:S995
sonar.issue.ignore.multicriteria.s5.resourceKey=src/smpi/bindings/*.cpp
-#There is an intentional uninitialized variable in this test, to check SMPI does not use irrelevant values for some ranks
-sonar.issue.ignore.multicriteria.s6.ruleKey=c:S836
-sonar.issue.ignore.multicriteria.s6.resourceKey=teshsuite/smpi/replay-ti-colls/replay-ti-colls.c
-
# Exclude some files from the analysis:
# - our unit tests
# - the tests that we borrowed elsewhere (MPICH and ISP)
"Registers the main function of an actor");
/* Class Netzone */
- py::class_<simgrid::s4u::NetZone, std::unique_ptr<simgrid::s4u::NetZone, py::nodelete>>(m, "NetZone",
- "Networking Zones")
- .def_static("create_full_zone", &simgrid::s4u::create_full_zone, "Creates a zone of type FullZone")
+ py::class_<simgrid::s4u::NetZone, std::unique_ptr<simgrid::s4u::NetZone, py::nodelete>> netzone(m, "NetZone",
+ "Networking Zones");
+ netzone.def_static("create_full_zone", &simgrid::s4u::create_full_zone, "Creates a zone of type FullZone")
.def_static("create_torus_zone", &simgrid::s4u::create_torus_zone, "Creates a cluster of type Torus")
.def_static("create_fatTree_zone", &simgrid::s4u::create_fatTree_zone, "Creates a cluster of type Fat-Tree")
.def_static("create_dragonfly_zone", &simgrid::s4u::create_dragonfly_zone, "Creates a cluster of type Dragonfly")
const std::pair<unsigned int, unsigned int>&, unsigned int>());
/* Class Host */
- py::class_<simgrid::s4u::Host, std::unique_ptr<Host, py::nodelete>>(m, "Host", "Simulated host")
- .def("by_name", &Host::by_name, "Retrieves a host from its name, or die")
+ py::class_<simgrid::s4u::Host, std::unique_ptr<Host, py::nodelete>> host(m, "Host", "Simulated host");
+ host.def("by_name", &Host::by_name, "Retrieves a host from its name, or die")
.def("get_pstate_count", &Host::get_pstate_count, "Retrieve the count of defined pstate levels")
.def("get_pstate_speed", &Host::get_pstate_speed, "Retrieve the maximal speed at the given pstate")
.def("get_netpoint", &Host::get_netpoint, "Retrieve the netpoint associated to this host")
.def("get_disks", &Host::get_disks, "Retrieve the list of disks in this host")
.def("set_core_count", &Host::set_core_count, "Set the number of cores in the CPU")
.def("set_coordinates", &Host::set_coordinates, "Set the coordinates of this host")
+ .def("set_sharing_policy", &simgrid::s4u::Host::set_sharing_policy, "Describe how the CPU is shared",
+ py::arg("policy"), py::arg("cb") = simgrid::s4u::NonLinearResourceCb())
.def("create_disk", py::overload_cast<const std::string&, double, double>(&Host::create_disk), "Create a disk")
.def("create_disk",
py::overload_cast<const std::string&, const std::string&, const std::string&>(&Host::create_disk),
"speed", &Host::get_speed,
"The peak computing speed in flops/s at the current pstate, taking the external load into account. "
"This is the max potential speed.");
+ py::enum_<simgrid::s4u::Host::SharingPolicy>(host, "SharingPolicy")
+ .value("NONLINEAR", simgrid::s4u::Host::SharingPolicy::NONLINEAR)
+ .value("LINEAR", simgrid::s4u::Host::SharingPolicy::LINEAR)
+ .export_values();
/* Class Disk */
- py::class_<simgrid::s4u::Disk, std::unique_ptr<simgrid::s4u::Disk, py::nodelete>>(m, "Disk", "Simulated disk")
- .def("read", &simgrid::s4u::Disk::read, py::call_guard<GilScopedRelease>(), "Read data from disk")
+ py::class_<simgrid::s4u::Disk, std::unique_ptr<simgrid::s4u::Disk, py::nodelete>> disk(m, "Disk", "Simulated disk");
+ disk.def("read", &simgrid::s4u::Disk::read, py::call_guard<GilScopedRelease>(), "Read data from disk")
.def("write", &simgrid::s4u::Disk::write, py::call_guard<GilScopedRelease>(), "Write data in disk")
+ .def("read_async", &simgrid::s4u::Disk::read_async, "Non-blocking read data from disk")
+ .def("write_async", &simgrid::s4u::Disk::write_async, "Non-blocking write data in disk")
+ .def("set_sharing_policy", &simgrid::s4u::Disk::set_sharing_policy, "Set sharing policy for this disk",
+ py::arg("op"), py::arg("policy"), py::arg("cb") = simgrid::s4u::NonLinearResourceCb())
.def("seal", &simgrid::s4u::Disk::seal, "Seal this disk")
.def_property_readonly(
"name", [](const simgrid::s4u::Disk* self) { return self->get_name(); }, "The name of this disk");
+ py::enum_<simgrid::s4u::Disk::SharingPolicy>(disk, "SharingPolicy")
+ .value("NONLINEAR", simgrid::s4u::Disk::SharingPolicy::NONLINEAR)
+ .value("LINEAR", simgrid::s4u::Disk::SharingPolicy::LINEAR)
+ .export_values();
+ py::enum_<simgrid::s4u::Disk::Operation>(disk, "Operation")
+ .value("READ", simgrid::s4u::Disk::Operation::READ)
+ .value("WRITE", simgrid::s4u::Disk::Operation::WRITE)
+ .value("READWRITE", simgrid::s4u::Disk::Operation::READWRITE)
+ .export_values();
/* Class NetPoint */
- py::class_<simgrid::kernel::routing::NetPoint, std::unique_ptr<simgrid::kernel::routing::NetPoint, py::nodelete>>(
- m, "NetPoint", "NetPoint object");
+ py::class_<simgrid::kernel::routing::NetPoint, std::unique_ptr<simgrid::kernel::routing::NetPoint, py::nodelete>>
+ netpoint(m, "NetPoint", "NetPoint object");
/* Class Link */
py::class_<simgrid::s4u::Link, std::unique_ptr<simgrid::s4u::Link, py::nodelete>> link(m, "Link", "Network link");
- link.def("set_latency", py::overload_cast<const std::string&>(&simgrid::s4u::Link::set_latency), "Set the latency");
- link.def("set_latency", py::overload_cast<double>(&simgrid::s4u::Link::set_latency), "Set the latency");
- link.def("set_sharing_policy", &simgrid::s4u::Link::set_sharing_policy, "Set sharing policy for this link");
- link.def("set_concurrency_limit", &simgrid::s4u::Link::set_concurrency_limit, "Set concurrency limit for this link");
- link.def("set_host_wifi_rate", &simgrid::s4u::Link::set_host_wifi_rate,
- "Set level of communication speed of given host on this Wi-Fi link");
- link.def("seal", &simgrid::s4u::Link::seal, "Seal this link");
- link.def_property_readonly(
- "name",
- [](const simgrid::s4u::Link* self) {
- return std::string(self->get_name().c_str()); // Convert from xbt::string because of MC
- },
- "The name of this link");
+ link.def("set_latency", py::overload_cast<const std::string&>(&simgrid::s4u::Link::set_latency), "Set the latency")
+ .def("set_latency", py::overload_cast<double>(&simgrid::s4u::Link::set_latency), "Set the latency")
+ .def("set_sharing_policy", &simgrid::s4u::Link::set_sharing_policy, "Set sharing policy for this link")
+ .def("set_concurrency_limit", &simgrid::s4u::Link::set_concurrency_limit, "Set concurrency limit for this link")
+ .def("set_host_wifi_rate", &simgrid::s4u::Link::set_host_wifi_rate,
+ "Set level of communication speed of given host on this Wi-Fi link")
+ .def("seal", &simgrid::s4u::Link::seal, "Seal this link")
+ .def_property_readonly(
+ "name",
+ [](const simgrid::s4u::Link* self) {
+ return std::string(self->get_name().c_str()); // Convert from xbt::string because of MC
+ },
+ "The name of this link");
py::enum_<simgrid::s4u::Link::SharingPolicy>(link, "SharingPolicy")
.value("NONLINEAR", simgrid::s4u::Link::SharingPolicy::NONLINEAR)
.value("WIFI", simgrid::s4u::Link::SharingPolicy::WIFI)
py::class_<PyGetAsync>(m, "PyGetAsync", "Wrapper for async get communications")
.def(py::init<>())
.def(
- "get", [](PyGetAsync* self) { return py::reinterpret_steal<py::object>(*(self->get())); },
+ "get", [](const PyGetAsync* self) { return py::reinterpret_steal<py::object>(*(self->get())); },
"Get python object after async communication in receiver side");
/* Class Comm */
py::call_guard<GilScopedRelease>(),
"Block until the completion of any communication in the list and return the index of the terminated one.");
+ /* Class Io */
+ py::class_<simgrid::s4u::Io, simgrid::s4u::IoPtr>(m, "Io", "I/O activities")
+ .def("test", &simgrid::s4u::Io::test, py::call_guard<GilScopedRelease>(), "Test whether the I/O is terminated.")
+ .def("wait", &simgrid::s4u::Io::wait, py::call_guard<GilScopedRelease>(),
+ "Block until the completion of that I/O operation")
+ .def_static(
+ "wait_any_for", &simgrid::s4u::Io::wait_any_for, py::call_guard<GilScopedRelease>(),
+ "Block until the completion of any I/O in the list (or timeout) and return the index of the terminated one.")
+ .def_static("wait_any", &simgrid::s4u::Io::wait_any, py::call_guard<GilScopedRelease>(),
+ "Block until the completion of any I/O in the list and return the index of the terminated one.");
+
/* Class Exec */
py::class_<simgrid::s4u::Exec, simgrid::s4u::ExecPtr>(m, "Exec", "Execution")
.def_property_readonly(
"application")
.def(
"create",
- [](py::str name, Host* host, py::object fun, py::args args) {
+ [](py::str name, Host* h, py::object fun, py::args args) {
fun.inc_ref(); // FIXME: why is this needed for tests like exec-async, exec-dvfs and exec-remote?
args.inc_ref(); // FIXME: why is this needed for tests like actor-migrate?
- return simgrid::s4u::Actor::create(name, host, [fun, args]() {
+ return simgrid::s4u::Actor::create(name, h, [fun, args]() {
GilScopedAcquire py_context;
try {
fun(*args);
simcall->issuer_->context_->set_wannadie();
} else {
switch (state_) {
+ case State::FAILED:
+ simcall->issuer_->exception_ =
+ std::make_exception_ptr(NetworkFailureException(XBT_THROW_POINT, "Remote peer failed"));
+ break;
case State::SRC_TIMEOUT:
simcall->issuer_->exception_ = std::make_exception_ptr(
TimeoutException(XBT_THROW_POINT, "Communication timeouted because of the sender"));
case State::SRC_HOST_FAILURE:
if (simcall->issuer_ == src_actor_)
simcall->issuer_->context_->set_wannadie();
- else
+ else {
+ state_ = kernel::activity::State::FAILED;
simcall->issuer_->exception_ =
std::make_exception_ptr(NetworkFailureException(XBT_THROW_POINT, "Remote peer failed"));
+ }
break;
case State::DST_HOST_FAILURE:
if (simcall->issuer_ == dst_actor_)
simcall->issuer_->context_->set_wannadie();
- else
+ else {
+ state_ = kernel::activity::State::FAILED;
simcall->issuer_->exception_ =
std::make_exception_ptr(NetworkFailureException(XBT_THROW_POINT, "Remote peer failed"));
+ }
break;
case State::LINK_FAILURE:
} else {
XBT_DEBUG("I'm neither source nor dest");
}
+ state_ = kernel::activity::State::FAILED;
simcall->issuer_->throw_exception(
std::make_exception_ptr(NetworkFailureException(XBT_THROW_POINT, "Link failure")));
break;
}
switch (state_) {
case State::FAILED:
- simcall->issuer_->context_->set_wannadie();
+ piface_->complete(s4u::Activity::State::FAILED);
if (simcall->issuer_->get_host()->is_on())
simcall->issuer_->exception_ = std::make_exception_ptr(HostFailureException(XBT_THROW_POINT, "Host failed"));
- /* else, the actor will be killed with no possibility to survive */
+ else /* else, the actor will be killed with no possibility to survive */
+ simcall->issuer_->context_->set_wannadie();
break;
case State::CANCELED:
for (auto* exec : execs) {
/* associate this simcall to the synchro */
exec->simcalls_.push_back(&issuer->simcall_);
-
/* see if the synchro is already finished */
if (exec->state_ != State::WAITING && exec->state_ != State::RUNNING) {
exec->finish();
switch (state_) {
case State::FAILED:
simcall->issuer_->context_->set_wannadie();
+ piface_->complete(s4u::Activity::State::FAILED);
simcall->issuer_->exception_ =
std::make_exception_ptr(StorageFailureException(XBT_THROW_POINT, "Storage failed"));
break;
// Pop/drop the top of the stack:
case DW_OP_drop:
- stack.pop();
+ (void)stack.pop();
break;
case DW_OP_swap:
{
bool res = mc::actor_is_enabled(kernel::actor::ActorImpl::by_pid(msg->aid));
s_mc_message_int_t answer{MessageType::ACTOR_ENABLED_REPLY, res};
- channel_.send(answer);
+ xbt_assert(channel_.send(answer) == 0, "Could not send ACTOR_ENABLED_REPLY");
}
#define assert_msg_size(_name_, _type_) \
#include "simgrid/Exception.hpp"
#include "simgrid/s4u/Activity.hpp"
#include "simgrid/s4u/Engine.hpp"
+#include "simgrid/s4u/Exec.hpp"
+#include "simgrid/s4u/Io.hpp"
#include "src/kernel/activity/ActivityImpl.hpp"
#include "src/kernel/actor/ActorImpl.hpp"
#include "src/kernel/actor/SimcallObserver.hpp"
if (state_ == State::INITED)
vetoable_start();
+ if (state_ == State::FAILED) {
+ if (dynamic_cast<Exec*>(this))
+ throw HostFailureException(XBT_THROW_POINT, "Cannot wait for a failed exec");
+ if (dynamic_cast<Io*>(this))
+ throw StorageFailureException(XBT_THROW_POINT, "Cannot wait for a failed I/O");
+ }
+
kernel::actor::ActorImpl* issuer = kernel::actor::ActorImpl::self();
kernel::actor::ActivityWaitSimcall observer{issuer, pimpl_.get(), timeout};
if (kernel::actor::simcall_blocking(
std::vector<kernel::activity::CommImpl*> rcomms(comms.size());
std::transform(begin(comms), end(comms), begin(rcomms),
[](const CommPtr& comm) { return static_cast<kernel::activity::CommImpl*>(comm->pimpl_.get()); });
- ssize_t changed_pos = simcall_comm_waitany(rcomms.data(), rcomms.size(), timeout);
+ ssize_t changed_pos = -1;
+ try {
+ changed_pos = simcall_comm_waitany(rcomms.data(), rcomms.size(), timeout);
+ } catch (const NetworkFailureException& e) {
+ for (auto c : comms) {
+ if (c->pimpl_->state_ == kernel::activity::State::FAILED) {
+ c->complete(State::FAILED);
+ }
+ }
+ e.rethrow_nested(XBT_THROW_POINT, boost::core::demangle(typeid(e).name()) + " raised in kernel mode.");
+ }
if (changed_pos != -1)
comms.at(changed_pos)->complete(State::FINISHED);
return changed_pos;
switch (state_) {
case State::FINISHED:
break;
+ case State::FAILED:
+ throw NetworkFailureException(XBT_THROW_POINT, "Cannot wait for a failed communication");
case State::INITED:
case State::STARTING: // It's not started yet. Do it in one simcall if it's a regular communication
break;
case State::STARTED:
- simcall_comm_wait(get_impl(), timeout);
+ try {
+ simcall_comm_wait(get_impl(), timeout);
+ } catch (const NetworkFailureException& e) {
+ complete(State::FAILED);
+ e.rethrow_nested(XBT_THROW_POINT, boost::core::demangle(typeid(e).name()) + " raised in kernel mode.");
+ }
break;
case State::CANCELED:
#include "simgrid/simix.h"
#include "src/instr/instr_private.hpp"
#include "src/kernel/EngineImpl.hpp"
+#include "src/kernel/activity/CommImpl.hpp"
#include "src/mc/mc_replay.hpp"
#include "src/surf/network_interface.hpp"
#include "surf/surf.hpp" // routing_platf. FIXME:KILLME. SOON
config::set_value(name.c_str(), value);
}
+Engine* Engine::set_default_comm_data_copy_callback(void (*callback)(kernel::activity::CommImpl*, void*, size_t))
+{
+ kernel::activity::CommImpl::set_copy_data_callback(callback);
+ return this;
+}
} // namespace s4u
} // namespace simgrid
#include "simgrid/exec.h"
#include "simgrid/s4u/Actor.hpp"
#include "simgrid/s4u/Exec.hpp"
+#include "simgrid/s4u/Host.hpp"
#include "src/kernel/activity/ExecImpl.hpp"
#include "src/kernel/actor/ActorImpl.hpp"
#include "src/kernel/actor/SimcallObserver.hpp"
ExecPtr Exec::init()
{
auto pimpl = kernel::activity::ExecImplPtr(new kernel::activity::ExecImpl());
+ Host::on_state_change.connect([pimpl](s4u::Host const& h) {
+ if (not h.is_on() && pimpl->state_ == kernel::activity::State::RUNNING &&
+ std::find(pimpl->get_hosts().begin(), pimpl->get_hosts().end(), &h) != pimpl->get_hosts().end()) {
+ pimpl->state_ = kernel::activity::State::FAILED;
+ pimpl->post();
+ }
+ });
return ExecPtr(pimpl->get_iface());
}
sg_storage_file_system_init();
// parse the platform file: get the host list
engine->load_platform(argv[1]);
- simgrid::kernel::activity::CommImpl::set_copy_data_callback(smpi_comm_copy_buffer_callback);
+ engine->set_default_comm_data_copy_callback(smpi_comm_copy_buffer_callback);
if (smpi_cfg_privatization() == SmpiPrivStrategies::DLOPEN)
smpi_init_privatization_dlopen(executable);
parent->firstChild = cat;
if (not parent->initialized)
- _xbt_log_cat_init(parent, xbt_log_priority_uninitialized /* ignored */ );
+ (void)_xbt_log_cat_init(parent, xbt_log_priority_uninitialized /* ignored */);
cat->threshold = parent->threshold;
sign of the result is machine dependent for negative values, so force
it to be treated as an unsigned int. */
-#define ADDR2UINT(addr) ((uintptr_t) ((char*) (addr) - (char*) NULL))
+#define ADDR2UINT(addr) ((uintptr_t) (addr))
#define RESIDUAL(addr,bsize) ((uintptr_t) (ADDR2UINT (addr) % (bsize)))
/* Determine the amount of memory spanned by the initial heap table
static void receiver()
{
sg4::Mailbox* mbox = sg4::Mailbox::by_name(sg4::this_actor::get_host()->get_name());
- double* payload = nullptr;
while (true) {
- payload = mbox->get<double>();
- if (*payload < 0) {
- delete payload;
+ auto payload = mbox->get_unique<double>();
+ if (*payload < 0)
break;
- }
XBT_INFO("Received data. Elapsed %lf", sg4::Engine::get_clock() - *payload);
- delete payload;
}
XBT_INFO("Bye");
}
# This program is free software; you can redistribute it and/or modify it
# under the terms of the license (GNU LGPL) which comes with this package.
-from simgrid import Actor, Engine, Comm, Host, Mailbox, NetZone, Link, LinkInRoute, this_actor
+from simgrid import Actor, Engine, Host, Mailbox, NetZone, LinkInRoute, this_actor
import sys
-import functools
class Sender:
"""
hosts = []
# dijkstra
dijkstra = NetZone.create_dijkstra_zone("dijkstra")
- this_actor.info("Creating zone: " + dijkstra.name)
+ msg_base = "Creating zone: "
+ this_actor.info(msg_base + dijkstra.name)
dijkstra.set_parent(root)
host1 = dijkstra.create_host("host1", [1e9, 1e8]).set_core_count(2)
hosts.append(host1)
# vivaldi
vivaldi = NetZone.create_vivaldi_zone("vivaldi")
- this_actor.info("Creating zone: " + vivaldi.name)
+ this_actor.info(msg_base + vivaldi.name)
vivaldi.set_parent(root)
host3 = vivaldi.create_host("host3", 1e9).set_coordinates("1 1 1").seal()
host4 = vivaldi.create_host("host4", "1Gf").set_coordinates("2 2 2").seal()
# empty
empty = NetZone.create_empty_zone("empty")
- this_actor.info("Creating zone: " + empty.name)
+ this_actor.info(msg_base + empty.name)
empty.set_parent(root)
host5 = empty.create_host("host5", 1e9)
hosts.append(host5)
# wifi
wifi = NetZone.create_wifi_zone("wifi")
- this_actor.info("Creating zone: " + wifi.name)
+ this_actor.info(msg_base + wifi.name)
wifi.set_parent(root)
router = wifi.create_router("wifi_router")
wifi.set_property("access_point", "wifi_router")
wifi.seal()
# create routes between netzones
- linkA = vivaldi.create_link("linkA", 1e9).seal()
- linkB = vivaldi.create_link("linkB", "1GBps").seal()
- linkC = vivaldi.create_link("linkC", "1GBps").seal()
+ link_a = vivaldi.create_link("linkA", 1e9).seal()
+ link_b = vivaldi.create_link("linkB", "1GBps").seal()
+ link_c = vivaldi.create_link("linkC", "1GBps").seal()
root.add_route(dijkstra.get_netpoint(), vivaldi.get_netpoint(
- ), host1.get_netpoint(), host3.get_netpoint(), [LinkInRoute(linkA)], True)
+ ), host1.get_netpoint(), host3.get_netpoint(), [LinkInRoute(link_a)], True)
root.add_route(vivaldi.get_netpoint(), empty.get_netpoint(
- ), host3.get_netpoint(), host5.get_netpoint(), [LinkInRoute(linkB)], True)
+ ), host3.get_netpoint(), host5.get_netpoint(), [LinkInRoute(link_b)], True)
root.add_route(empty.get_netpoint(), wifi.get_netpoint(
- ), host5.get_netpoint(), router, [LinkInRoute(linkC)], True)
+ ), host5.get_netpoint(), router, [LinkInRoute(link_c)], True)
# create actors Sender/Receiver
Actor.create("sender", hosts[0], Sender(hosts))
set(teshsuite_src ${teshsuite_src} ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.c)
endforeach()
-if (CMAKE_C_COMPILER_ID MATCHES "Clang|LLVM")
- set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/replay-ti-colls/replay-ti-colls.c PROPERTIES COMPILE_FLAGS "-Wno-sometimes-uninitialized")
-endif()
-
set(teshsuite_src ${teshsuite_src} ${CMAKE_CURRENT_SOURCE_DIR}/fort_args/fort_args.f90 PARENT_SCOPE)
set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/coll-allreduce/coll-allreduce-large.tesh
${CMAKE_CURRENT_SOURCE_DIR}/coll-allreduce/coll-allreduce-automatic.tesh
int main(int argc, char *argv[])
{
MPI_Datatype column[LOOPS], xpose[LOOPS];
- double t[NUM_SIZES], ttmp, tmean;
+ double t[NUM_SIZES], ttmp;
double tMeanLower, tMeanHigher;
int size;
int i, j, errs = 0, nrows, ncols;
MPI_Init(&argc, &argv);
- tmean = 0;
size = 1;
for (i = -SKIP; i < NUM_SIZES; i++) {
nrows = ncols = size;
* time large enough */
t[i] = 0;
}
- tmean += t[i];
}
for (j = 0; j < LOOPS; j++) {
if (i >= 0)
size *= 2;
}
- tmean /= NUM_SIZES;
/* Now, analyze the times to see that they do not grow too fast
* as a function of size. As that is a vague criteria, we do the
-//exclude from clang static analysis, as there is an intentional uninitialized value passed to MPI calls.
-#ifndef __clang_analyzer__
-
+#include "mpi.h"
#include <stdio.h>
#include <string.h>
-#include "mpi.h"
+#define BUFSIZE (1024 * 1024)
+#define BOUNDED(sz) ((sz) < BUFSIZE ? (sz) : BUFSIZE)
+
+/* Build the receive-side buffers used by the collectives in main():
+ *  - *recvbuf: BUFSIZE * nprocs ints, element k initialised to k;
+ *  - *displs:  start of each rank's slot in recvbuf, i.e. rank * BUFSIZE;
+ *  - *counts and *rcounts: per-rank element counts, both set to BOUNDED(rank).
+ * All four arrays are malloc'ed here; releasing them is left to the caller. */
+static void setup_recvbuf(int nprocs, int** recvbuf, int** displs, int** counts, int** rcounts)
+{
+  const int total = BUFSIZE * nprocs;
+  int* data       = malloc(total * sizeof(int));
+  for (int k = 0; k < total; k++)
+    data[k] = k;
+
+  int* offsets     = malloc(nprocs * sizeof(int));
+  int* send_counts = malloc(nprocs * sizeof(int));
+  int* recv_counts = malloc(nprocs * sizeof(int));
+  for (int p = 0; p < nprocs; p++) {
+    const int bounded = BOUNDED(p); /* a rank contributes at most BUFSIZE elements */
+    offsets[p]        = p * BUFSIZE;
+    send_counts[p]    = bounded;
+    recv_counts[p]    = bounded;
+  }
+
+  /* Hand the freshly built arrays back through the out-parameters. */
+  *recvbuf = data;
+  *displs  = offsets;
+  *counts  = send_counts;
+  *rcounts = recv_counts;
+}
-#define BUFSIZE 1024*1024
-
-int
-main (int argc, char **argv){
- int i, nprocs = -1;
- int rank = -1;
- int *sendbuf, *recvbuf, *displs, *counts, *rcounts, *alltoallvcounts;
+int main(int argc, char** argv)
+{ // Two rounds: root-0 collectives with placeholder buffers on non-root ranks, then all collectives with real ones.
+ int nprocs = -1;
+ int rank = -1;
/* init */
- MPI_Init (&argc, &argv);
- MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
- MPI_Comm_rank (MPI_COMM_WORLD, &rank);
+ MPI_Init(&argc, &argv);
+ MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- sendbuf = (int *) malloc (BUFSIZE * nprocs * sizeof(int));
- for (i = 0; i < BUFSIZE * nprocs; i++)
+ int* sendbuf = malloc(BUFSIZE * nprocs * sizeof(int)); // BUFSIZE ints per rank, every element set to this rank's id
+ for (int i = 0; i < BUFSIZE * nprocs; i++)
sendbuf[i] = rank;
- alltoallvcounts = (int *) malloc (nprocs * sizeof(int));
- for (i = 0; i < nprocs; i++)
- if ((i + rank) < BUFSIZE)
- alltoallvcounts[i] = i + rank;
- else
- alltoallvcounts[i] = BUFSIZE;
-
- if (rank == 0) {
- recvbuf = (int *) malloc (BUFSIZE * nprocs * sizeof(int));
- for (i = 0; i < BUFSIZE * nprocs; i++)
- recvbuf[i] = i;
-
- displs = (int *) malloc (nprocs * sizeof(int));
- counts = (int *) malloc (nprocs * sizeof(int));
- rcounts = (int *) malloc (nprocs * sizeof(int));
- for (i = 0; i < nprocs; i++) {
- displs[i] = i * BUFSIZE;
- if (i < BUFSIZE)
- rcounts[i] = counts[i] = i;
- else
- rcounts[i] = counts[i] = BUFSIZE;
- }
- }
-
- //first test, with unallocated non significative buffers
- MPI_Barrier (MPI_COMM_WORLD);
- MPI_Bcast (sendbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
- MPI_Gather (&sendbuf[rank*BUFSIZE], BUFSIZE, MPI_INT, recvbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
- MPI_Scatter (recvbuf, BUFSIZE, MPI_INT, sendbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
- MPI_Gatherv (&sendbuf[rank*BUFSIZE], (rank < BUFSIZE) ? rank : BUFSIZE, MPI_INT, recvbuf, rcounts, displs, MPI_INT, 0, MPI_COMM_WORLD);
- MPI_Scatterv (recvbuf, counts, displs, MPI_INT, sendbuf, (rank < BUFSIZE) ? rank : BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
- MPI_Reduce (sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
-
- if (rank != 0) {
- recvbuf = (int *) malloc (BUFSIZE * nprocs * sizeof(int));
- for (i = 0; i < BUFSIZE * nprocs; i++)
- recvbuf[i] = i;
-
- displs = (int *) malloc (nprocs * sizeof(int));
- counts = (int *) malloc (nprocs * sizeof(int));
- rcounts = (int *) malloc (nprocs * sizeof(int));
- for (i = 0; i < nprocs; i++) {
- displs[i] = i * BUFSIZE;
- if (i < BUFSIZE)
- rcounts[i] = counts[i] = i;
- else
- rcounts[i] = counts[i] = BUFSIZE;
- }
- }
-
- MPI_Barrier (MPI_COMM_WORLD);
- MPI_Bcast (sendbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
- MPI_Gather (&sendbuf[rank*BUFSIZE], BUFSIZE, MPI_INT, recvbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
- MPI_Scatter (recvbuf, BUFSIZE, MPI_INT, sendbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
- MPI_Gatherv (&sendbuf[rank*BUFSIZE], (rank < BUFSIZE) ? rank : BUFSIZE, MPI_INT, recvbuf, rcounts, displs, MPI_INT, 0, MPI_COMM_WORLD);
- MPI_Scatterv (recvbuf, counts, displs, MPI_INT, sendbuf, (rank < BUFSIZE) ? rank : BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
- MPI_Reduce (sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
- MPI_Allgather (sendbuf, BUFSIZE, MPI_INT, recvbuf, BUFSIZE, MPI_INT, MPI_COMM_WORLD);
- MPI_Alltoall (recvbuf, BUFSIZE, MPI_INT, sendbuf, BUFSIZE, MPI_INT, MPI_COMM_WORLD);
- MPI_Allgatherv (sendbuf, (rank < BUFSIZE) ? rank : BUFSIZE, MPI_INT, recvbuf, rcounts, displs, MPI_INT, MPI_COMM_WORLD);
- MPI_Alltoallv (recvbuf, alltoallvcounts, displs, MPI_INT, sendbuf, alltoallvcounts, displs, MPI_INT, MPI_COMM_WORLD);
- MPI_Allreduce (sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
- MPI_Reduce_scatter (sendbuf, recvbuf, rcounts, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
- MPI_Scan (sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
- MPI_Exscan (sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
- MPI_Barrier (MPI_COMM_WORLD);
-
- free (alltoallvcounts);
- free (sendbuf);
- free (recvbuf);
- free (displs);
- free (counts);
- free (rcounts);
-
- MPI_Finalize ();
+ int* alltoallvcounts = malloc(nprocs * sizeof(int)); // per-peer element counts passed to MPI_Alltoallv below
+ for (int i = 0; i < nprocs; i++)
+ alltoallvcounts[i] = BOUNDED(i + rank); // i + rank, capped at BUFSIZE
+
+ int* dummy_buffer = malloc(sizeof(int));
+ // Start from an invalid address (one past a 1-int allocation): valgrind must report an error if these buffers are used.
+ int* recvbuf = dummy_buffer + 1;
+ int* displs = dummy_buffer + 1;
+ int* counts = dummy_buffer + 1;
+ int* rcounts = dummy_buffer + 1;
+ if (rank == 0) // only rank 0 (the root of the calls below) gets real buffers for the first round
+ setup_recvbuf(nprocs, &recvbuf, &displs, &counts, &rcounts);
+
+ // First round: root-0 collectives only; non-root ranks pass the unallocated placeholders for the non-significant buffers.
+ MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Bcast(sendbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+ MPI_Gather(&sendbuf[rank * BUFSIZE], BUFSIZE, MPI_INT, recvbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+ MPI_Scatter(recvbuf, BUFSIZE, MPI_INT, sendbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+ MPI_Gatherv(&sendbuf[rank * BUFSIZE], BOUNDED(rank), MPI_INT, recvbuf, rcounts, displs, MPI_INT, 0, MPI_COMM_WORLD);
+ MPI_Scatterv(recvbuf, counts, displs, MPI_INT, sendbuf, BOUNDED(rank), MPI_INT, 0, MPI_COMM_WORLD);
+ MPI_Reduce(sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
+
+ free(dummy_buffer); // the placeholder allocation is not needed past the first round
+ if (rank != 0) // the remaining ranks now get real buffers as well
+ setup_recvbuf(nprocs, &recvbuf, &displs, &counts, &rcounts);
+ // Second round: the same root-0 collectives again, followed by the non-rooted ones, with real buffers on every rank.
+ MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Bcast(sendbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+ MPI_Gather(&sendbuf[rank * BUFSIZE], BUFSIZE, MPI_INT, recvbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+ MPI_Scatter(recvbuf, BUFSIZE, MPI_INT, sendbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+ MPI_Gatherv(&sendbuf[rank * BUFSIZE], BOUNDED(rank), MPI_INT, recvbuf, rcounts, displs, MPI_INT, 0, MPI_COMM_WORLD);
+ MPI_Scatterv(recvbuf, counts, displs, MPI_INT, sendbuf, BOUNDED(rank), MPI_INT, 0, MPI_COMM_WORLD);
+ MPI_Reduce(sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
+ MPI_Allgather(sendbuf, BUFSIZE, MPI_INT, recvbuf, BUFSIZE, MPI_INT, MPI_COMM_WORLD);
+ MPI_Alltoall(recvbuf, BUFSIZE, MPI_INT, sendbuf, BUFSIZE, MPI_INT, MPI_COMM_WORLD);
+ MPI_Allgatherv(sendbuf, BOUNDED(rank), MPI_INT, recvbuf, rcounts, displs, MPI_INT, MPI_COMM_WORLD);
+ MPI_Alltoallv(recvbuf, alltoallvcounts, displs, MPI_INT, sendbuf, alltoallvcounts, displs, MPI_INT, MPI_COMM_WORLD);
+ MPI_Allreduce(sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+ MPI_Reduce_scatter(sendbuf, recvbuf, rcounts, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+ MPI_Scan(sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+ MPI_Exscan(sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+ MPI_Barrier(MPI_COMM_WORLD);
+
+ free(alltoallvcounts);
+ free(sendbuf);
+ free(recvbuf);
+ free(displs);
+ free(counts);
+ free(rcounts);
+
+ MPI_Finalize();
return 0;
}
-
-#endif
\ No newline at end of file