6 #include <unordered_map>
8 #include <simgrid/msg.h>
10 #include "communicator.h"
12 #include "msg_thread.h"
15 #include "simgrid_features.h"
// Stores the pointer m in the static proc_mutex member.  Nothing else is
// done here: no locking and no null check.
20 static void set_proc_mutex(mutex_t* m) { proc_mutex = m; }
// Read-only accessors for the class-wide load totals: the sums of process
// loads at init, while running, and at exit.  They do no locking themselves.
22 // Note: normally used with proc_mutex locked.
23 static double get_total_load_init() { return total_load_init; }
24 static double get_total_load_running() { return total_load_running; }
25 static double get_total_load_exit() { return total_load_exit; }
// Constructor (declaration only).  Takes an argc/argv-style argument list;
// the arguments it expects are not visible in this header excerpt.
27 process(int argc, char* argv[]);
// Read-only accessors for per-process state and statistics.
// Current (real) load of this process.
30 double get_real_load() const { return real_load; }
// Total computing done so far (flops).
31 double get_comp_amount() const { return acc.comp_amount; }
// Computation iteration counters; the "all" variant counts empty
// iterations too.
32 unsigned get_comp_iter() const { return comp_iter; }
33 unsigned get_all_comp_iter() const { return all_comp_iter; }
// Declaration only; the computation is not visible in this excerpt.
34 double get_iter_deviation() const;
// Message accounting: summed message sizes ("amount") and message counts,
// for data and control messages, sent and received.
35 double get_data_send_amount() const { return acc.data_send.amount; }
36 double get_data_recv_amount() const { return acc.data_recv.amount; }
37 unsigned get_data_send_count() const { return acc.data_send.count; }
38 unsigned get_data_recv_count() const { return acc.data_recv.count; }
39 double get_ctrl_send_amount() const { return acc.ctrl_send.amount; }
40 double get_ctrl_recv_amount() const { return acc.ctrl_recv.amount; }
41 unsigned get_ctrl_send_count() const { return acc.ctrl_send.count; }
42 unsigned get_ctrl_recv_count() const { return acc.ctrl_recv.count; }
// How long this process had nothing to compute.
43 double get_idle_duration() const { return idle_duration; }
// Date when convergence was achieved, or -1.0.
44 double get_convergence() const { return convergence; }
// Container aliases: neighbors held by value, and neighbors held by pointer.
49 typedef std::vector<neighbor> neigh_type;
50 typedef std::vector<neighbor*> pneigh_type;
52 pneigh_type pneigh; // list of pointers to neighbors that
53 // we are free to reorder (pneigh_sort_by_load sorts it in place)
55 // Get and set current load, which may be real load, or expected
56 // load if opt::bookkeeping is true.
// NOTE(review): this getter returns expected_load unconditionally, and only
// the getter is visible in this excerpt -- confirm that expected_load is kept
// in sync with real_load when bookkeeping is off.
57 double get_load() const { return expected_load; }
59 // The load balancing algorithm comes here...
// Declared virtual, i.e. overridable by derived classes.  Declaration only.
60 virtual void load_balance();
62 // Register some amount of load to send to given neighbor.
63 void send(neighbor& nb, double amount);
// Pointer convenience overload: forwards to the reference version.  nb is
// dereferenced unconditionally, so it must not be null.
64 void send(neighbor* nb, double amount) { send(*nb, amount); }
66 // Sort pneigh by applying comp to their loads
// comp is invoked as comp(a->get_load(), b->get_load()) for two neighbor
// pointers a and b, and its result is used as the ordering predicate.
67 template <typename Compare>
68 void pneigh_sort_by_load(const Compare& comp);
70 // Calls neighbor::print(verbose, logp, cat) for each member of neigh.
// Defaults: verbose is false, logp is xbt_log_priority_info, and cat is the
// category named by _XBT_LOGV(default).
71 void print_loads(bool verbose = false,
72 e_xbt_log_priority_t logp = xbt_log_priority_info,
73 xbt_log_category_t cat = _XBT_LOGV(default)) const;
75 // Calls neighbor::print(verbose, logp, cat) for each member of pneigh.
// Same parameters and defaults as print_loads; only the container differs.
76 void print_loads_p(bool verbose = false,
77 e_xbt_log_priority_t logp = xbt_log_priority_info,
78 xbt_log_category_t cat = _XBT_LOGV(default)) const;
// Class-wide (static) state shared by all process instances.
// proc_mutex is assigned through set_proc_mutex().
81 static mutex_t *proc_mutex; // protect access to global variables
82 // (must be set before constructing
85 static double total_load_init; // sum of process loads at init
86 static double total_load_running; // sum of loads while running
87 static double total_load_exit; // sum of process loads at exit
// NOTE(review): the statics below carry no description in this excerpt;
// check their uses in the implementation before relying on their meaning.
89 static int process_counter;
90 static double total_load_average;
91 static double average_load_ratio;
92 static double load_diff_threshold;
94 static std::atomic<int> convergence_counter; // atomic counter; presumably tied to convergence detection -- TODO confirm
// Per-process state.
96 typedef std::unordered_map<msg_host_t, neighbor*> rev_neigh_type;
97 neigh_type neigh; // list of neighbors (do not alter
98 // after construction! presumably because pneigh and rev_neigh hold pointers to its elements -- confirm)
99 rev_neigh_type rev_neigh; // map msg_host_t -> neighbor
101 communicator comm; // communicator for this process
102 int ctrl_close_pending; // number of "close" messages to wait
104 int data_close_pending; // number of "close" messages to wait
106 bool close_received; // true if we received a "close" message
107 bool finalizing; // true when finalize() is running
109 unsigned lb_iter; // counter of load-balancing iterations
110 unsigned comp_iter; // counter of computation iterations
111 unsigned all_comp_iter; // counter of all computation iterations
112 // (counting empty iterations too)
114 double prev_load_broadcast; // used to ensure that we do not send
115 // the same information message again
116 double real_load; // current load
117 double expected_load; // expected load in bookkeeping mode
118 double received_load; // load received from neighbors
120 double idle_duration; // how long we had nothing to compute
121 double convergence; // date when convergence was achieved, or -1.0
122 int local_convergence_counter; // number of iterations since convergence
124 mutex_t mutex; // synchronization between threads
// Size and count totals for one class of messages; both start at zero.
127 struct mesg_accounting {
128 double amount; // sum of message size
129 unsigned count; // number of messages
130 mesg_accounting(): amount(0.0), count(0) { } // zero both totals
133 double comp_amount; // total computing done so far (flops)
134 mesg_accounting data_send; // data messages sent
135 mesg_accounting data_recv; // data messages received
136 mesg_accounting ctrl_send; // ctrl messages sent
137 mesg_accounting ctrl_recv; // ctrl messages received
138 accounting(): comp_amount(0.0) { } // zero comp_amount; the mesg_accounting members zero themselves
140 accounting acc; // use a structure so that it is
141 // automatically initialized at
// Accounting helpers: each adds `amount` to the matching total in acc.  The
// four *_mesg variants also increment the corresponding message count by one.
144 void add_comp_amount(double amount) { acc.comp_amount += amount; }
145 void add_data_send_mesg(double amount) {
146 ++acc.data_send.count;
147 acc.data_send.amount += amount;
149 void add_data_recv_mesg(double amount) {
150 ++acc.data_recv.count;
151 acc.data_recv.amount += amount;
153 void add_ctrl_send_mesg(double amount) {
154 ++acc.ctrl_send.count;
155 acc.ctrl_send.amount += amount;
157 void add_ctrl_recv_mesg(double amount) {
158 ++acc.ctrl_recv.count;
159 acc.ctrl_recv.amount += amount;
// Internal machinery.  Everything below is declaration only.
162 // Load-balancing loop
163 msg_thread* lb_thread; // presumably the thread that runs load_balance_loop() -- TODO confirm
164 void load_balance_loop();
166 // Simulate computation loop
169 // Check for convergence
170 void convergence_check();
172 // Check if we need to stop
173 bool still_running();
175 // Returns the sum of "to_send" for all neighbors.
176 double get_sum_of_to_send() const;
178 // Compute load_to_send (for data_send), subject to the execution parameters
179 static double compute_load_to_send(double desired);
// Per-neighbor send and close operations; the ctrl_/data_ prefixes
// presumably match the control and data message accounting -- TODO confirm.
182 void ctrl_send(neighbor& nb);
183 void data_send(neighbor& nb);
184 void ctrl_close(neighbor& nb);
185 void data_close(neighbor& nb);
188 // Parameter "timeout" may be 0 for non-blocking operation, -1 for
189 // infinite waiting, or any positive timeout.
190 void ctrl_receive(double timeout);
191 void data_receive(double timeout);
// Processes one received message; `from` identifies the sending host.
// NOTE(review): ownership of msg is not visible here -- check the definition.
192 void handle_message(message* msg, msg_host_t from);
// Definition of process::pneigh_sort_by_load: sorts pneigh in place with
// std::sort, ordering two neighbor pointers by comp applied to their
// get_load() values.  comp is captured by reference, which is fine because
// the lambda is only used during this std::sort call.  As for any std::sort
// predicate, comp must induce a strict weak ordering on the loads.
195 template <typename Compare>
196 void process::pneigh_sort_by_load(const Compare& comp)
198 std::sort(pneigh.begin(), pneigh.end(),
199 [&comp](const neighbor* a, const neighbor* b) {
200 return comp(a->get_load(), b->get_load());