AND Private Git Repository - loba.git/blob - process.h
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
xbt_cond_timedwait: mutex is not held on timeout.
[loba.git] / process.h
1 #ifndef PROCESS_H
2 #define PROCESS_H
3
4 #define USE_UNORDERED_MAP 1
5 //#undef USE_UNORDERED_MAP
6
7 #include <algorithm>
8 #include <functional>
9 #ifdef USE_UNORDERED_MAP
10 #  include <unordered_map>
11 #  define MAP_TEMPLATE std::unordered_map
12 #else
13 #  include <map>
14 #  define MAP_TEMPLATE std::map
15 #endif
16 #include <vector>
17 #include <msg/msg.h>
18 #include <xbt/log.h>
19 #include "atomic_compat.h"
20 #include "communicator.h"
21 #include "misc.h"
22 #include "msg_thread.h"
23 #include "neighbor.h"
24 #include "options.h"
25 #include "simgrid_features.h"
26 #include "synchro.h"
27
28 class process {
29 public:
30     static void set_proc_mutex(mutex_t* m) { proc_mutex = m;            }
31
32     // Note: normally used with proc_mutex locked.
33     static double get_total_load_init()    { return total_load_init;    }
34     static double get_total_load_running() { return total_load_running; }
35     static double get_total_load_exit()    { return total_load_exit;    }
36
37     process(int argc, char* argv[]);
38     virtual ~process();
39
40     double get_real_load() const           { return real_load;            }
41     double get_comp_amount() const         { return acc.comp_amount;      }
42     unsigned get_comp_iter() const         { return comp_iter;            }
43     unsigned get_all_comp_iter() const     { return all_comp_iter;        }
44     double get_iter_deviation() const;
45     double get_data_send_amount() const    { return acc.data_send.amount; }
46     double get_data_recv_amount() const    { return acc.data_recv.amount; }
47     unsigned get_data_send_count() const   { return acc.data_send.count;  }
48     unsigned get_data_recv_count() const   { return acc.data_recv.count;  }
49     double get_ctrl_send_amount() const    { return acc.ctrl_send.amount; }
50     double get_ctrl_recv_amount() const    { return acc.ctrl_recv.amount; }
51     unsigned get_ctrl_send_count() const   { return acc.ctrl_send.count;  }
52     unsigned get_ctrl_recv_count() const   { return acc.ctrl_recv.count;  }
53     double get_idle_duration() const       { return idle_duration;        }
54     double get_convergence() const         { return convergence;          }
55
56     int run();
57
58 protected:
59     typedef std::vector<neighbor> neigh_type;
60     typedef std::vector<neighbor*> pneigh_type;
61
62     pneigh_type pneigh;         // list of pointers to neighbors that
63                                 // we are free to reorder
64
65     // Get and set current load, which may be real load, or expected
66     // load if opt::bookkeeping is true.
67     double get_load() const                { return expected_load; }
68
69     // The load balancing algorithm comes here...
70     virtual void load_balance();
71
72     // Register some amount of load to send to given neighbor.
73     void send(neighbor& nb, double amount);
74     void send(neighbor* nb, double amount) { send(*nb, amount); }
75
76     // Sort pneigh by applying comp to their loads
77     template <typename Compare>
78     void pneigh_sort_by_load(const Compare& comp);
79
80     // Calls neighbor::print(verbose, logp, cat) for each member of neigh.
81     void print_loads(bool verbose = false,
82                      e_xbt_log_priority_t logp = xbt_log_priority_info,
83                      xbt_log_category_t cat = _XBT_LOGV(default)) const;
84
85     // Calls neighbor::print(verbose, logp, cat) for each member of pneigh.
86     void print_loads_p(bool verbose = false,
87                        e_xbt_log_priority_t logp = xbt_log_priority_info,
88                        xbt_log_category_t cat = _XBT_LOGV(default)) const;
89
90 private:
91     static mutex_t *proc_mutex; // protect access to global variables
92                                 // (must be set before constructing
93                                 // the first object!)
94
95     static double total_load_init; // sum of process loads at init
96     static double total_load_running; // sum of loads while running
97     static double total_load_exit; // sum of process loads at exit
98
99     static int process_counter;
100     static double total_load_average;
101     static double average_load_ratio;
102     static double load_diff_threshold;
103
104     static std::atomic<int> convergence_counter;
105
106     typedef MAP_TEMPLATE<msg_host_t, neighbor*> rev_neigh_type;
107     neigh_type neigh;           // list of neighbors (do not alter
108                                 // after construction!)
109     rev_neigh_type rev_neigh;   // map msg_host_t -> neighbor
110
111     communicator comm;          // communicator for this process
112     int ctrl_close_pending;     // number of "close" messages to wait
113                                 // on ctrl channel
114     int data_close_pending;     // number of "close" messages to wait
115                                 // on data channel
116     bool close_received;        // true if we received a "close" message
117     bool finalizing;            // true when finalize() is running
118
119     unsigned lb_iter;           // counter of load-balancing iterations
120     unsigned comp_iter;         // counter of computation iterations
121     unsigned all_comp_iter;     // counter of computation iterations
122                                 // (counting empty iterations too)
123
124     double prev_load_broadcast; // used to ensure that we do not send
125                                 // a same information messages
126     double real_load;           // current load
127     double expected_load;       // expected load in bookkeeping mode
128     double received_load;       // load received from neighbors
129
130     double idle_duration;       // how long we had nothing to compute
131     double convergence;         // date when convergence was achieved, or -1.0
132     int local_convergence_counter; // number of iterations since convergence
133
134     mutex_t mutex;              // synchronization between threads
135     condition_t cond;
136
137     struct mesg_accounting {
138         double amount;          // sum of message size
139         unsigned count;         // number of messages
140         mesg_accounting(): amount(0.0), count(0) { }
141     };
142     struct accounting {
143         double comp_amount;        // total computing done so far (flops)
144         mesg_accounting data_send; // data messages sent
145         mesg_accounting data_recv; // data messages received
146         mesg_accounting ctrl_send; // ctrl message sent
147         mesg_accounting ctrl_recv; // ctrl message received
148         accounting(): comp_amount(0.0) { }
149     };
150     accounting acc;             // use a structure so that it is
151                                 // automatically initialized a
152                                 // construction
153
154     void add_comp_amount(double amount) { acc.comp_amount += amount; }
155     void add_data_send_mesg(double amount) {
156         ++acc.data_send.count;
157         acc.data_send.amount += amount;
158     }
159     void add_data_recv_mesg(double amount) {
160         ++acc.data_recv.count;
161         acc.data_recv.amount += amount;
162     }
163     void add_ctrl_send_mesg(double amount) {
164         ++acc.ctrl_send.count;
165         acc.ctrl_send.amount += amount;
166     }
167     void add_ctrl_recv_mesg(double amount) {
168         ++acc.ctrl_recv.count;
169         acc.ctrl_recv.amount += amount;
170     }
171
172     // Load-balancing loop
173     msg_thread* lb_thread;
174     void load_balance_loop();
175
176     // Simulate computation loop
177     void compute_loop();
178
179     // Check for convergence
180     void convergence_check();
181
182     // Check if we need to stop
183     bool still_running();
184
185     // Returns the sum of "to_send" for all neighbors.
186     double get_sum_of_to_send() const;
187
188     // Compute load_to_send (for data_send), subject to the execution parameters
189     static double compute_load_to_send(double desired);
190
191     // Send procedures
192     void ctrl_send(neighbor& nb);
193     void data_send(neighbor& nb);
194     void ctrl_close(neighbor& nb);
195     void data_close(neighbor& nb);
196
197     // Receive procedure
198     // Parameter "timeout" may be 0 for non-blocking operation, -1 for
199     // infinite waiting, or any positive timeout.
200     void ctrl_receive(double timeout);
201     void data_receive(double timeout);
202     void handle_message(message* msg, msg_host_t from);
203 };
204
205 template <typename Compare>
206 void process::pneigh_sort_by_load(const Compare& comp)
207 {
208     using std::placeholders::_1;
209     using std::placeholders::_2;
210     std::sort(pneigh.begin(), pneigh.end(),
211               std::bind(comp,
212                         std::bind(&neighbor::get_load, _1),
213                         std::bind(&neighbor::get_load, _2)));
214 }
215
216 #endif // !PROCESS_H
217
218 // Local variables:
219 // mode: c++
220 // End: