#define USE_UNORDERED_MAP 1
//#undef USE_UNORDERED_MAP
-#include <vector>
+#include <algorithm>
+#include <atomic>
#ifdef USE_UNORDERED_MAP
-# include <tr1/unordered_map>
-# define MAP_TEMPLATE std::tr1::unordered_map
+# include <unordered_map>
+# define MAP_TEMPLATE std::unordered_map
#else
# include <map>
# define MAP_TEMPLATE std::map
#endif
-#include <msg/msg.h>
+#include <vector>
+#include <simgrid/msg.h>
+#include <xbt/log.h>
#include "communicator.h"
+#include "misc.h"
+#include "msg_thread.h"
#include "neighbor.h"
+#include "options.h"
+#include "simgrid_features.h"
+#include "synchro.h"
class process {
public:
+ static void set_proc_mutex(mutex_t* m) { proc_mutex = m; } // call before constructing any process
+
+ // Global load totals. Note: normally used with proc_mutex locked.
+ static double get_total_load_init() { return total_load_init; }
+ static double get_total_load_running() { return total_load_running; }
+ static double get_total_load_exit() { return total_load_exit; }
+
process(int argc, char* argv[]);
- ~process();
+ virtual ~process();
+
+ double get_real_load() const { return real_load; }
+ double get_comp_amount() const { return acc.comp_amount; }
+ unsigned get_comp_iter() const { return comp_iter; }
+ unsigned get_all_comp_iter() const { return all_comp_iter; }
+ double get_iter_deviation() const;
+ double get_data_send_amount() const { return acc.data_send.amount; }
+ double get_data_recv_amount() const { return acc.data_recv.amount; }
+ unsigned get_data_send_count() const { return acc.data_send.count; }
+ unsigned get_data_recv_count() const { return acc.data_recv.count; }
+ double get_ctrl_send_amount() const { return acc.ctrl_send.amount; }
+ double get_ctrl_recv_amount() const { return acc.ctrl_recv.amount; }
+ unsigned get_ctrl_send_count() const { return acc.ctrl_send.count; }
+ unsigned get_ctrl_recv_count() const { return acc.ctrl_recv.count; }
+ double get_idle_duration() const { return idle_duration; }
+ double get_convergence() const { return convergence; }
+
int run();
protected:
typedef std::vector<neighbor> neigh_type;
typedef std::vector<neighbor*> pneigh_type;
- neigh_type neigh;
- pneigh_type pneigh;
+ pneigh_type pneigh; // list of pointers to neighbors that
+ // we are free to reorder
+
+ // Get the current load used for load balancing. Always returns expected_load;
+ // presumably it tracks real_load unless opt::bookkeeping is true -- TODO confirm.
+ double get_load() const { return expected_load; }
+
+ // The load balancing algorithm; virtual so that derived classes can override it.
+ virtual void load_balance();
+
+ // Register some amount of load to send to given neighbor.
+ void send(neighbor& nb, double amount);
+ void send(neighbor* nb, double amount) { send(*nb, amount); } // pointer overload, forwards to the above
+
+ // Sort pneigh with comp applied to the neighbors' loads (neighbor::get_load()).
+ template <typename Compare>
+ void pneigh_sort_by_load(const Compare& comp);
+
+ // Calls neighbor::print(verbose, logp, cat) for each member of neigh.
+ void print_loads(bool verbose = false,
+ e_xbt_log_priority_t logp = xbt_log_priority_info,
+ xbt_log_category_t cat = _XBT_LOGV(default)) const;
+
+ // Calls neighbor::print(verbose, logp, cat) for each member of pneigh.
+ void print_loads_p(bool verbose = false,
+ e_xbt_log_priority_t logp = xbt_log_priority_info,
+ xbt_log_category_t cat = _XBT_LOGV(default)) const;
private:
- typedef MAP_TEMPLATE<m_host_t, neighbor*> rev_neigh_type;
- enum recv_wait_mode { NO_WAIT = 0, WAIT, WAIT_FOR_CLOSE };
+ static mutex_t *proc_mutex; // protect access to global variables
+ // (must be set before constructing
+ // the first object!)
+
+ static double total_load_init; // sum of process loads at init
+ static double total_load_running; // sum of loads while running
+ static double total_load_exit; // sum of process loads at exit
- rev_neigh_type rev_neigh;
+ static int process_counter;
+ static double total_load_average;
+ static double average_load_ratio;
+ static double load_diff_threshold;
- communicator comm;
- int ctrl_close_pending;
- int data_close_pending;
+ static std::atomic<int> convergence_counter;
- unsigned iter;
+ typedef MAP_TEMPLATE<msg_host_t, neighbor*> rev_neigh_type;
+ neigh_type neigh; // list of neighbors (do not alter
+ // after construction!)
+ rev_neigh_type rev_neigh; // map msg_host_t -> neighbor
- double prev_load_broadcast;
- double load;
- double expected_load;
+ communicator comm; // communicator for this process
+ int ctrl_close_pending; // number of "close" messages to wait for
+ // on ctrl channel
+ int data_close_pending; // number of "close" messages to wait for
+ // on data channel
+ bool close_received; // true if we received a "close" message
+ bool finalizing; // true when finalize() is running
- double sum_of_to_send() const;
- virtual double load_balance(double my_load);
+ unsigned lb_iter; // counter of load-balancing iterations
+ unsigned comp_iter; // counter of computation iterations
+ unsigned all_comp_iter; // counter of computation iterations
+ // (counting empty iterations too)
- void compute();
- void send1_no_bookkeeping(neighbor& nb);
- void send1_bookkeeping(neighbor& nb);
- void send();
- bool receive(recv_wait_mode wait);
- void finalize1(neighbor& nb);
- void finalize();
- void print_loads(e_xbt_log_priority_t logp = xbt_log_priority_info);
+ double prev_load_broadcast; // used to ensure that we do not send
+ // the same information message twice
+ double real_load; // current load
+ double expected_load; // expected load in bookkeeping mode
+ double received_load; // load received from neighbors
+
+ double idle_duration; // how long we had nothing to compute
+ double convergence; // date when convergence was achieved, or -1.0
+ int local_convergence_counter; // number of iterations since convergence
+
+ mutex_t mutex; // synchronization between threads
+ condition_t cond;
+
+ struct mesg_accounting {
+ double amount; // sum of message sizes
+ unsigned count; // number of messages
+ mesg_accounting(): amount(0.0), count(0) { }
+ };
+ struct accounting {
+ double comp_amount; // total computing done so far (flops)
+ mesg_accounting data_send; // data messages sent
+ mesg_accounting data_recv; // data messages received
+ mesg_accounting ctrl_send; // ctrl messages sent
+ mesg_accounting ctrl_recv; // ctrl messages received
+ accounting(): comp_amount(0.0) { }
+ };
+ accounting acc; // use a structure so that it is
+ // automatically initialized at
+ // construction
+
+ void add_comp_amount(double amount) { acc.comp_amount += amount; }
+ void add_data_send_mesg(double amount) {
+ ++acc.data_send.count;
+ acc.data_send.amount += amount;
+ }
+ void add_data_recv_mesg(double amount) {
+ ++acc.data_recv.count;
+ acc.data_recv.amount += amount;
+ }
+ void add_ctrl_send_mesg(double amount) {
+ ++acc.ctrl_send.count;
+ acc.ctrl_send.amount += amount;
+ }
+ void add_ctrl_recv_mesg(double amount) {
+ ++acc.ctrl_recv.count;
+ acc.ctrl_recv.amount += amount;
+ }
+
+ // Load-balancing loop (presumably executed by lb_thread -- TODO confirm)
+ msg_thread* lb_thread;
+ void load_balance_loop();
+
+ // Simulate computation loop
+ void compute_loop();
+
+ // Check for convergence
+ void convergence_check();
+
+ // Check if we need to stop
+ bool still_running();
+
+ // Returns the sum of "to_send" for all neighbors.
+ double get_sum_of_to_send() const;
+
+ // Compute load_to_send (for data_send), subject to the execution parameters
+ static double compute_load_to_send(double desired);
+
+ // Send procedures
+ void ctrl_send(neighbor& nb);
+ void data_send(neighbor& nb);
+ void ctrl_close(neighbor& nb);
+ void data_close(neighbor& nb);
+
+ // Receive procedure
+ // Parameter "timeout" may be 0 for non-blocking operation, -1 for
+ // infinite waiting, or any positive timeout.
+ void ctrl_receive(double timeout);
+ void data_receive(double timeout);
+ void handle_message(message* msg, msg_host_t from);
};
+template <typename Compare> // comp is called as comp(load_a, load_b) on two neighbor loads
+void process::pneigh_sort_by_load(const Compare& comp)
+{
+    auto by_load = [&comp](const neighbor* lhs, const neighbor* rhs) {
+        return comp(lhs->get_load(), rhs->get_load());
+    };
+    std::sort(pneigh.begin(), pneigh.end(), by_load); // reorders pneigh in place
+}
+
#endif // !PROCESS_H
// Local variables: