1 -- A SimGrid Lua implementation of the Chord DHT
3 -- Copyright (c) 2011-2012, 2014. The SimGrid Team.
4 -- All rights reserved.
6 -- This program is free software; you can redistribute it and/or modify it
7 -- under the terms of the license (GNU LGPL) which comes with this package.
16 max_simulation_time = 1000
18 fix_fingers_delay = 120
19 check_predecessor_delay = 120
22 -- current node (don't worry, globals are duplicated in each simulated process)
24 -- FIXME: my_id does not exist.
26 next_finger_to_fix = 1,
32 -- Main function of each Chord process
35 -- - the id of a guy I know in the system (except for the first node)
38 simgrid.info("Hi! This is my first message; I just entered the program!")
39 -- TODO simplify the deployment file
42 my_node.id = math.tointeger(args[1])
43 simgrid.info("My id is now " .. my_node.id)
45 known_id = math.tointeger(args[2])
46 simgrid.info("Updated my known_id to " .. known_id)
49 -- initialize the node and the fingertable;
50 -- at the beginning, this node only knows itself (we need to discover others)
51 for i = 1, nb_bits, 1 do
52 my_node.fingers[i] = my_node.id
55 -- Let's make sure we can receive messages!
56 my_node.comm_recv = simgrid.task.irecv(my_node.id)
59 local join_success = false
60 --simgrid.info(known_id)
62 if known_id == nil then
63 -- only the first node ("Jacqueline") will enter here
64 -- as configured in file ../../msg/chord/chord.xml
65 simgrid.info("First node here. Going to create everything.")
69 -- Communicate to the first node and join the ring there
70 -- This will also initialize
71 -- my_node.predecessor and my_node.successor
72 join_success = join(known_id)
75 -- At this point, finger[1] does not necessarily actually point
76 -- to the *real* successor; it might be that the first node still
77 -- didn't notify us that another node joined with
78 -- an ID that satisfies my_id <= ID <= current_successorId
80 -- TODO Remove this, but make sure my_node.predecessor is initialized somewhere
81 --if my_node.id == 1 then
82 --simgrid.info("YUHU!")
83 --my_node.predecessor = 1
89 local now = simgrid.get_clock()
90 local next_stabilize_date = now + stabilize_delay
91 local next_fix_fingers_date = now + fix_fingers_delay
92 local next_check_predecessor_date = now + check_predecessor_delay
93 local next_lookup_date = now + lookup_delay
97 simgrid.debug("I'm now entering the main loop.")
99 while now < max_simulation_time do
101 task, err = my_node.comm_recv:test()
102 simgrid.info(now .. " " .. next_stabilize_date .. " " .. now + stabilize_delay .. " " .. next_fix_fingers_date .. " " .. next_check_predecessor_date .. " " .. next_lookup_date)
105 -- I received a task: answer it
106 simgrid.info("I received a task of type '" .. task.type .."'! My id is " .. my_node.id)
107 my_node.comm_recv = simgrid.task.irecv(my_node.id)
110 -- the communication has failed: nevermind, we'll try again
111 simgrid.info("Error while receiving a task! My id is " .. my_node.id)
112 my_node.comm_recv = simgrid.task.irecv(my_node.id)
114 -- no task was received: do periodic calls
116 if now >= next_stabilize_date then
117 simgrid.debug("Stabilizing...")
119 simgrid.debug("Finished stabilizing!")
120 next_stabilize_date = simgrid.get_clock() + stabilize_delay
122 --elseif now >= next_fix_fingers_date then
124 --next_fix_fingers_date = simgrid.get_clock() + fix_fingers_delay
126 --elseif now >= next_check_predecessor_date then
127 --check_predecessor()
128 --next_check_predecessor_date = simgrid.get_clock() + check_predecessor_delay
130 --elseif now >= next_lookup_date then
132 --simgrid.debug("I'm now executing a lookup, as lookup_delay makes me do this. " .. simgrid.get_clock())
133 --next_lookup_date = simgrid.get_clock() + lookup_delay
136 ---- nothing to do: sleep for a while
137 --simgrid.debug("Didn't have to stabilize, update my fingers, check my predecessors or do a random lookup; hence, I'm starting to sleep now...")
138 simgrid.process.sleep(5)
139 --simgrid.debug("Slept for 5s")
142 now = simgrid.get_clock()
150 -- Makes the current node leave the ring
153 simgrid.info("Leaving the ring, because max_simulation_time was reached.")
154 -- TODO: notify others
157 -- This function is called when the current node receives a task
158 -- and can not immediately deal with it; for instance, if the host
159 -- waits on a response for a 'find successor' query but receives a
160 -- 'get predecessor' message instead; we cannot just discard this
161 -- message so we deal with it here.
163 -- - task: the task received
164 function handle_task(task)
166 simgrid.debug("Handling task in handle_task()")
167 local type = task.type
169 if type == "find successor" then
171 task.answer_to = math.tointeger(task.answer_to)
172 task.request_id = math.tointeger(task.request_id)
174 simgrid.info("Received a 'find successor' request from " .. string.format("%d", task.answer_to) ..
175 " for id " .. string.format("%d", task.request_id))
177 -- Is my successor have the right host? This can happen if there are holes
178 -- in the ring; for instance, if my id is 13 and my successor is 17 and
179 -- 14,15,16 don't exist but I'm asked for 15, then yes, 17 is the right
180 -- answer to the request.
182 -- Test: my_node.id + 1 <= task.request_id <= my_node.fingers[1]
184 -- TODO: Why the +1? We could receive this message from a host that forwarded
185 -- this message (and the original sender doesn't know us),
186 -- so why do we exclude ourselves?
187 if is_in_interval(task.request_id, my_node.id + 1, my_node.fingers[1]) then
189 simgrid.info("Sending back a 'find successor answer' to " ..
190 string.format("%d", task.answer_to) .. ": the successor of " .. string.format("%d", task.request_id) ..
191 " is " .. string.format("%d", my_node.fingers[1]))
193 task.type = "find successor answer"
194 -- TODO: Can we remove the "" here?
195 task.answer = math.tointeger(my_node.fingers[1])
196 simgrid.info("Answer" .. task.answer)
197 task:dsend(math.tointeger(task.answer_to))
199 -- forward the request to the closest preceding finger in my table
201 simgrid.info("Forwarding the 'find successor' request to my closest preceding finger")
202 task:dsend(closest_preceding_node(task.request_id))
205 elseif type == "get predecessor" then
206 simgrid.info("Received a 'find predecessor' request from " .. string.format("%d", task.answer_to) ..
207 " for id. Sending back an answer.")
209 task.type = "get predecessor answer"
211 --for i,v in pairs(my_node) do
212 --print(my_node.id, i, v)
214 --print(my_node.predecessor)
215 if my_node.predecessor ~= nil then
216 task.answer = math.tointeger(my_node.predecessor)
217 --print(my_node.predecessor)
219 -- FIXME: This is completely wrong here. Fix this;
220 -- we need to figure out what to send if we don't know our
222 simgrid.critical("Don't know my predecessor yet!")
223 my_node.predecessor = remote_get_predecessor(my_node.fingers[1])
224 task.answer = my_node.predecessor
227 --print("It will break now, since task.answer is nil here.")
228 --task.answer = my_node.predecessor
231 task:dsend(math.tointeger(task.answer_to))
232 --print("After dsend returned")
234 elseif type == "notify" then
235 -- someone is telling me that he may be my new predecessor
236 simgrid.info("Host id " .. task.request_id .. " wants to be my predecessor")
237 notify(math.tointeger(task.request_id))
239 elseif type == "predecessor leaving" then
241 simgrid.debug("predecessor leaving")
243 elseif type == "successor_leaving" then
244 -- TODO: We could / should use something like table.remove(my_node.fingers, 1) here
247 elseif type == "find successor answer" then
248 -- ignoring, this is handled in remote_find_successor
251 elseif type == "get predecessor answer" then
252 -- ignoring, this is already handled in
255 error("Unknown type of task received: " .. task.type)
258 simgrid.info("I'm leaving handle_task() now.")
261 -- Returns whether an id belongs to the interval [a, b[.
262 -- The parameters are normalized to make sure they are between 0 and nb_keys - 1.
263 -- 1 belongs to [62, 3].
264 -- 1 does not belong to [3, 62].
265 -- 63 belongs to [62, 3].
266 -- 63 does not belong to [3, 62].
267 -- 24 belongs to [21, 29].
268 -- 24 does not belong to [29, 21].
269 function is_in_interval(id, a, b)
270 id= math.tointeger(id)
271 a = math.tointeger(a)
272 b = math.tointeger(b)
274 -- normalize the parameters
275 -- TODO: Currently, nb_bits = 24; so a,b,id < 24! Really?
280 -- make sure a <= b and a <= id
292 -- Returns whether the current node is in the ring.
293 function has_joined()
295 return my_node.fingers[my_node.id] ~= nil
298 -- Creates a new Chord ring.
300 simgrid.debug("I've now initialized my predecessor and fingertable.")
301 --my_node.predecessor = my_node.id
302 my_node.predecessor = nil
303 my_node.fingers[1] = my_node.id
306 -- Attemps to join the Chord ring.
307 -- - known_id: id of a node already in the ring
308 -- - return value: true if the join was successful
309 function join(known_id)
311 simgrid.info("Joining the ring with id " .. my_node.id .. ", knowing node " .. known_id)
313 local successor = remote_find_successor(known_id, my_node.id)
314 simgrid.info("Returned from remote_find_successor; my successor is " .. successor)
315 if successor == nil then
316 simgrid.critical("Cannot join the ring.")
320 -- We don't know the predecessor yet, so we initialize it with NULL
321 my_node.predecessor = nil
322 my_node.fingers[1] = successor
324 -- Everything was successfull!
328 -- Returns the closest preceding finger of an id with respect to the finger
329 -- table of the current node.
330 -- - id: the id to find
331 -- - return value: the closest preceding finger of that id
332 function closest_preceding_node(id)
334 for i = nb_bits, 1, -1 do
335 if is_in_interval(my_node.fingers[i], my_node.id + 1, id - 1) then
336 -- finger i is the first one before id
337 simgrid.info("fix_fingers: The closest preceding node for " .. id .. " is finger " .. i .. " (node " .. my_node.fingers[i] .. ")")
338 return my_node.fingers[i]
343 -- Finds the successor of an id
344 -- id: the id to find
345 -- return value: the id of the successor, or nil if the request failed
346 function find_successor(id)
348 if is_in_interval(id, my_node.id + 1, my_node.fingers[1]) then
349 -- my successor is the successor
350 simgrid.info("Looking for successor of " .. id .. ", but I determined it's my own successor: " .. my_node.fingers[1])
351 return my_node.fingers[1]
353 -- ask to the closest preceding finger in my table
354 simgrid.info("fix_fingers: Looking for successor of " .. id .. ", checking closest preceding node")
355 local ask_to = closest_preceding_node(id)
356 simgrid.info("fix_fingers: Looking for successor of " .. id .. ", checking closest preceding node")
357 return remote_find_successor(ask_to, id)
362 -- Asks a remote node the successor of an id.
363 -- ask_to: id of a remote node to ask to
364 -- id: the id to find
365 -- return value: the id of the successor, or nil if the request failed
366 function remote_find_successor(ask_to, id)
368 local task = simgrid.task.new("", comp_size, comm_size)
369 task.type = "find successor"
370 task.request_id = id -- This is the id we want to find
371 task.answer_to = my_node.id -- This is where the answer needs to go
374 simgrid.info("Sending a 'find successor' request to " .. ask_to .. " for id " .. id .. " (timeout=".. timeout .. ")")
375 if task:send(ask_to, timeout) then
376 -- request successfully sent: wait for an answer
379 simgrid.info("New iteration in while loop of remote_find_successor(); I'm still waiting for a response!")
380 --print(task.request_id)
381 simgrid.info("Starting to wait for a message; timeout=" .. timeout)
382 task = my_node.comm_recv:wait(timeout)
383 simgrid.info("Finished to wait")
384 -- TODO Do we need this?
385 --for i,v in pairs(task) do
388 --simgrid.info("I will crash!")
389 --task.answer = math.tointeger(task.answer)
390 --simgrid.info("Ich denke task.type ist leer")
391 --simgrid.info("Before irecv: " .. my_node.id)
393 -- Even if the recv above failed (timeout occurred) -- we want to be
394 -- able to receive a message if it comes in, even without us explicitly
395 -- calling the recv() method.
396 my_node.comm_recv = simgrid.task.irecv(my_node.id)
399 -- failed to receive the answer
402 -- a task was received: is it the expected answer (i.e., the response to
403 -- our query and for the id we're interested in)
404 if task.type ~= "find successor answer" or task.request_id ~= id then
405 -- this is not our answer, but we still need to handle it.
406 simgrid.info("Wrong request of type " .. task.type .. " received, expected 'find successor answer'")
410 -- this is our answer
411 simgrid.info("Received the answer to my 'find successor' request for id " ..
412 id .. ": the successor is " .. task.answer)
414 -- TODO: Do we need math.tointeger here?
415 return math.tointeger(task.answer)
420 simgrid.info("Failed to send the 'find successor' request to " .. ask_to ..
424 -- This can never be reached, because if this host finds the successor, it
425 -- will return it right away!
426 simgrid.info("Whooops! I should never reach this statement, because I didn't find a successor!")
428 -- We need to return the successor here
431 -- Asks a remote node its predecessor.
432 -- ask_to: id of a remote node to ask to
433 -- return value: the id of its predecessor, or nil if the request failed
434 function remote_get_predecessor(ask_to)
436 local task = simgrid.task.new("", comp_size, comm_size)
437 task.type = "get predecessor"
438 task.answer_to = math.tointeger(my_node.id)
439 -- TODO c.heinrich: Remove this
440 --task.note = "Bla " .. ask_to .. " at time " .. simgrid.get_clock()
442 simgrid.info("Sending request for '" .. task.type .."' to id '" .. ask_to .. "'")
443 if task:send(ask_to, timeout) then
444 simgrid.info("Done sending the request to " .. ask_to)
445 -- request successfully sent: wait for an answer
446 -- We need to iterate here because we might receive other
447 -- messages too (but not the answer to the request we just sent);
448 -- hence, we loop here.
450 simgrid.info("Starting to wait. My id: " .. my_node.id)
451 task = my_node.comm_recv:wait(timeout)
452 simgrid.info("Finished to wait. My id: " .. my_node.id .. " ask_to is " .. ask_to)
453 my_node.comm_recv = simgrid.task.irecv(my_node.id)
456 -- failed to receive the answer
457 simgrid.info("Task not received - is null?")
460 -- a task was received: is it the expected answer?
461 if task.type ~= "get predecessor answer" then
462 -- this is not our answer
463 simgrid.info("Task is NOT 'get predecessor answer'")
466 -- this is our answer
467 -- FIXME make sure the message answers to this particular request
468 --simgrid.info(task.answer)
469 for i,v in pairs(task) do
470 print(my_node.id, i, v)
472 simgrid.info("Task is answer for predecessor! The answer is: ")
473 if (task.answer) then print("NIL!\n") else print("Not NIL\n") end
483 -- Checks the immediate successor of the current node.
486 local successor = my_node.fingers[1]
487 if successor ~= my_node.id then
488 simgrid.info("Getting remote predecessor from ".. successor)
489 candidate = remote_get_predecessor(successor)
490 simgrid.info("Received ".. candidate .. " as candidate")
492 candidate = my_node.predecessor
495 simgrid.info("Still stabilizing")
496 -- candidate might become my new successor
497 if candidate ~= nil and is_in_interval(candidate, my_node.id + 1, successor - 1) then
498 simgrid.info("I'm updating my successor to " .. math.tointeger(candidate))
499 my_node.fingers[1] = math.tointeger(candidate)
501 -- If candidate is not my_node.id, then I should notify candidate that I'm here.
502 -- (So this node updates it's predecessor to me)
503 --remote_notify(candidate, my_node.id)
506 simgrid.info("Successor: " .. successor .. " and my id: " .. my_node.id)
507 -- If candidate is nil, this means that our successor has no predecessor.
508 -- So we should tell him about us...
509 -- TODO: I think a host that receives a message could automatically add
510 -- this other node as a predecessor if it doesn't have any... needs to
511 -- be implemented somewhere else, not here.
512 if candidate == nil and successor ~= my_node.id then
513 remote_notify(successor, my_node.id)
517 -- Notifies the current node that its predecessor may have changed
518 -- - candidate_predecessor: the possible new predecessor
519 function notify(candidate_predecessor)
520 if my_node.predecessor == nil or is_in_interval(candidate_predecessor,
521 my_node.predecessor + 1, my_node.id - 1) then
522 simgrid.info("Updated my predecessor to " .. candidate_predecessor)
523 my_node.predecessor = math.tointeger(candidate_predecessor)
527 -- Notifies a remote node that its predecessor my have changed.
529 -- - candidate the possible new predecessor
530 function remote_notify(notify_to, candidate_predecessor)
532 simgrid.info("Updating someone else's predecessor (id: " .. notify_to .. " predecessor to ".. candidate_predecessor .. ")")
533 local task = simgrid.task.new("", comp_size, comm_size)
535 task.request_id = candidate_predecessor
536 task:dsend(notify_to)
539 -- Refreshes the finger table of the current node,
540 -- one finger per call.
541 function fix_fingers()
543 local i = math.tointeger(my_node.next_finger_to_fix)
544 local id = find_successor(math.tointeger(my_node.id + 2^i))
545 simgrid.info("Called fix_fingers(). Next finger to fix: " .. i .. " and I will check " .. my_node.id + 2^i .. ". Request returned " .. id)
548 if id ~= my_node.fingers[i] then
549 my_node.fingers[i] = id
550 simgrid.info("fix_fingers: Updated finger " .. i .. " to " .. id)
552 simgrid.info("fix_fingers: id is " .. id)
554 my_node.next_finger_to_fix = (i % nb_bits) + 1
558 -- Checks whether the predecessor of the current node has failed.
559 function check_predecessor()
563 -- Performs a find successor request to an arbitrary id.
564 function random_lookup()
569 simgrid.platform(arg[1] or "../../msg/msg_platform.xml")
570 simgrid.application(arg[2] or "../../msg/chord/chord90.xml")