-- A SimGrid Lua implementation of the Chord DHT
+-- Copyright (c) 2011-2012, 2014. The SimGrid Team.
+-- All rights reserved.
+
+-- This program is free software; you can redistribute it and/or modify it
+-- under the terms of the license (GNU LGPL) which comes with this package.
+
require("simgrid")
-nb_bits = 24
-nb_keys = 2^nb_bits
-comp_size = 0
-comm_size = 10
-timeout = 50
-max_simulation_time = 1000
-stabilize_delay = 20
-fix_fingers_delay = 120
+nb_bits = 24
+nb_keys = 2^nb_bits
+comp_size = 0
+comm_size = 10
+timeout = 50
+max_simulation_time = 1000
+stabilize_delay = 20
+fix_fingers_delay = 120
check_predecessor_delay = 120
-lookup_delay = 10
+lookup_delay = 10
--- current node (don't worry, globals are duplicated in each process)
+-- current node (don't worry, globals are duplicated in each simulated process)
my_node = {
+ -- FIXME: my_id does not exist.
id = my_id,
next_finger_to_fix = 1,
fingers = {},
-- - the id of a guy I know in the system (except for the first node)
function node(...)
- -- TODO simplify the parameters
+ simgrid.debug("Hi! This is my first message; I just entered the program!")
+ -- TODO simplify the deployment file
local known_id
local args = {...}
- my_node.id = tonumber(args[1])
+ my_node.id = math.tointeger(args[1])
+ simgrid.debug("My id is now " .. my_node.id)
if #args == 4 then
- known_id = tonumber(args[2])
+ known_id = math.tointeger(args[2])
+ simgrid.info("Updated my known_id to " .. known_id)
end
- -- initialize the node
- for i = 1, nb_bits do
+ -- initialize the node and the fingertable;
+ -- at the beginning, this node only knows itself (we need to discover others)
+ for i = 1, nb_bits, 1 do
my_node.fingers[i] = my_node.id
end
+
+ -- Let's make sure we can receive messages!
my_node.comm_recv = simgrid.task.irecv(my_node.id)
-- join the ring
local join_success = false
+ --simgrid.info(known_id)
+
if known_id == nil then
- -- first node
+ -- only the first node ("Jacqueline") will enter here
+ -- as configured in file ../../msg/chord/chord.xml
+ simgrid.debug("I'm the node that is in charge. Going to create everything.")
create()
join_success = true
else
+ -- Communicate to the first node and join the ring
+ -- This will also initialize
+ -- my_node.predecessor and my_node.successor
join_success = join(known_id)
end
+ -- At this point, finger[1] does not necessarily actually point
+ -- to the *real* successor; it might be that the first node still
+ -- didn't notify us that another node joined with
+ -- an ID that satisfies my_id <= ID <= current_successorId
+
+ -- TODO Remove this, but make sure my_node.predecessor is initialized somewhere
+ --if my_node.id == 1 then
+ --my_node.predecessor = 1
+ --end
+
-- main loop
if join_success then
- local now = simgrid.get_clock()
- local next_stabilize_date = now + stabilize_delay
- local next_fix_fingers_date = now + fix_fingers_delay
+ local now = simgrid.get_clock()
+ local next_stabilize_date = now + stabilize_delay
+ local next_fix_fingers_date = now + fix_fingers_delay
local next_check_predecessor_date = now + check_predecessor_delay
- local next_lookup_date = now + lookup_delay
+ local next_lookup_date = now + lookup_delay
+
+ local task, err
- local task, success
+ simgrid.debug("I'm now entering the main loop.")
while now < max_simulation_time do
- task, success = simgrid.comm.test(my_node.comm_recv)
+ task, err = my_node.comm_recv:test()
+ simgrid.info(now .. " " .. next_stabilize_date .. " " .. now + stabilize_delay .. " " .. next_fix_fingers_date .. " " .. next_check_predecessor_date .. " " .. next_lookup_date)
if task then
- -- I received a task: answer it
+ -- I received a task: answer it
+ simgrid.info("I received a task of type '" .. task.type .."'! My id is " .. my_node.id)
my_node.comm_recv = simgrid.task.irecv(my_node.id)
- handle_task(task)
- elseif failed then
- -- the communication has failed: nevermind
+ handle_task(task)
+ elseif err then
+ -- the communication has failed: nevermind, we'll try again
+ simgrid.info("Error while receiving a task! My id is " .. my_node.id)
my_node.comm_recv = simgrid.task.irecv(my_node.id)
else
-- no task was received: do periodic calls
- if now >= next_stabilize_date then
+
+ if now >= next_stabilize_date then
+ simgrid.debug("Stabilizing...")
stabilize()
- next_stabilize_date = simgrid.get_clock() + stabilize_delay
-
- elseif now >= next_fix_fingers_date then
- fix_fingers()
- next_fix_fingers_date = simgrid.get_clock() + fix_fingers_delay
-
- elseif now >= next_check_predecessor_date then
- check_predecessor()
- next_check_predecessor_date = simgrid.get_clock() + check_predecessor_delay
- elseif now >= next_lookup_date then
- random_lookup()
- next_lookup_date = simgrid.get_clock() + lookup_delay
-
- else
- -- nothing to do: sleep for a while
- simgrid.process.sleep(5)
- end
+ simgrid.debug("Finished stabilizing!")
+ next_stabilize_date = simgrid.get_clock() + stabilize_delay
+
+ --elseif now >= next_fix_fingers_date then
+ --fix_fingers()
+ --next_fix_fingers_date = simgrid.get_clock() + fix_fingers_delay
+
+ --elseif now >= next_check_predecessor_date then
+ --check_predecessor()
+ --next_check_predecessor_date = simgrid.get_clock() + check_predecessor_delay
+
+ --elseif now >= next_lookup_date then
+ --random_lookup()
+ --simgrid.debug("I'm now executing a lookup, as lookup_delay makes me do this. " .. simgrid.get_clock())
+ --next_lookup_date = simgrid.get_clock() + lookup_delay
+
+ else
+ ---- nothing to do: sleep for a while
+ simgrid.debug("Didn't have to stabilize, update my fingers, check my predecessors or do a random lookup; hence, I'm starting to sleep now...")
+ simgrid.process.sleep(5)
+ simgrid.debug("Slept for 5s")
+ end
end
now = simgrid.get_clock()
end -- while
-- Makes the current node leave the ring
function leave()
- simgrid.info("Leaving the ring")
+ simgrid.info("Leaving the ring, because max_simulation_time was reached.")
-- TODO: notify others
end
--- This function is called when the current node receives a task.
+-- This function is called when the current node receives a task
+-- and can not immediately deal with it; for instance, if the host
+-- waits on a response for a 'find successor' query but receives a
+-- 'get predecessor' message instead; we cannot just discard this
+-- message so we deal with it here.
+--
-- - task: the task received
function handle_task(task)
+ simgrid.debug("Handling task in handle_task()")
local type = task.type
if type == "find successor" then
- simgrid.info("Received a 'find successor' request from " .. task.answer_to ..
- " for id " .. task.request_id)
-
- -- is my successor the successor?
+ task.answer_to = math.tointeger(task.answer_to)
+ task.request_id = math.tointeger(task.request_id)
+
+ simgrid.info("Received a 'find successor' request from " .. string.format("%d", task.answer_to) ..
+ " for id " .. string.format("%d", task.request_id))
+
+ -- Is my successor have the right host? This can happen if there are holes
+ -- in the ring; for instance, if my id is 13 and my successor is 17 and
+ -- 14,15,16 don't exist but I'm asked for 15, then yes, 17 is the right
+ -- answer to the request.
+ --
+ -- Test: my_node.id + 1 <= task.request_id <= my_node.fingers[1]
+ -- ^^^
+ -- TODO: Why the +1? We could receive this message from a host that forwarded
+ -- this message (and the original sender doesn't know us),
+ -- so why do we exclude ourselves?
if is_in_interval(task.request_id, my_node.id + 1, my_node.fingers[1]) then
simgrid.info("Sending back a 'find successor answer' to " ..
- task.answer_to .. ": the successor of " .. task.request_id ..
- " is " .. my_node.fingers[1])
-
- local ans_task = simgrid.task.new("", comp_size, comm_size)
- ans_task.type = "find successor answer"
- ans_task.request_id = task.request_id
- ans_task.answer = my_node.fingers[1]
- ans_task:dsend(task.answer_to)
+ string.format("%d", task.answer_to) .. ": the successor of " .. string.format("%d", task.request_id) ..
+ " is " .. string.format("%d", my_node.fingers[1]))
+
+ task.type = "find successor answer"
+ -- TODO: Can we remove the "" here?
+ task.answer = math.tointeger(my_node.fingers[1])
+ simgrid.info("Answer" .. task.answer)
+ task:dsend(math.tointeger(task.answer_to))
else
-- forward the request to the closest preceding finger in my table
simgrid.info("Forwarding the 'find successor' request to my closest preceding finger")
-
- local next_task = simgrid.task.new("", comp_size, comm_size)
- next_task.type = "find successor"
- next_task.request_id = task.request_id
- next_task.answer_to = task.answer_to
- next_task:dsend(closest_preceding_node(next_task.request_id))
+ task:dsend(closest_preceding_node(task.request_id))
end
elseif type == "get predecessor" then
- local ans_task = simgrid.task.new("", comp_size, comm_size)
- ans_task.type = "get predecessor answer"
- ans_task.answer = my_node.predecessor
- ans_task:dsend(task.answer_to)
+ simgrid.info("Received a 'find predecessor' request from " .. string.format("%d", task.answer_to) ..
+ " for id. Sending back an answer.")
+
+ task.type = "get predecessor answer"
+
+ --for i,v in pairs(my_node) do
+ --print(my_node.id, i, v)
+ --end
+ --print(my_node.predecessor)
+ if my_node.predecessor ~= nil then
+ task.answer = math.tointeger(my_node.predecessor)
+ --print(my_node.predecessor)
+ else
+ -- FIXME: This is completely wrong here. Fix this;
+ -- we need to figure out what to send if we don't know our
+ -- predecessor yet (this DOES happen and this means that task.answer
+ -- is initialised with nil and when task.answer is accessed (not here), it will
+ -- break in Lua 5.3 (because it is nil).
+ simgrid.critical("Don't know my predecessor yet!")
+ my_node.predecessor = remote_get_predecessor(my_node.fingers[1])
+ task.answer = my_node.predecessor
+ end
+
+ --print("It will break now, since task.answer is nil here.")
+ --task.answer = my_node.predecessor
+ --print(task.answer)
+ --print("Before")
+ task:dsend(math.tointeger(task.answer_to))
+ --print("After dsend returned")
elseif type == "notify" then
-- someone is telling me that he may be my new predecessor
- notify(task.request_id)
+ simgrid.info("Host id " .. task.request_id .. " wants to be my predecessor")
+ notify(math.tointeger(task.request_id))
elseif type == "predecessor leaving" then
-- TODO
+ simgrid.debug("predecessor leaving")
elseif type == "successor_leaving" then
- -- TODO
+ -- TODO: We could / should use something like table.remove(my_node.fingers, 1) here
+ simgrid.debug(type)
elseif type == "find successor answer" then
- -- ignoring
+ -- ignoring, this is handled in remote_find_successor
+ simgrid.debug(type)
elseif type == "get predecessor answer" then
- -- ignoring
+ -- ignoring, this is already handled in
else
error("Unknown type of task received: " .. task.type)
end
+
+ simgrid.info("I'm leaving handle_task() now.")
end
-- Returns whether an id belongs to the interval [a, b[.
-- 24 belongs to [21, 29].
-- 24 does not belong to [29, 21].
function is_in_interval(id, a, b)
-
-- normalize the parameters
+ -- TODO: Currently, nb_bits = 24; so a,b,id < 24! Really?
id = id % nb_bits
a = a % nb_bits
b = b % nb_bits
-
+
-- make sure a <= b and a <= id
if b < a then
b = b + nb_keys
-- Returns whether the current node is in the ring.
function has_joined()
- return my_node.fingers[1] ~= nil
+ return my_node.fingers[my_node.id] ~= nil
end
-- Creates a new Chord ring.
function create()
+ simgrid.debug("I've now initialized my predecessor and fingertable.")
+ --my_node.predecessor = my_node.id
my_node.predecessor = nil
- my_node.fingers[1] = my_node.id
+ my_node.fingers[1] = my_node.id
end
-- Attemps to join the Chord ring.
simgrid.info("Joining the ring with id " .. my_node.id .. ", knowing node " .. known_id)
local successor = remote_find_successor(known_id, my_node.id)
+ simgrid.info("Returned from remote_find_successor; my successor is " .. successor)
if successor == nil then
- simgrid.info("Cannot join the ring.")
+ simgrid.critical("Cannot join the ring.")
return false
end
+ -- We don't know the predecessor yet, so we initialize it with NULL
my_node.predecessor = nil
my_node.fingers[1] = successor
+
+ -- Everything was successfull!
return true
end
for i = nb_bits, 1, -1 do
if is_in_interval(my_node.fingers[i], my_node.id + 1, id - 1) then
-- finger i is the first one before id
+ simgrid.info("fix_fingers: The closest preceding node for " .. id .. " is finger " .. i .. " (node " .. my_node.fingers[i] .. ")")
return my_node.fingers[i]
end
end
if is_in_interval(id, my_node.id + 1, my_node.fingers[1]) then
-- my successor is the successor
+ simgrid.info("Looking for successor of " .. id .. ", but I determined it's my own successor: " .. my_node.fingers[1])
return my_node.fingers[1]
+ else
+ -- ask to the closest preceding finger in my table
+ simgrid.info("fix_fingers: Looking for successor of " .. id .. ", checking closest preceding node")
+ local ask_to = closest_preceding_node(id)
+ simgrid.info("fix_fingers: Looking for successor of " .. id .. ", checking closest preceding node")
+ return remote_find_successor(ask_to, id)
end
- -- ask to the closest preceding finger in my table
- local ask_to = closest_preceding_node(id)
- return remote_find_successor(ask_to, id)
end
-- Asks a remote node the successor of an id.
-- return value: the id of the successor, or nil if the request failed
function remote_find_successor(ask_to, id)
- local task = simgrid.task.new("", comp_size, comm_size)
- task.type = "find successor"
- task.request_id = id
- task.answer_to = my_node.id
+ local task = simgrid.task.new("", comp_size, comm_size)
+ task.type = "find successor"
+ task.request_id = id -- This is the id we want to find
+ task.answer_to = my_node.id -- This is where the answer needs to go
+ -- (back to us)
- simgrid.info("Sending a 'find successor' request to " .. ask_to .. " for id " .. id)
+ simgrid.info("Sending a 'find successor' request to " .. ask_to .. " for id " .. id .. " (timeout=".. timeout .. ")")
if task:send(ask_to, timeout) then
-- request successfully sent: wait for an answer
- simgrid.info("Sent the 'find successor' request to " .. ask_to ..
- " for id " .. id .. ", waiting for the answer")
-
while true do
- task = simgrid.comm.wait(my_node.comm_recv, timeout)
+ simgrid.info("New iteration in while loop of remote_find_successor(); I'm still waiting for a response!")
+ --print(task.request_id)
+ simgrid.info("Starting to wait for a message; timeout=" .. timeout)
+ task = my_node.comm_recv:wait(timeout)
+ simgrid.info("Finished to wait")
+ -- TODO Do we need this?
+ --for i,v in pairs(task) do
+ --print(i, v)
+ --end
+ --simgrid.info("I will crash!")
+ --task.answer = math.tointeger(task.answer)
+ --simgrid.info("Ich denke task.type ist leer")
+ --simgrid.info("Before irecv: " .. my_node.id)
+
+ -- Even if the recv above failed (timeout occurred) -- we want to be
+ -- able to receive a message if it comes in, even without us explicitly
+ -- calling the recv() method.
my_node.comm_recv = simgrid.task.irecv(my_node.id)
-
+
if not task then
- -- failed to receive the answer
- simgrid.info("Failed to receive the answer to my 'find successor' request")
- return nil
+ -- failed to receive the answer
+ return nil
else
- -- a task was received: is it the expected answer?
- if task.type ~= "find successor answer" or task.request_id ~= id then
- -- this is not our answer
- simgrid.info("Received another request of type " .. task.type)
- handle_task(task)
- else
- -- this is our answer
- simgrid.info("Received the answer to my 'find successor' request for id " ..
- id .. ": the successor is " .. task.answer)
- return task.answer
- end
+ -- a task was received: is it the expected answer (i.e., the response to
+ -- our query and for the id we're interested in)
+ if task.type ~= "find successor answer" or task.request_id ~= id then
+ -- this is not our answer, but we still need to handle it.
+ simgrid.info("Wrong request of type " .. task.type .. " received, expected 'find successor answer'")
+ handle_task(task)
+
+ else
+ -- this is our answer
+ simgrid.info("Received the answer to my 'find successor' request for id " ..
+ id .. ": the successor is " .. task.answer)
+
+ -- TODO: Do we need math.tointeger here?
+ return math.tointeger(task.answer)
+ end
end
end
else
simgrid.info("Failed to send the 'find successor' request to " .. ask_to ..
- " for id " .. id)
+ " for id " .. id)
end
- return successor
+ -- This can never be reached, because if this host finds the successor, it
+ -- will return it right away!
+ simgrid.info("Whooops! I should never reach this statement, because I didn't find a successor!")
+
+ -- We need to return the successor here
end
-- Asks a remote node its predecessor.
local task = simgrid.task.new("", comp_size, comm_size)
task.type = "get predecessor"
- task.answer_to = my_node.id
+ task.answer_to = math.tointeger(my_node.id)
+ -- TODO c.heinrich: Remove this
+ --task.note = "Bla " .. ask_to .. " at time " .. simgrid.get_clock()
+ simgrid.info("Sending request for '" .. task.type .."' to id '" .. ask_to .. "'")
if task:send(ask_to, timeout) then
+ simgrid.info("Done sending the request to " .. ask_to)
-- request successfully sent: wait for an answer
+ -- We need to iterate here because we might receive other
+ -- messages too (but not the answer to the request we just sent);
+ -- hence, we loop here.
while true do
- task = simgrid.comm.wait(my_node.comm_recv, timeout)
+ simgrid.info("Starting to wait. My id: " .. my_node.id)
+ task = my_node.comm_recv:wait(timeout)
+ simgrid.info("Finished to wait. My id: " .. my_node.id .. " ask_to is " .. ask_to)
my_node.comm_recv = simgrid.task.irecv(my_node.id)
-
+
if not task then
- -- failed to receive the answer
- return nil
+ -- failed to receive the answer
+ simgrid.info("Task not received - is null?")
+ return nil
else
- -- a task was received: is it the expected answer?
- if task.type ~= "get predecessor answer" then
+ -- a task was received: is it the expected answer?
+ if task.type ~= "get predecessor answer" then
-- this is not our answer
- handle_task(task)
- else
- -- this is our answer
- -- FIXME make sure the message answers to this particular request
- return task.answer
- end
+ simgrid.info("Task is NOT 'get predecessor answer'")
+ handle_task(task)
+ else
+ -- this is our answer
+ -- FIXME make sure the message answers to this particular request
+ --simgrid.info(task.answer)
+ for i,v in pairs(task) do
+ print(my_node.id, i, v)
+ end
+ simgrid.info("Task is answer for predecessor! The answer is: ")
+ if (task.answer) then print("NIL!\n") else print("Not NIL\n") end
+ return task.answer
+ end
end
end
end
-- Checks the immediate successor of the current node.
function stabilize()
-
local candidate
local successor = my_node.fingers[1]
if successor ~= my_node.id then
+ simgrid.info("Getting remote predecessor from ".. successor)
candidate = remote_get_predecessor(successor)
+ simgrid.info("Received ".. candidate .. " as candidate")
else
candidate = my_node.predecessor
end
- -- this node is a candidate to become my new successor
+ simgrid.info("Still stabilizing")
+ -- candidate might become my new successor
if candidate ~= nil and is_in_interval(candidate, my_node.id + 1, successor - 1) then
- my_node.fingers[1] = candidate
+ simgrid.info("I'm updating my successor to " .. math.tointeger(candidate))
+ my_node.fingers[1] = math.tointeger(candidate)
+
+ -- If candidate is not my_node.id, then I should notify candidate that I'm here.
+ -- (So this node updates it's predecessor to me)
+ --remote_notify(candidate, my_node.id)
end
- if successor ~= my_node.id then
+ simgrid.info("Successor: " .. successor .. " and my id: " .. my_node.id)
+ -- If candidate is nil, this means that our successor has no predecessor.
+ -- So we should tell him about us...
+ -- TODO: I think a host that receives a message could automatically add
+ -- this other node as a predecessor if it doesn't have any... needs to
+ -- be implemented somewhere else, not here.
+ if candidate == nil and successor ~= my_node.id then
remote_notify(successor, my_node.id)
end
end
--- Notifies the current node that its predecessor my have changed
+-- Notifies the current node that its predecessor may have changed
-- - candidate_predecessor: the possible new predecessor
function notify(candidate_predecessor)
-
if my_node.predecessor == nil or is_in_interval(candidate_predecessor,
my_node.predecessor + 1, my_node.id - 1) then
- my_node.predecessor = candidate_predecessor
+ simgrid.info("Updated my predecessor to " .. candidate_predecessor)
+ my_node.predecessor = math.tointeger(candidate_predecessor)
end
end
-- - candidate the possible new predecessor
function remote_notify(notify_to, candidate_predecessor)
+ simgrid.info("Updating someone else's predecessor (id: " .. notify_to .. " predecessor to ".. candidate_predecessor .. ")")
local task = simgrid.task.new("", comp_size, comm_size)
task.type = "notify"
task.request_id = candidate_predecessor
task:dsend(notify_to)
end
--- Refreshes the finger table of the current node.
+-- Refreshes the finger table of the current node,
+-- one finger per call.
function fix_fingers()
- local i = my_node.next_finger_to_fix
- local id = find_successor(my_node.id + 2^i)
+ local i = math.tointeger(my_node.next_finger_to_fix)
+ local id = find_successor(math.tointeger(my_node.id + 2^i))
+ simgrid.info("Called fix_fingers(). Next finger to fix: " .. i .. " and I will check " .. my_node.id + 2^i .. ". Request returned " .. id)
+
if id ~= nil then
if id ~= my_node.fingers[i] then
my_node.fingers[i] = id
+ simgrid.info("fix_fingers: Updated finger " .. i .. " to " .. id)
+ else
+ simgrid.info("fix_fingers: id is " .. id)
end
my_node.next_finger_to_fix = (i % nb_bits) + 1
end
simgrid.platform(arg[1] or "../../msg/msg_platform.xml")
simgrid.application(arg[2] or "../../msg/chord/chord90.xml")
simgrid.run()
-