1 /* Copyright (c) 2015-2021. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
16 #include <sys/types.h>
23 #include "src/internal_config.h"
24 #include "src/xbt/memory_map.hpp"
26 #include "private.hpp"
27 #include "src/smpi/include/smpi_actor.hpp"
// Logging sub-category `smpi_memory` (declared under `smpi`) used by the XBT_DEBUG / XBT_WARN calls of this file.
29 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_memory, smpi, "Memory layout support for SMPI");
// Start address of the writable data region of the executable, as recorded by smpi_get_executable_global_size().
31 char* smpi_data_exe_start = nullptr;
// Size in bytes of that region. The routines below treat 0 as "there is nothing to privatize".
32 size_t smpi_data_exe_size = 0;
// Privatization strategy; smpi_backup_global_memory_segment() resets it to NONE when the data region is empty.
33 SmpiPrivStrategies smpi_privatize_global_variables;
// Pristine copy of the data region: allocated and filled by smpi_backup_global_memory_segment(), then used as the
// source when a new privatization region is initialized.
34 static void* smpi_data_exe_copy;
36 // Initialized by smpi_prepare_global_memory_segment().
// Read by smpi_get_executable_global_size() and emptied by smpi_backup_global_memory_segment().
37 static std::vector<simgrid::xbt::VmMap> initial_vm_map;
39 // We keep a copy of all the privatization regions: We can then delete everything easily by iterating over this
40 // collection and nothing can be leaked. We could also iterate over all actors but we would have to be diligent when two
41 // actors use the same privatization region (so, smart pointers would have to be used etc.)
42 // Use a std::deque so that pointers remain valid after push_back().
43 static std::deque<s_smpi_privatization_region_t> smpi_privatization_regions;
// Protection masks: PROT_RWX is the mask applied to the protection bits of a memory-map entry, PROT_RW is the value
// expected after masking (readable and writable, not executable). PROT_RW also serves as protection for the mmap()
// done in smpi_switch_data_segment().
45 static constexpr int PROT_RWX = PROT_READ | PROT_WRITE | PROT_EXEC;
46 static constexpr int PROT_RW = PROT_READ | PROT_WRITE;
48 /** Take a snapshot of the process' memory map.
50 void smpi_prepare_global_memory_segment()
52 initial_vm_map = simgrid::xbt::get_memory_map(getpid());
// Searches the memory map of this process for the writable data region of the running executable and records it in
// smpi_data_exe_start / smpi_data_exe_size. An anonymous rw- mapping directly following the file-backed one is counted
// too, up to the first mapping that already existed in initial_vm_map.
55 static void smpi_get_executable_global_size()
// Canonical path of the binary, to be compared with the pathnames found in the memory map.
57 char* buffer = realpath(simgrid::xbt::binary_name.c_str(), nullptr);
58 xbt_assert(buffer != nullptr, "Could not resolve real path of binary file '%s'", simgrid::xbt::binary_name.c_str());
59 std::string full_name = buffer;
// NOTE(review): realpath(..., nullptr) returns a malloc()ed string, and no release of `buffer` is visible in this
// excerpt -- confirm that it is freed.
62 std::vector<simgrid::xbt::VmMap> map = simgrid::xbt::get_memory_map(getpid());
63 for (auto i = map.begin(); i != map.end() ; ++i) {
64 // TODO, In practice, this implementation would not detect a completely
65 // anonymous data segment. This does not happen in practice, however.
67 // File backed RW entry:
// i.e. an entry mapped from our own binary that is readable and writable but not executable.
68 if (i->pathname == full_name && (i->prot & PROT_RWX) == PROT_RW) {
69 smpi_data_exe_start = (char*)i->start_addr;
70 smpi_data_exe_size = i->end_addr - i->start_addr;
71 /* Here we are making the assumption that a suitable empty region
72 following the rw- area is the end of the data segment. It would
73 be better to check with the size of the data segment. */
// NOTE(review): `j` is not declared in the visible lines; presumably it designates the map entry following `i` --
// confirm. The region is extended only if `j` has no pathname, is rw- and starts exactly where the file-backed
// entry ends.
75 if (j != map.end() && j->pathname.empty() && (j->prot & PROT_RWX) == PROT_RW &&
76 (char*)j->start_addr == smpi_data_exe_start + smpi_data_exe_size) {
77 // Only count the portion of this region not present in the initial map.
// `found` is the first entry of the initial snapshot whose start address lies in [j->start_addr, j->end_addr).
78 auto found = std::find_if(initial_vm_map.begin(), initial_vm_map.end(), [&j](const simgrid::xbt::VmMap& m) {
79 return j->start_addr <= m.start_addr && m.start_addr < j->end_addr;
// Stop at that pre-existing mapping if there is one, otherwise take `j` up to its end.
81 auto end_addr = (found == initial_vm_map.end() ? j->end_addr : found->start_addr);
82 smpi_data_exe_size = (char*)end_addr - smpi_data_exe_start;
// NOTE(review): the statements closing the branches and the loop are not visible in this excerpt, so the exact path
// leading to the failure below cannot be confirmed from here.
87 xbt_die("Did not find my data segment.");
91 #if HAVE_SANITIZER_ADDRESS
92 #include <sanitizer/asan_interface.h>
// memcpy() variant for builds with AddressSanitizer: the source range is walked chunk by chunk, querying ASan about
// poisoned bytes, and only the chunks delimited by these queries are handed to memcpy().
// dest/src: destination and source areas; n: number of bytes in the source range.
93 static void* asan_safe_memcpy(void* dest, void* src, size_t n)
95 char* psrc = static_cast<char*>(src);
96 char* pdest = static_cast<char*>(dest);
// `i` is the current offset in the range; the for header does not advance it.
97 for (size_t i = 0; i < n;) {
// NOTE(review): the body of this loop is not visible in this excerpt; presumably it steps over the bytes reported as
// poisoned -- confirm.
98 while (i < n && __asan_address_is_poisoned(psrc + i))
// `p` is the address that ASan reports as poisoned within the rest of the range (null if there is none);
// `j` is the matching offset, or n when nothing is poisoned. The chunk [i, j) is then copied.
101 char* p = static_cast<char*>(__asan_region_is_poisoned(psrc + i, n - i));
102 size_t j = p ? (p - psrc) : n;
103 memcpy(pdest + i, psrc + i, j - i);
// Fallback definition: a plain memcpy().
// NOTE(review): the preprocessor directives separating this macro from the function above are not visible in this
// excerpt.
110 #define asan_safe_memcpy(dest, src, n) memcpy((dest), (src), (n))
114 * @brief Uses shm_open to get a temporary shm, and returns its file descriptor.
// Creates a POSIX shared-memory object under a name that is not in use yet, trying successive values of a counter
// while shm_open() reports EEXIST, then unlinks the name right away. Dies if the object cannot be created.
116 int smpi_temp_shm_get()
118 constexpr unsigned INDEX_MASK = 0xffffffffUL;
// The counter persists across calls. Starting from INDEX_MASK makes the first "(index + 1) & INDEX_MASK" yield 0.
119 static unsigned index = INDEX_MASK;
120 char shmname[32]; // cannot be longer than PSHMNAMLEN = 31 on macOS (shm_open raises ENAMETOOLONG otherwise)
// NOTE(review): the declaration of `fd`, the opening of the retry loop and the final return are not visible in this
// excerpt.
// Remember the starting point so that the loop gives up once every index value has been tried.
123 unsigned limit = index;
125 index = (index + 1) & INDEX_MASK;
// "/smpi-buffer-" (13 characters) + 16 hexadecimal digits + the terminating NUL = 30 bytes, which fits in shmname.
126 snprintf(shmname, sizeof(shmname), "/smpi-buffer-%016x", index);
// O_CREAT | O_EXCL: the call fails with EEXIST instead of opening an object that already exists under this name.
127 fd = shm_open(shmname, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
128 } while (fd == -1 && errno == EEXIST && index != limit);
// Running out of file descriptors gets a dedicated message explaining how to check the limits of the system; any
// other error is reported with its strerror() text.
131 if (errno == EMFILE) {
132 xbt_die("Impossible to create temporary file for memory mapping: %s\n\
133 The shm_open() system call failed with the EMFILE error code (too many files). \n\n\
134 This means that you reached the system limits concerning the amount of files per process. \
135 This is not a surprise if you are trying to virtualize many processes on top of SMPI. \
136 Don't panic -- you should simply increase your system limits and try again. \n\n\
137 First, check what your limits are:\n\
138 cat /proc/sys/fs/file-max # Gives you the system-wide limit\n\
139 ulimit -Hn # Gives you the per process hard limit\n\
140 ulimit -Sn # Gives you the per process soft limit\n\
141 cat /proc/self/limits # Displays any per-process limitation (including the one given above)\n\n\
142 If one of these values is less than the amount of MPI processes that you try to run, then you got the explanation of this error. \
143 Ask the Internet about tutorials on how to increase the files limit such as: https://rtcamp.com/tutorials/linux/increase-open-files-limit/",
146 xbt_die("Impossible to create temporary file for memory mapping. shm_open: %s", strerror(errno));
148 XBT_DEBUG("Got temporary shm %s (fd = %d)", shmname, fd);
// The name is removed immediately ("early unlink"); failing to do so is only worth a warning.
149 if (shm_unlink(shmname) < 0)
150 XBT_WARN("Could not early unlink %s. shm_unlink: %s", shmname, strerror(errno));
155 * @brief Mmap a region of size bytes from temporary shm with file descriptor fd.
// Maps `size` bytes of the object behind `fd` as a shared, readable and writable area, after making sure that the
// object is at least that large.
// fd: descriptor of the temporary shm; size: number of bytes to map.
157 void* smpi_temp_shm_mmap(int fd, size_t size)
// NOTE(review): the declaration of `st` (filled by fstat() below) is not visible in this excerpt.
160 xbt_assert(fstat(fd, &st) == 0, "Could not stat fd %d: %s", fd, strerror(errno));
// The object is only resized when it is currently smaller than `size`: thanks to the short-circuit evaluation,
// ftruncate() is not called otherwise.
161 xbt_assert(static_cast<off_t>(size) <= st.st_size || ftruncate(fd, static_cast<off_t>(size)) == 0,
162 "Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
// Map from offset 0; passing nullptr leaves the choice of the address to the system.
163 void* mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
// NOTE(review): the beginning of the statement that the following message belongs to, a part of that message and the
// final return are not visible in this excerpt; presumably this is an assertion on the result of mmap() -- confirm.
166 "Failed to map fd %d with size %zu: %s\n"
167 "If you are running a lot of ranks, you may be exceeding the amount of mappings allowed per process.\n"
168 "On Linux systems, change this value with sudo sysctl -w vm.max_map_count=newvalue (default value: 65536)\n"
169 "Please see https://simgrid.org/doc/latest/Configuring_SimGrid.html#configuring-the-user-code-virtualization for "
171 fd, size, strerror(errno));
175 /** Map a given SMPI privatization segment (make an SMPI process active)
177 * When doing a state restoration, the state of the restored variables might not be consistent with the state of the
178 * virtual memory. In this case, we have to change the data segment.
// Makes the privatized copy of the global variables of `actor` the active one, by mapping the shm of its
// privatization region over the data segment of the executable.
// actor: the actor whose region must become active.
180 void smpi_switch_data_segment(simgrid::s4u::ActorPtr actor)
// pid of the actor whose region was mapped by the last switch done here (-1 until the first one).
182 static aid_t smpi_loaded_page = -1;
183 if (smpi_loaded_page == actor->get_pid()) // no need to switch, we've already loaded the one we want
// NOTE(review): the statements guarded by this test and by the next one are not visible in this excerpt; their
// trailing comments indicate that nothing is switched in these cases.
186 if (smpi_data_exe_size == 0) // no need to switch
189 #if HAVE_PRIVATIZATION
190 // FIXME, cross-process support (mmap across process when necessary)
191 XBT_DEBUG("Switching data frame to the one of process %ld", actor->get_pid());
192 const simgrid::smpi::ActorExt* process = smpi_process_remote(actor);
193 int current = process->privatized_region()->file_descriptor;
// MAP_FIXED requests the mapping at exactly the given address, replacing whatever was mapped there, and MAP_SHARED
// carries the modifications through to the shm behind `current`. The assertion checks that mmap() returned the
// requested address. (TOPAGE presumably yields the page-aligned address of the data segment -- confirm.)
194 xbt_assert(mmap(TOPAGE(smpi_data_exe_start), smpi_data_exe_size, PROT_RW, MAP_FIXED | MAP_SHARED, current, 0) ==
195 TOPAGE(smpi_data_exe_start),
196 "Couldn't map the new region (errno %d): %s", errno, strerror(errno));
// Remember which region is loaded, so that a further call for the same actor has nothing to do.
197 smpi_loaded_page = actor->get_pid();
202 * @brief Makes a backup of the segment in memory that stores the global variables of a process.
203 * This backup is then used to initialize the global variables for every single
204 * process that is added, regardless of the progress of the simulation.
// Locates the data region of the executable and saves a copy of its current content in smpi_data_exe_copy.
// When privatization support is compiled out, this function only reports a fatal error.
206 void smpi_backup_global_memory_segment()
208 #if HAVE_PRIVATIZATION
// Compute smpi_data_exe_start / smpi_data_exe_size, then release the snapshot of the initial memory map.
209 smpi_get_executable_global_size();
210 initial_vm_map.clear();
211 initial_vm_map.shrink_to_fit();
213 XBT_DEBUG("bss+data segment found : size %zu starting at %p", smpi_data_exe_size, smpi_data_exe_start);
215 if (smpi_data_exe_size == 0) { // no need to do anything as global variables don't exist
216 smpi_privatize_global_variables = SmpiPrivStrategies::NONE;
// NOTE(review): the end of this branch is not visible in this excerpt; confirm that the copy below is skipped when
// the size is 0.
// Untyped storage obtained from ::operator new; it is released with ::operator delete in
// smpi_destroy_global_memory_segments().
220 smpi_data_exe_copy = ::operator new(smpi_data_exe_size);
221 // Make a copy of the data segment. This clean copy is retained over the whole runtime
222 // of the simulation and can be used to initialize a dynamically added, new process.
223 asan_safe_memcpy(smpi_data_exe_copy, TOPAGE(smpi_data_exe_start), smpi_data_exe_size);
224 #else /* ! HAVE_PRIVATIZATION */
225 xbt_die("You are trying to use privatization on a system that does not support it. Don't.");
229 // Initializes the memory mapping for a single process and returns the privatization region
230 smpi_privatization_region_t smpi_init_global_memory_segment_process()
232 int file_descriptor = smpi_temp_shm_get();
234 // ask for a free region
235 void* address = smpi_temp_shm_mmap(file_descriptor, smpi_data_exe_size);
237 // initialize the values
238 asan_safe_memcpy(address, smpi_data_exe_copy, smpi_data_exe_size);
240 // store the address of the mapping for further switches
241 smpi_privatization_regions.emplace_back(s_smpi_privatization_region_t{address, file_descriptor});
243 return &smpi_privatization_regions.back();
// Releases everything that was set up for privatization: unmaps and closes each registered region, empties the
// registry and frees the saved copy of the data segment.
246 void smpi_destroy_global_memory_segments(){
247 if (smpi_data_exe_size == 0) // no need to switch
// NOTE(review): the statement guarded by this test is not visible in this excerpt.
249 #if HAVE_PRIVATIZATION
250 for (auto const& region : smpi_privatization_regions) {
// Each region is unmapped with a length of smpi_data_exe_size, the length used when mapping it in
// smpi_init_global_memory_segment_process(). A failure is only reported: the descriptor is closed in any case.
251 if (munmap(region.address, smpi_data_exe_size) < 0)
252 XBT_WARN("Unmapping of fd %d failed: %s", region.file_descriptor, strerror(errno));
253 close(region.file_descriptor);
// Forget the regions, then release the copy obtained from ::operator new in smpi_backup_global_memory_segment().
255 smpi_privatization_regions.clear();
256 ::operator delete(smpi_data_exe_copy);
// Buffers handed out by smpi_get_tmp_sendbuffer() / smpi_get_tmp_recvbuffer() when the calling process is replaying,
// and released by smpi_free_replay_tmp_buffers().
260 static std::vector<unsigned char> sendbuffer;
261 static std::vector<unsigned char> recvbuffer;
263 //allocate a single buffer for all sends, growing it if needed
264 unsigned char* smpi_get_tmp_sendbuffer(size_t size)
266 if (not smpi_process()->replaying())
267 return new unsigned char[size];
268 // FIXME: a resize() may invalidate a previous pointer. Maybe we need to handle a queue of buffers with a reference
269 // counter. The same holds for smpi_get_tmp_recvbuffer.
270 if (sendbuffer.size() < size)
271 sendbuffer.resize(size);
272 return sendbuffer.data();
275 //allocate a single buffer for all recv
276 unsigned char* smpi_get_tmp_recvbuffer(size_t size)
278 if (not smpi_process()->replaying())
279 return new unsigned char[size];
280 if (recvbuffer.size() < size)
281 recvbuffer.resize(size);
282 return recvbuffer.data();
// Counterpart of smpi_get_tmp_sendbuffer() / smpi_get_tmp_recvbuffer() for a buffer that is no longer needed.
// buf: the buffer to give back.
285 void smpi_free_tmp_buffer(const unsigned char* buf)
// NOTE(review): the statement guarded by this test is not visible in this excerpt; presumably it releases `buf` with
// delete[], mirroring the new[] done by the getters when the process is not replaying -- confirm.
287 if (not smpi_process()->replaying())
291 void smpi_free_replay_tmp_buffers()
293 std::vector<unsigned char>().swap(sendbuffer);
294 std::vector<unsigned char>().swap(recvbuffer);