Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'master' of framagit.org:simgrid/simgrid
[simgrid.git] / src / mc / remote / RemoteProcess.cpp
1 /* Copyright (c) 2014-2021. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #define _FILE_OFFSET_BITS 64 /* needed for pread_whole to work as expected on 32bits */
7
8 #include "src/mc/remote/RemoteProcess.hpp"
9
10 #include "src/mc/sosp/Snapshot.hpp"
11 #include "xbt/file.hpp"
12 #include "xbt/log.h"
13
14 #include <fcntl.h>
15 #include <libunwind-ptrace.h>
16 #include <sys/mman.h> // PROT_*
17
18 #include <algorithm>
19 #include <cerrno>
20 #include <cstring>
21 #include <memory>
22 #include <mutex>
23 #include <string>
24
25 using simgrid::mc::remote;
26
27 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_process, mc, "MC process information");
28
29 namespace simgrid {
30 namespace mc {
31
32 // ***** Helper stuff
33
// Names (without path, extension nor version suffix) of the libraries whose memory segments are skipped:
// is_filtered_lib() looks the stripped library name up in this list.
static const std::vector<std::string> filtered_libraries = {
#ifdef __linux__
    "ld",
#elif defined __FreeBSD__
    "ld-elf",
    "ld-elf32",
    "libkvm",      /* kernel data access library */
    "libprocstat", /* process and file information retrieval */
    "libthr",      /* thread library */
    "libutil",
#endif
    "libargp", /* workarounds for glibc-less systems */
    "libasan", /* gcc sanitizers */
    "libasn1",
    "libboost_chrono",
    "libboost_context",
    "libboost_context-mt",
    "libboost_stacktrace_addr2line",
    "libboost_stacktrace_backtrace",
    "libboost_system",
    "libboost_thread",
    "libboost_timer",
    "libbrotlicommon",
    "libbrotlidec",
    "libbz2",
    "libc",
    "libc++",
    "libcdt",
    "libcgraph",
    "libcom_err",
    "libcrypt",
    "libcrypto",
    "libcurl",
    "libcurl-gnutls",
    "libcxxrt",
    "libdebuginfod",
    "libdl",
    "libdw",
    "libelf",
    "libevent",
    "libexecinfo",
    "libffi",
    "libflang",
    "libflangrti",
    "libgcc_s",
    "libgmp",
    "libgnutls",
    "libgcrypt",
    "libgfortran",
    "libgpg-error",
    "libgssapi",
    "libgssapi_krb5",
    "libhcrypto",
    "libheimbase",
    "libheimntlm",
    "libhx509",
    "libhogweed",
    "libidn2",
    "libimf",
    "libintlc",
    "libirng",
    "libk5crypto",
    "libkeyutils",
    "libkrb5",
    "libkrb5support", /* odd behaviour on fedora rawhide ... remove these when fixed */
    "liblber",
    "libldap",
    "libldap_r",
    "liblua5.1",
    "liblua5.3",
    "liblzma",
    "libm",
    "libmd",
    "libnettle",
    "libnghttp2",
    "libomp",
    "libp11-kit",
    "libpapi",
    "libpcre2",
    "libpfm",
    "libpgmath",
    "libpsl",
    "libpthread",
    "libquadmath",
    "libresolv",
    "libroken",
    "librt",
    "librtmp",
    "libsasl2",
    "libselinux",
    "libsqlite3",
    "libssh",
    "libssh2",
    "libssl",
    "libstdc++",
    "libsvml",
    "libtasn1",
    "libtsan",  /* gcc sanitizers */
    "libubsan", /* gcc sanitizers */
    "libunistring",
    "libunwind",
    "libunwind-ptrace",
    "libunwind-x86",
    "libunwind-x86_64",
    "libwind",
    "libz",
    "libzstd"};
142
143 static bool is_filtered_lib(const std::string& libname)
144 {
145   return std::find(begin(filtered_libraries), end(filtered_libraries), libname) != end(filtered_libraries);
146 }
147
148 static std::string get_lib_name(const std::string& pathname)
149 {
150   std::string map_basename = simgrid::xbt::Path(pathname).get_base_name();
151   std::string libname;
152
153   size_t pos = map_basename.rfind(".so");
154   if (pos != std::string::npos) {
155     // strip the extension (matching regex "\.so.*$")
156     libname.assign(map_basename, 0, pos);
157
158     // strip the version suffix (matching regex "-[.0-9-]*$")
159     while (true) {
160       pos = libname.rfind('-');
161       if (pos == std::string::npos || libname.find_first_not_of(".0123456789", pos + 1) != std::string::npos)
162         break;
163       libname.erase(pos);
164     }
165   }
166
167   return libname;
168 }
169
170 static ssize_t pread_whole(int fd, void* buf, size_t count, off_t offset)
171 {
172   auto* buffer       = static_cast<char*>(buf);
173   ssize_t real_count = count;
174   while (count) {
175     ssize_t res = pread(fd, buffer, count, offset);
176     if (res > 0) {
177       count -= res;
178       buffer += res;
179       offset += res;
180     } else if (res == 0)
181       return -1;
182     else if (errno != EINTR) {
183       XBT_ERROR("pread_whole: %s", strerror(errno));
184       return -1;
185     }
186   }
187   return real_count;
188 }
189
190 static ssize_t pwrite_whole(int fd, const void* buf, size_t count, off_t offset)
191 {
192   const auto* buffer = static_cast<const char*>(buf);
193   ssize_t real_count = count;
194   while (count) {
195     ssize_t res = pwrite(fd, buffer, count, offset);
196     if (res > 0) {
197       count -= res;
198       buffer += res;
199       offset += res;
200     } else if (res == 0)
201       return -1;
202     else if (errno != EINTR) {
203       XBT_ERROR("pwrite_whole: %s", strerror(errno));
204       return -1;
205     }
206   }
207   return real_count;
208 }
209
/** Open the "/proc/<pid>/mem" file of process @p pid with the given open(2) @p flags.
 *
 *  @return the file descriptor returned by open(), i.e. -1 on failure
 */
int open_vm(pid_t pid, int flags)
{
  std::string mem_path("/proc/");
  mem_path += std::to_string(pid);
  mem_path += "/mem";
  return open(mem_path.c_str(), flags);
}
215
216 // ***** RemoteProcess
217
218 RemoteProcess::RemoteProcess(pid_t pid) : AddressSpace(this), pid_(pid), running_(true) {}
219
220 void RemoteProcess::init(xbt_mheap_t mmalloc_default_mdp, unsigned long* maxpid, xbt_dynar_t actors,
221                          xbt_dynar_t dead_actors)
222 {
223   this->heap_address      = remote(mmalloc_default_mdp);
224   this->maxpid_addr_      = remote(maxpid);
225   this->actors_addr_      = remote(actors);
226   this->dead_actors_addr_ = remote(dead_actors);
227
228   this->memory_map_ = simgrid::xbt::get_memory_map(this->pid_);
229   this->init_memory_map_info();
230
231   int fd = open_vm(this->pid_, O_RDWR);
232   xbt_assert(fd >= 0, "Could not open file for process virtual address space");
233   this->memory_file = fd;
234
235   this->smx_actors_infos.clear();
236   this->smx_dead_actors_infos.clear();
237   this->unw_addr_space            = simgrid::mc::UnwindContext::createUnwindAddressSpace();
238   this->unw_underlying_addr_space = simgrid::unw::create_addr_space();
239   this->unw_underlying_context    = simgrid::unw::create_context(this->unw_underlying_addr_space, this->pid_);
240 }
241
242 RemoteProcess::~RemoteProcess()
243 {
244   if (this->memory_file >= 0)
245     close(this->memory_file);
246
247   if (this->unw_underlying_addr_space != unw_local_addr_space) {
248     if (this->unw_underlying_addr_space)
249       unw_destroy_addr_space(this->unw_underlying_addr_space);
250     if (this->unw_underlying_context)
251       _UPT_destroy(this->unw_underlying_context);
252   }
253
254   unw_destroy_addr_space(this->unw_addr_space);
255 }
256
257 /** Refresh the information about the process
258  *
259  *  Do not use directly, this is used by the getters when appropriate
260  *  in order to have fresh data.
261  */
262 void RemoteProcess::refresh_heap()
263 {
264   // Read/dereference/refresh the std_heap pointer:
265   if (not this->heap)
266     this->heap = std::make_unique<s_xbt_mheap_t>();
267   this->read(this->heap.get(), this->heap_address);
268   this->cache_flags_ |= RemoteProcess::cache_heap;
269 }
270
271 /** Refresh the information about the process
272  *
273  *  Do not use directly, this is used by the getters when appropriate
274  *  in order to have fresh data.
275  * */
276 void RemoteProcess::refresh_malloc_info()
277 {
278   // Refresh process->heapinfo:
279   if (this->cache_flags_ & RemoteProcess::cache_malloc)
280     return;
281   size_t count = this->heap->heaplimit + 1;
282   if (this->heap_info.size() < count)
283     this->heap_info.resize(count);
284   this->read_bytes(this->heap_info.data(), count * sizeof(malloc_info), remote(this->heap->heapinfo));
285   this->cache_flags_ |= RemoteProcess::cache_malloc;
286 }
287
288 /** @brief Finds the range of the different memory segments and binary paths */
289 void RemoteProcess::init_memory_map_info()
290 {
291   XBT_DEBUG("Get debug information ...");
292   this->maestro_stack_start_ = nullptr;
293   this->maestro_stack_end_   = nullptr;
294   this->object_infos.resize(0);
295   this->binary_info = nullptr;
296
297   std::vector<simgrid::xbt::VmMap> const& maps = this->memory_map_;
298
299   const char* current_name = nullptr;
300
301   this->object_infos.clear();
302
303   for (size_t i = 0; i < maps.size(); i++) {
304     simgrid::xbt::VmMap const& reg = maps[i];
305     const char* pathname           = maps[i].pathname.c_str();
306
307     // Nothing to do
308     if (maps[i].pathname.empty()) {
309       current_name = nullptr;
310       continue;
311     }
312
313     // [stack], [vvar], [vsyscall], [vdso] ...
314     if (pathname[0] == '[') {
315       if ((reg.prot & PROT_WRITE) && not memcmp(pathname, "[stack]", 7)) {
316         this->maestro_stack_start_ = remote(reg.start_addr);
317         this->maestro_stack_end_   = remote(reg.end_addr);
318       }
319       current_name = nullptr;
320       continue;
321     }
322
323     if (current_name && strcmp(current_name, pathname) == 0)
324       continue;
325
326     current_name = pathname;
327     if (not(reg.prot & PROT_READ) && (reg.prot & PROT_EXEC))
328       continue;
329
330     const bool is_executable = not i;
331     std::string libname;
332     if (not is_executable) {
333       libname = get_lib_name(pathname);
334       if (is_filtered_lib(libname)) {
335         continue;
336       }
337     }
338
339     std::shared_ptr<simgrid::mc::ObjectInformation> info =
340         simgrid::mc::createObjectInformation(this->memory_map_, pathname);
341     this->object_infos.push_back(info);
342     if (is_executable)
343       this->binary_info = info;
344   }
345
346   xbt_assert(this->maestro_stack_start_, "Did not find maestro_stack_start");
347   xbt_assert(this->maestro_stack_end_, "Did not find maestro_stack_end");
348
349   XBT_DEBUG("Get debug information done !");
350 }
351
352 std::shared_ptr<simgrid::mc::ObjectInformation> RemoteProcess::find_object_info(RemotePtr<void> addr) const
353 {
354   for (auto const& object_info : this->object_infos)
355     if (addr.address() >= (std::uint64_t)object_info->start && addr.address() <= (std::uint64_t)object_info->end)
356       return object_info;
357   return nullptr;
358 }
359
360 std::shared_ptr<ObjectInformation> RemoteProcess::find_object_info_exec(RemotePtr<void> addr) const
361 {
362   for (std::shared_ptr<ObjectInformation> const& info : this->object_infos)
363     if (addr.address() >= (std::uint64_t)info->start_exec && addr.address() <= (std::uint64_t)info->end_exec)
364       return info;
365   return nullptr;
366 }
367
368 std::shared_ptr<ObjectInformation> RemoteProcess::find_object_info_rw(RemotePtr<void> addr) const
369 {
370   for (std::shared_ptr<ObjectInformation> const& info : this->object_infos)
371     if (addr.address() >= (std::uint64_t)info->start_rw && addr.address() <= (std::uint64_t)info->end_rw)
372       return info;
373   return nullptr;
374 }
375
376 simgrid::mc::Frame* RemoteProcess::find_function(RemotePtr<void> ip) const
377 {
378   std::shared_ptr<simgrid::mc::ObjectInformation> info = this->find_object_info_exec(ip);
379   return info ? info->find_function((void*)ip.address()) : nullptr;
380 }
381
382 /** Find (one occurrence of) the named variable definition
383  */
384 const simgrid::mc::Variable* RemoteProcess::find_variable(const char* name) const
385 {
386   // First lookup the variable in the executable shared object.
387   // A global variable used directly by the executable code from a library
388   // is reinstantiated in the executable memory .data/.bss.
389   // We need to look up the variable in the executable first.
390   if (this->binary_info) {
391     std::shared_ptr<simgrid::mc::ObjectInformation> const& info = this->binary_info;
392     const simgrid::mc::Variable* var                            = info->find_variable(name);
393     if (var)
394       return var;
395   }
396
397   for (std::shared_ptr<simgrid::mc::ObjectInformation> const& info : this->object_infos) {
398     const simgrid::mc::Variable* var = info->find_variable(name);
399     if (var)
400       return var;
401   }
402
403   return nullptr;
404 }
405
406 void RemoteProcess::read_variable(const char* name, void* target, size_t size) const
407 {
408   const simgrid::mc::Variable* var = this->find_variable(name);
409   xbt_assert(var, "Variable %s not found", name);
410   xbt_assert(var->address, "No simple location for this variable");
411
412   if (not var->type->full_type) // Try to resolve this type. The needed ObjectInfo was maybe (lazily) loaded recently
413     for (auto const& object_info : this->object_infos)
414       postProcessObjectInformation(this, object_info.get());
415   xbt_assert(var->type->full_type, "Partial type for %s (even after re-resolving types), cannot retrieve its size.",
416              name);
417   xbt_assert((size_t)var->type->full_type->byte_size == size, "Unexpected size for %s (expected %zu, received %zu).",
418              name, size, (size_t)var->type->full_type->byte_size);
419   this->read_bytes(target, size, remote(var->address));
420 }
421
422 std::string RemoteProcess::read_string(RemotePtr<char> address) const
423 {
424   if (not address)
425     return {};
426
427   std::vector<char> res(128);
428   off_t off = 0;
429
430   while (true) {
431     ssize_t c = pread(this->memory_file, res.data() + off, res.size() - off, (off_t)address.address() + off);
432     if (c == -1 && errno == EINTR)
433       continue;
434     xbt_assert(c > 0, "Could not read string from remote process");
435
436     const void* p = memchr(res.data() + off, '\0', c);
437     if (p)
438       return std::string(res.data());
439
440     off += c;
441     if (off == (off_t)res.size())
442       res.resize(res.size() * 2);
443   }
444 }
445
446 void* RemoteProcess::read_bytes(void* buffer, std::size_t size, RemotePtr<void> address, ReadOptions /*options*/) const
447 {
448   xbt_assert(pread_whole(this->memory_file, buffer, size, (size_t)address.address()) != -1,
449              "Read at %p from process %lli failed", (void*)address.address(), (long long)this->pid_);
450   return buffer;
451 }
452
453 /** Write data to a process memory
454  *
455  *  @param buffer   local memory address (source)
456  *  @param len      data size
457  *  @param address  target process memory address (target)
458  */
459 void RemoteProcess::write_bytes(const void* buffer, size_t len, RemotePtr<void> address) const
460 {
461   xbt_assert(pwrite_whole(this->memory_file, buffer, len, (size_t)address.address()) != -1,
462              "Write to process %lli failed", (long long)this->pid_);
463 }
464
465 static void zero_buffer_init(const void** zero_buffer, size_t zero_buffer_size)
466 {
467   int fd = open("/dev/zero", O_RDONLY);
468   xbt_assert(fd >= 0, "Could not open /dev/zero");
469   *zero_buffer = mmap(nullptr, zero_buffer_size, PROT_READ, MAP_SHARED, fd, 0);
470   xbt_assert(*zero_buffer != MAP_FAILED, "Could not map the zero buffer");
471   close(fd);
472 }
473
474 void RemoteProcess::clear_bytes(RemotePtr<void> address, size_t len) const
475 {
476   static constexpr size_t zero_buffer_size = 10 * 4096;
477   static const void* zero_buffer;
478   static std::once_flag zero_buffer_flag;
479
480   std::call_once(zero_buffer_flag, zero_buffer_init, &zero_buffer, zero_buffer_size);
481   while (len) {
482     size_t s = len > zero_buffer_size ? zero_buffer_size : len;
483     this->write_bytes(zero_buffer, s, address);
484     address = remote((char*)address.address() + s);
485     len -= s;
486   }
487 }
488
489 void RemoteProcess::ignore_region(std::uint64_t addr, std::size_t size)
490 {
491   IgnoredRegion region;
492   region.addr = addr;
493   region.size = size;
494
495   auto pos = std::lower_bound(ignored_regions_.begin(), ignored_regions_.end(), region,
496                               [](auto const& reg1, auto const& reg2) {
497                                 return reg1.addr < reg2.addr || (reg1.addr == reg2.addr && reg1.size < reg2.size);
498                               });
499   if (pos == ignored_regions_.end() || pos->addr != addr || pos->size != size)
500     ignored_regions_.insert(pos, region);
501 }
502
503 void RemoteProcess::ignore_heap(IgnoredHeapRegion const& region)
504 {
505   // Binary search the position of insertion:
506   auto pos = std::lower_bound(ignored_heap_.begin(), ignored_heap_.end(), region.address,
507                               [](auto const& reg, auto const* addr) { return reg.address < addr; });
508   if (pos == ignored_heap_.end() || pos->address != region.address) {
509     // Insert it:
510     ignored_heap_.insert(pos, region);
511   }
512 }
513
514 void RemoteProcess::unignore_heap(void* address, size_t size)
515 {
516   // Binary search:
517   auto pos = std::lower_bound(ignored_heap_.begin(), ignored_heap_.end(), address,
518                               [](auto const& reg, auto const* addr) { return reg.address < addr; });
519   if (pos != ignored_heap_.end() && static_cast<char*>(pos->address) <= static_cast<char*>(address) + size)
520     ignored_heap_.erase(pos);
521 }
522
523 void RemoteProcess::ignore_local_variable(const char* var_name, const char* frame_name) const
524 {
525   if (frame_name != nullptr && strcmp(frame_name, "*") == 0)
526     frame_name = nullptr;
527   for (std::shared_ptr<simgrid::mc::ObjectInformation> const& info : this->object_infos)
528     info->remove_local_variable(var_name, frame_name);
529 }
530
531 std::vector<simgrid::mc::ActorInformation>& RemoteProcess::actors()
532 {
533   this->refresh_simix();
534   return smx_actors_infos;
535 }
536
537 std::vector<simgrid::mc::ActorInformation>& RemoteProcess::dead_actors()
538 {
539   this->refresh_simix();
540   return smx_dead_actors_infos;
541 }
542
543 void RemoteProcess::dump_stack() const
544 {
545   unw_addr_space_t as = unw_create_addr_space(&_UPT_accessors, BYTE_ORDER);
546   if (as == nullptr) {
547     XBT_ERROR("Could not initialize ptrace address space");
548     return;
549   }
550
551   void* context = _UPT_create(this->pid_);
552   if (context == nullptr) {
553     unw_destroy_addr_space(as);
554     XBT_ERROR("Could not initialize ptrace context");
555     return;
556   }
557
558   unw_cursor_t cursor;
559   if (unw_init_remote(&cursor, as, context) != 0) {
560     _UPT_destroy(context);
561     unw_destroy_addr_space(as);
562     XBT_ERROR("Could not initialiez ptrace cursor");
563     return;
564   }
565
566   simgrid::mc::dumpStack(stderr, &cursor);
567
568   _UPT_destroy(context);
569   unw_destroy_addr_space(as);
570 }
571 } // namespace mc
572 } // namespace simgrid