Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
cosmetics, somewhat hinted by sonar
[simgrid.git] / src / mc / remote / RemoteProcess.cpp
1 /* Copyright (c) 2014-2021. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #define _FILE_OFFSET_BITS 64 /* needed for pread_whole to work as expected on 32bits */
7
8 #include "src/mc/remote/RemoteProcess.hpp"
9
10 #include "src/mc/sosp/Snapshot.hpp"
11 #include "xbt/file.hpp"
12 #include "xbt/log.h"
13
14 #include <fcntl.h>
15 #include <libunwind-ptrace.h>
16 #include <sys/mman.h> // PROT_*
17
18 #include <algorithm>
19 #include <cerrno>
20 #include <cstring>
21 #include <memory>
22 #include <string>
23
24 using simgrid::mc::remote;
25
26 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_process, mc, "MC process information");
27
28 namespace simgrid {
29 namespace mc {
30
31 // ***** Helper stuff
32
// Names of the libraries whose memory segments are not considered.
//
// The entries are compared for exact equality (see is_filtered_lib()) against the name computed by get_lib_name(),
// that is the base name of the mapped file with its ".so*" extension and its numeric version suffix removed.
// A few entries only exist on a given platform, hence the preprocessor guards at the beginning of the list.
static const std::vector<std::string> filtered_libraries = {
#ifdef __linux__
    "ld",
#elif defined __FreeBSD__
    "ld-elf",
    "ld-elf32",
    "libkvm",      /* kernel data access library */
    "libprocstat", /* process and file information retrieval */
    "libthr",      /* thread library */
    "libutil",
#endif
    "libargp", /* workarounds for glibc-less systems */
    "libasan", /* gcc sanitizers */
    "libasn1",
    "libboost_chrono",
    "libboost_context",
    "libboost_context-mt",
    "libboost_stacktrace_addr2line",
    "libboost_stacktrace_backtrace",
    "libboost_system",
    "libboost_thread",
    "libboost_timer",
    "libbrotlicommon",
    "libbrotlidec",
    "libbz2",
    "libc",
    "libc++",
    "libcdt",
    "libcgraph",
    "libcom_err",
    "libcrypt",
    "libcrypto",
    "libcurl",
    "libcurl-gnutls",
    "libcxxrt",
    "libdebuginfod",
    "libdl",
    "libdw",
    "libelf",
    "libevent",
    "libexecinfo",
    "libffi",
    "libflang",
    "libflangrti",
    "libgcc_s",
    "libgmp",
    "libgnutls",
    "libgcrypt",
    "libgfortran",
    "libgpg-error",
    "libgssapi",
    "libgssapi_krb5",
    "libhcrypto",
    "libheimbase",
    "libheimntlm",
    "libhx509",
    "libhogweed",
    "libidn2",
    "libimf",
    "libintlc",
    "libirng",
    "libk5crypto",
    "libkeyutils",
    "libkrb5",
    "libkrb5support", /*odd behaviour on fedora rawhide ... remove these when fixed*/
    "liblber",
    "libldap",
    "libldap_r",
    "liblua5.1",
    "liblua5.3",
    "liblzma",
    "libm",
    "libmd",
    "libnettle",
    "libnghttp2",
    "libomp",
    "libp11-kit",
    "libpapi",
    "libpcre2",
    "libpfm",
    "libpgmath",
    "libpsl",
    "libpthread",
    "libquadmath",
    "libresolv",
    "libroken",
    "librt",
    "librtmp",
    "libsasl2",
    "libselinux",
    "libsqlite3",
    "libssh",
    "libssh2",
    "libssl",
    "libstdc++",
    "libsvml",
    "libtasn1",
    "libtsan",  /* gcc sanitizers */
    "libubsan", /* gcc sanitizers */
    "libunistring",
    "libunwind",
    "libunwind-ptrace",
    "libunwind-x86",
    "libunwind-x86_64",
    "libwind",
    "libz",
    "libzstd"};
141
142 static bool is_filtered_lib(const std::string& libname)
143 {
144   return std::find(begin(filtered_libraries), end(filtered_libraries), libname) != end(filtered_libraries);
145 }
146
147 static std::string get_lib_name(const std::string& pathname)
148 {
149   std::string map_basename = simgrid::xbt::Path(pathname).get_base_name();
150   std::string libname;
151
152   size_t pos = map_basename.rfind(".so");
153   if (pos != std::string::npos) {
154     // strip the extension (matching regex "\.so.*$")
155     libname.assign(map_basename, 0, pos);
156
157     // strip the version suffix (matching regex "-[.0-9-]*$")
158     while (true) {
159       pos = libname.rfind('-');
160       if (pos == std::string::npos || libname.find_first_not_of(".0123456789", pos + 1) != std::string::npos)
161         break;
162       libname.erase(pos);
163     }
164   }
165
166   return libname;
167 }
168
169 static ssize_t pread_whole(int fd, void* buf, size_t count, off_t offset)
170 {
171   auto* buffer       = static_cast<char*>(buf);
172   ssize_t real_count = count;
173   while (count) {
174     ssize_t res = pread(fd, buffer, count, offset);
175     if (res > 0) {
176       count -= res;
177       buffer += res;
178       offset += res;
179     } else if (res == 0)
180       return -1;
181     else if (errno != EINTR) {
182       XBT_ERROR("pread_whole: %s", strerror(errno));
183       return -1;
184     }
185   }
186   return real_count;
187 }
188
189 static ssize_t pwrite_whole(int fd, const void* buf, size_t count, off_t offset)
190 {
191   const auto* buffer = static_cast<const char*>(buf);
192   ssize_t real_count = count;
193   while (count) {
194     ssize_t res = pwrite(fd, buffer, count, offset);
195     if (res > 0) {
196       count -= res;
197       buffer += res;
198       offset += res;
199     } else if (res == 0)
200       return -1;
201     else if (errno != EINTR) {
202       XBT_ERROR("pwrite_whole: %s", strerror(errno));
203       return -1;
204     }
205   }
206   return real_count;
207 }
208
/** Open the "/proc/<pid>/mem" file of the given process.
 *
 *  @param pid   process whose memory file is opened
 *  @param flags flags passed verbatim to open()
 *  @return the value returned by open(): a file descriptor, or a negative value on failure
 */
int open_vm(pid_t pid, int flags)
{
  std::string path("/proc/");
  path += std::to_string(pid);
  path += "/mem";
  return open(path.c_str(), flags);
}
214
215 // ***** RemoteProcess
216
217 RemoteProcess::RemoteProcess(pid_t pid) : AddressSpace(this), pid_(pid), running_(true) {}
218
219 void RemoteProcess::init(void* mmalloc_default_mdp, void* maxpid, void* actors, void* dead_actors)
220 {
221   this->heap_address      = mmalloc_default_mdp;
222   this->maxpid_addr_      = maxpid;
223   this->actors_addr_      = actors;
224   this->dead_actors_addr_ = dead_actors;
225
226   this->memory_map_ = simgrid::xbt::get_memory_map(this->pid_);
227   this->init_memory_map_info();
228
229   int fd = open_vm(this->pid_, O_RDWR);
230   xbt_assert(fd >= 0, "Could not open file for process virtual address space");
231   this->memory_file = fd;
232
233   this->smx_actors_infos.clear();
234   this->smx_dead_actors_infos.clear();
235   this->unw_addr_space            = simgrid::mc::UnwindContext::createUnwindAddressSpace();
236   this->unw_underlying_addr_space = simgrid::unw::create_addr_space();
237   this->unw_underlying_context    = simgrid::unw::create_context(this->unw_underlying_addr_space, this->pid_);
238 }
239
240 RemoteProcess::~RemoteProcess()
241 {
242   if (this->memory_file >= 0)
243     close(this->memory_file);
244
245   if (this->unw_underlying_addr_space != unw_local_addr_space) {
246     if (this->unw_underlying_addr_space)
247       unw_destroy_addr_space(this->unw_underlying_addr_space);
248     if (this->unw_underlying_context)
249       _UPT_destroy(this->unw_underlying_context);
250   }
251
252   unw_destroy_addr_space(this->unw_addr_space);
253 }
254
255 /** Refresh the information about the process
256  *
257  *  Do not use directly, this is used by the getters when appropriate
258  *  in order to have fresh data.
259  */
260 void RemoteProcess::refresh_heap()
261 {
262   // Read/dereference/refresh the std_heap pointer:
263   if (not this->heap)
264     this->heap = std::make_unique<s_xbt_mheap_t>();
265   this->read_bytes(this->heap.get(), sizeof(mdesc), remote(this->heap_address));
266   this->cache_flags_ |= RemoteProcess::cache_heap;
267 }
268
269 /** Refresh the information about the process
270  *
271  *  Do not use directly, this is used by the getters when appropriate
272  *  in order to have fresh data.
273  * */
274 void RemoteProcess::refresh_malloc_info()
275 {
276   // Refresh process->heapinfo:
277   if (this->cache_flags_ & RemoteProcess::cache_malloc)
278     return;
279   size_t count = this->heap->heaplimit + 1;
280   if (this->heap_info.size() < count)
281     this->heap_info.resize(count);
282   this->read_bytes(this->heap_info.data(), count * sizeof(malloc_info), remote(this->heap->heapinfo));
283   this->cache_flags_ |= RemoteProcess::cache_malloc;
284 }
285
286 /** @brief Finds the range of the different memory segments and binary paths */
287 void RemoteProcess::init_memory_map_info()
288 {
289   XBT_DEBUG("Get debug information ...");
290   this->maestro_stack_start_ = nullptr;
291   this->maestro_stack_end_   = nullptr;
292   this->object_infos.resize(0);
293   this->binary_info = nullptr;
294
295   std::vector<simgrid::xbt::VmMap> const& maps = this->memory_map_;
296
297   const char* current_name = nullptr;
298
299   this->object_infos.clear();
300
301   for (size_t i = 0; i < maps.size(); i++) {
302     simgrid::xbt::VmMap const& reg = maps[i];
303     const char* pathname           = maps[i].pathname.c_str();
304
305     // Nothing to do
306     if (maps[i].pathname.empty()) {
307       current_name = nullptr;
308       continue;
309     }
310
311     // [stack], [vvar], [vsyscall], [vdso] ...
312     if (pathname[0] == '[') {
313       if ((reg.prot & PROT_WRITE) && not memcmp(pathname, "[stack]", 7)) {
314         this->maestro_stack_start_ = remote(reg.start_addr);
315         this->maestro_stack_end_   = remote(reg.end_addr);
316       }
317       current_name = nullptr;
318       continue;
319     }
320
321     if (current_name && strcmp(current_name, pathname) == 0)
322       continue;
323
324     current_name = pathname;
325     if (not(reg.prot & PROT_READ) && (reg.prot & PROT_EXEC))
326       continue;
327
328     const bool is_executable = not i;
329     std::string libname;
330     if (not is_executable) {
331       libname = get_lib_name(pathname);
332       if (is_filtered_lib(libname)) {
333         continue;
334       }
335     }
336
337     std::shared_ptr<simgrid::mc::ObjectInformation> info =
338         simgrid::mc::createObjectInformation(this->memory_map_, pathname);
339     this->object_infos.push_back(info);
340     if (is_executable)
341       this->binary_info = info;
342   }
343
344   xbt_assert(this->maestro_stack_start_, "Did not find maestro_stack_start");
345   xbt_assert(this->maestro_stack_end_, "Did not find maestro_stack_end");
346
347   XBT_DEBUG("Get debug information done !");
348 }
349
350 std::shared_ptr<simgrid::mc::ObjectInformation> RemoteProcess::find_object_info(RemotePtr<void> addr) const
351 {
352   for (auto const& object_info : this->object_infos)
353     if (addr.address() >= (std::uint64_t)object_info->start && addr.address() <= (std::uint64_t)object_info->end)
354       return object_info;
355   return nullptr;
356 }
357
358 std::shared_ptr<ObjectInformation> RemoteProcess::find_object_info_exec(RemotePtr<void> addr) const
359 {
360   for (std::shared_ptr<ObjectInformation> const& info : this->object_infos)
361     if (addr.address() >= (std::uint64_t)info->start_exec && addr.address() <= (std::uint64_t)info->end_exec)
362       return info;
363   return nullptr;
364 }
365
366 std::shared_ptr<ObjectInformation> RemoteProcess::find_object_info_rw(RemotePtr<void> addr) const
367 {
368   for (std::shared_ptr<ObjectInformation> const& info : this->object_infos)
369     if (addr.address() >= (std::uint64_t)info->start_rw && addr.address() <= (std::uint64_t)info->end_rw)
370       return info;
371   return nullptr;
372 }
373
374 simgrid::mc::Frame* RemoteProcess::find_function(RemotePtr<void> ip) const
375 {
376   std::shared_ptr<simgrid::mc::ObjectInformation> info = this->find_object_info_exec(ip);
377   return info ? info->find_function((void*)ip.address()) : nullptr;
378 }
379
380 /** Find (one occurrence of) the named variable definition
381  */
382 const simgrid::mc::Variable* RemoteProcess::find_variable(const char* name) const
383 {
384   // First lookup the variable in the executable shared object.
385   // A global variable used directly by the executable code from a library
386   // is reinstantiated in the executable memory .data/.bss.
387   // We need to look up the variable in the executable first.
388   if (this->binary_info) {
389     std::shared_ptr<simgrid::mc::ObjectInformation> const& info = this->binary_info;
390     const simgrid::mc::Variable* var                            = info->find_variable(name);
391     if (var)
392       return var;
393   }
394
395   for (std::shared_ptr<simgrid::mc::ObjectInformation> const& info : this->object_infos) {
396     const simgrid::mc::Variable* var = info->find_variable(name);
397     if (var)
398       return var;
399   }
400
401   return nullptr;
402 }
403
404 void RemoteProcess::read_variable(const char* name, void* target, size_t size) const
405 {
406   const simgrid::mc::Variable* var = this->find_variable(name);
407   xbt_assert(var, "Variable %s not found", name);
408   xbt_assert(var->address, "No simple location for this variable");
409
410   if (not var->type->full_type) // Try to resolve this type. The needed ObjectInfo was maybe (lazily) loaded recently
411     for (auto const& object_info : this->object_infos)
412       postProcessObjectInformation(this, object_info.get());
413   xbt_assert(var->type->full_type, "Partial type for %s (even after re-resolving types), cannot retrieve its size.",
414              name);
415   xbt_assert((size_t)var->type->full_type->byte_size == size, "Unexpected size for %s (expected %zu, received %zu).",
416              name, size, (size_t)var->type->full_type->byte_size);
417   this->read_bytes(target, size, remote(var->address));
418 }
419
420 std::string RemoteProcess::read_string(RemotePtr<char> address) const
421 {
422   if (not address)
423     return {};
424
425   std::vector<char> res(128);
426   off_t off = 0;
427
428   while (true) {
429     ssize_t c = pread(this->memory_file, res.data() + off, res.size() - off, (off_t)address.address() + off);
430     if (c == -1 && errno == EINTR)
431       continue;
432     xbt_assert(c > 0, "Could not read string from remote process");
433
434     const void* p = memchr(res.data() + off, '\0', c);
435     if (p)
436       return std::string(res.data());
437
438     off += c;
439     if (off == (off_t)res.size())
440       res.resize(res.size() * 2);
441   }
442 }
443
444 void* RemoteProcess::read_bytes(void* buffer, std::size_t size, RemotePtr<void> address, ReadOptions /*options*/) const
445 {
446   if (pread_whole(this->memory_file, buffer, size, (size_t)address.address()) < 0)
447     xbt_die("Read at %p from process %lli failed", (void*)address.address(), (long long)this->pid_);
448   return buffer;
449 }
450
451 /** Write data to a process memory
452  *
453  *  @param buffer   local memory address (source)
454  *  @param len      data size
455  *  @param address  target process memory address (target)
456  */
457 void RemoteProcess::write_bytes(const void* buffer, size_t len, RemotePtr<void> address) const
458 {
459   if (pwrite_whole(this->memory_file, buffer, len, (size_t)address.address()) < 0)
460     xbt_die("Write to process %lli failed", (long long)this->pid_);
461 }
462
463 static const void* zero_buffer;
464 static const size_t zero_buffer_size = 10 * 4096;
465
466 static void zero_buffer_init()
467 {
468   int fd = open("/dev/zero", O_RDONLY);
469   xbt_assert(fd >= 0, "Could not open /dev/zero");
470   zero_buffer = mmap(nullptr, zero_buffer_size, PROT_READ, MAP_SHARED, fd, 0);
471   xbt_assert(zero_buffer != MAP_FAILED, "Could not map the zero buffer");
472   close(fd);
473 }
474
475 void RemoteProcess::clear_bytes(RemotePtr<void> address, size_t len) const
476 {
477   pthread_once_t zero_buffer_flag = PTHREAD_ONCE_INIT;
478
479   pthread_once(&zero_buffer_flag, zero_buffer_init);
480   while (len) {
481     size_t s = len > zero_buffer_size ? zero_buffer_size : len;
482     this->write_bytes(zero_buffer, s, address);
483     address = remote((char*)address.address() + s);
484     len -= s;
485   }
486 }
487
488 void RemoteProcess::ignore_region(std::uint64_t addr, std::size_t size)
489 {
490   IgnoredRegion region;
491   region.addr = addr;
492   region.size = size;
493
494   auto pos = std::lower_bound(ignored_regions_.begin(), ignored_regions_.end(), region,
495                               [](auto const& reg1, auto const& reg2) {
496                                 return reg1.addr < reg2.addr || (reg1.addr == reg2.addr && reg1.size < reg2.size);
497                               });
498   if (pos == ignored_regions_.end() || pos->addr != addr || pos->size != size)
499     ignored_regions_.insert(pos, region);
500 }
501
502 void RemoteProcess::ignore_heap(IgnoredHeapRegion const& region)
503 {
504   // Binary search the position of insertion:
505   auto pos = std::lower_bound(ignored_heap_.begin(), ignored_heap_.end(), region.address,
506                               [](auto const& reg, auto const* addr) { return reg.address < addr; });
507   if (pos == ignored_heap_.end() || pos->address != region.address) {
508     // Insert it:
509     ignored_heap_.insert(pos, region);
510   }
511 }
512
513 void RemoteProcess::unignore_heap(void* address, size_t size)
514 {
515   // Binary search:
516   auto pos = std::lower_bound(ignored_heap_.begin(), ignored_heap_.end(), address,
517                               [](auto const& reg, auto const* addr) { return reg.address < addr; });
518   if (pos != ignored_heap_.end() && static_cast<char*>(pos->address) <= static_cast<char*>(address) + size)
519     ignored_heap_.erase(pos);
520 }
521
522 void RemoteProcess::ignore_local_variable(const char* var_name, const char* frame_name) const
523 {
524   if (frame_name != nullptr && strcmp(frame_name, "*") == 0)
525     frame_name = nullptr;
526   for (std::shared_ptr<simgrid::mc::ObjectInformation> const& info : this->object_infos)
527     info->remove_local_variable(var_name, frame_name);
528 }
529
530 std::vector<simgrid::mc::ActorInformation>& RemoteProcess::actors()
531 {
532   this->refresh_simix();
533   return smx_actors_infos;
534 }
535
536 std::vector<simgrid::mc::ActorInformation>& RemoteProcess::dead_actors()
537 {
538   this->refresh_simix();
539   return smx_dead_actors_infos;
540 }
541
542 void RemoteProcess::dump_stack() const
543 {
544   unw_addr_space_t as = unw_create_addr_space(&_UPT_accessors, BYTE_ORDER);
545   if (as == nullptr) {
546     XBT_ERROR("Could not initialize ptrace address space");
547     return;
548   }
549
550   void* context = _UPT_create(this->pid_);
551   if (context == nullptr) {
552     unw_destroy_addr_space(as);
553     XBT_ERROR("Could not initialize ptrace context");
554     return;
555   }
556
557   unw_cursor_t cursor;
558   if (unw_init_remote(&cursor, as, context) != 0) {
559     _UPT_destroy(context);
560     unw_destroy_addr_space(as);
561     XBT_ERROR("Could not initialiez ptrace cursor");
562     return;
563   }
564
565   simgrid::mc::dumpStack(stderr, &cursor);
566
567   _UPT_destroy(context);
568   unw_destroy_addr_space(as);
569 }
570
571 unsigned long RemoteProcess::get_maxpid() const
572 {
573   unsigned long maxpid;
574   this->read_bytes(&maxpid, sizeof(unsigned long), remote(maxpid_addr_));
575   return maxpid;
576 }
577
578 void RemoteProcess::get_actor_vectors(RemotePtr<s_xbt_dynar_t>& actors, RemotePtr<s_xbt_dynar_t>& dead_actors)
579 {
580   actors      = remote(static_cast<s_xbt_dynar_t*>(actors_addr_));
581   dead_actors = remote(static_cast<s_xbt_dynar_t*>(dead_actors_addr_));
582 }
583 } // namespace mc
584 } // namespace simgrid