Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Remove last bits of Lua bindings.
[simgrid.git] / src / mc / remote / RemoteProcess.cpp
1 /* Copyright (c) 2014-2022. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #define _FILE_OFFSET_BITS 64 /* needed for pread_whole to work as expected on 32bits */
7
8 #include "src/mc/remote/RemoteProcess.hpp"
9
10 #include "src/mc/sosp/Snapshot.hpp"
11 #include "xbt/file.hpp"
12 #include "xbt/log.h"
13
14 #include <fcntl.h>
15 #include <libunwind-ptrace.h>
16 #include <sys/mman.h> // PROT_*
17
18 #include <algorithm>
19 #include <cerrno>
20 #include <cstring>
21 #include <memory>
22 #include <mutex>
23 #include <string>
24
25 using simgrid::mc::remote;
26
27 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_process, mc, "MC process information");
28
29 namespace simgrid {
30 namespace mc {
31
32 // ***** Helper stuff
33
// Libraries whose memory segments are ignored: a mapping whose library name (as computed by
// get_lib_name()) appears in this list gets no ObjectInformation in init_memory_map_info().
static const std::vector<std::string> filtered_libraries{
#ifdef __linux__
    "ld", "ld-linux-x86",
#elif defined __FreeBSD__
    "ld-elf", "ld-elf32",
    "libkvm",      /* kernel data access library */
    "libprocstat", /* process and file information retrieval */
    "libthr",      /* thread library */
    "libutil",
#endif
    "libargp", /* workarounds for glibc-less systems */
    "libasan", /* gcc sanitizers */
    "libasn1",
    // boost
    "libboost_chrono", "libboost_context", "libboost_context-mt", "libboost_stacktrace_addr2line",
    "libboost_stacktrace_backtrace", "libboost_system", "libboost_thread", "libboost_timer",
    "libbrotlicommon", "libbrotlidec", "libbz2",
    "libc", "libc++", "libcdt", "libcgraph", "libcom_err", "libcrypt", "libcrypto", "libcurl", "libcurl-gnutls",
    "libcxxrt",
    "libdebuginfod", "libdl", "libdw",
    "libelf", "libevent", "libexecinfo",
    "libffi", "libflang", "libflangrti",
    "libgcc_s", "libgmp", "libgnutls", "libgcrypt", "libgfortran", "libgpg-error", "libgssapi", "libgssapi_krb5",
    "libhcrypto", "libheimbase", "libheimntlm", "libhx509", "libhogweed",
    "libidn2", "libimf", "libintlc", "libirng",
    "libk5crypto", "libkeyutils", "libkrb5",
    "libkrb5support", /* odd behaviour on fedora rawhide ... remove these when fixed */
    "liblber", "libldap", "libldap_r", "liblzma",
    "libm", "libmd",
    "libnettle", "libnghttp2",
    "libomp",
    "libp11-kit", "libpapi", "libpcre2", "libpfm", "libpgmath", "libpsl", "libpthread",
    "libquadmath",
    "libresolv", "libroken", "librt", "librtmp",
    "libsasl2", "libselinux", "libsqlite3", "libssh", "libssh2", "libssl", "libstdc++", "libsvml",
    "libtasn1",
    "libtsan",  /* gcc sanitizers */
    "libubsan", /* gcc sanitizers */
    "libunistring", "libunwind", "libunwind-ptrace", "libunwind-x86", "libunwind-x86_64",
    "libwind",
    "libz", "libzstd"};
141
142 static bool is_filtered_lib(const std::string& libname)
143 {
144   return std::find(begin(filtered_libraries), end(filtered_libraries), libname) != end(filtered_libraries);
145 }
146
147 static std::string get_lib_name(const std::string& pathname)
148 {
149   std::string map_basename = simgrid::xbt::Path(pathname).get_base_name();
150   std::string libname;
151
152   size_t pos = map_basename.rfind(".so");
153   if (pos != std::string::npos) {
154     // strip the extension (matching regex "\.so.*$")
155     libname.assign(map_basename, 0, pos);
156
157     // strip the version suffix (matching regex "-[.0-9-]*$")
158     while (true) {
159       pos = libname.rfind('-');
160       if (pos == std::string::npos || libname.find_first_not_of(".0123456789", pos + 1) != std::string::npos)
161         break;
162       libname.erase(pos);
163     }
164   }
165
166   return libname;
167 }
168
169 static ssize_t pread_whole(int fd, void* buf, size_t count, off_t offset)
170 {
171   auto* buffer       = static_cast<char*>(buf);
172   ssize_t real_count = count;
173   while (count) {
174     ssize_t res = pread(fd, buffer, count, offset);
175     if (res > 0) {
176       count -= res;
177       buffer += res;
178       offset += res;
179     } else if (res == 0)
180       return -1;
181     else if (errno != EINTR) {
182       XBT_ERROR("pread_whole: %s", strerror(errno));
183       return -1;
184     }
185   }
186   return real_count;
187 }
188
189 static ssize_t pwrite_whole(int fd, const void* buf, size_t count, off_t offset)
190 {
191   const auto* buffer = static_cast<const char*>(buf);
192   ssize_t real_count = count;
193   while (count) {
194     ssize_t res = pwrite(fd, buffer, count, offset);
195     if (res > 0) {
196       count -= res;
197       buffer += res;
198       offset += res;
199     } else if (res == 0)
200       return -1;
201     else if (errno != EINTR) {
202       XBT_ERROR("pwrite_whole: %s", strerror(errno));
203       return -1;
204     }
205   }
206   return real_count;
207 }
208
/** Open the /proc/<pid>/mem file of process @p pid with the given open(2) @p flags.
 *
 *  @return the file descriptor returned by open(), i.e. -1 on failure
 */
int open_vm(pid_t pid, int flags)
{
  std::string mem_path("/proc/");
  mem_path += std::to_string(pid);
  mem_path += "/mem";
  return open(mem_path.c_str(), flags);
}
214
215 // ***** RemoteProcess
216
217 RemoteProcess::RemoteProcess(pid_t pid) : AddressSpace(this), pid_(pid), running_(true) {}
218
219 void RemoteProcess::init(xbt_mheap_t mmalloc_default_mdp, unsigned long* maxpid, xbt_dynar_t actors,
220                          xbt_dynar_t dead_actors)
221 {
222   this->heap_address      = remote(mmalloc_default_mdp);
223   this->maxpid_addr_      = remote(maxpid);
224   this->actors_addr_      = remote(actors);
225   this->dead_actors_addr_ = remote(dead_actors);
226
227   this->memory_map_ = simgrid::xbt::get_memory_map(this->pid_);
228   this->init_memory_map_info();
229
230   int fd = open_vm(this->pid_, O_RDWR);
231   xbt_assert(fd >= 0, "Could not open file for process virtual address space");
232   this->memory_file = fd;
233
234   this->smx_actors_infos.clear();
235   this->smx_dead_actors_infos.clear();
236   this->unw_addr_space            = simgrid::mc::UnwindContext::createUnwindAddressSpace();
237   this->unw_underlying_addr_space = simgrid::unw::create_addr_space();
238   this->unw_underlying_context    = simgrid::unw::create_context(this->unw_underlying_addr_space, this->pid_);
239 }
240
241 RemoteProcess::~RemoteProcess()
242 {
243   if (this->memory_file >= 0)
244     close(this->memory_file);
245
246   if (this->unw_underlying_addr_space != unw_local_addr_space) {
247     if (this->unw_underlying_addr_space)
248       unw_destroy_addr_space(this->unw_underlying_addr_space);
249     if (this->unw_underlying_context)
250       _UPT_destroy(this->unw_underlying_context);
251   }
252
253   unw_destroy_addr_space(this->unw_addr_space);
254 }
255
256 /** Refresh the information about the process
257  *
258  *  Do not use directly, this is used by the getters when appropriate
259  *  in order to have fresh data.
260  */
261 void RemoteProcess::refresh_heap()
262 {
263   // Read/dereference/refresh the std_heap pointer:
264   if (not this->heap)
265     this->heap = std::make_unique<s_xbt_mheap_t>();
266   this->read(this->heap.get(), this->heap_address);
267   this->cache_flags_ |= RemoteProcess::cache_heap;
268 }
269
270 /** Refresh the information about the process
271  *
272  *  Do not use directly, this is used by the getters when appropriate
273  *  in order to have fresh data.
274  * */
275 void RemoteProcess::refresh_malloc_info()
276 {
277   // Refresh process->heapinfo:
278   if (this->cache_flags_ & RemoteProcess::cache_malloc)
279     return;
280   size_t count = this->heap->heaplimit + 1;
281   if (this->heap_info.size() < count)
282     this->heap_info.resize(count);
283   this->read_bytes(this->heap_info.data(), count * sizeof(malloc_info), remote(this->heap->heapinfo));
284   this->cache_flags_ |= RemoteProcess::cache_malloc;
285 }
286
287 /** @brief Finds the range of the different memory segments and binary paths */
288 void RemoteProcess::init_memory_map_info()
289 {
290   XBT_DEBUG("Get debug information ...");
291   this->maestro_stack_start_ = nullptr;
292   this->maestro_stack_end_   = nullptr;
293   this->object_infos.clear();
294   this->binary_info = nullptr;
295
296   std::vector<simgrid::xbt::VmMap> const& maps = this->memory_map_;
297
298   const char* current_name = nullptr;
299
300   for (size_t i = 0; i < maps.size(); i++) {
301     simgrid::xbt::VmMap const& reg = maps[i];
302     const char* pathname           = maps[i].pathname.c_str();
303
304     // Nothing to do
305     if (maps[i].pathname.empty()) {
306       current_name = nullptr;
307       continue;
308     }
309
310     // [stack], [vvar], [vsyscall], [vdso] ...
311     if (pathname[0] == '[') {
312       if ((reg.prot & PROT_WRITE) && not memcmp(pathname, "[stack]", 7)) {
313         this->maestro_stack_start_ = remote(reg.start_addr);
314         this->maestro_stack_end_   = remote(reg.end_addr);
315       }
316       current_name = nullptr;
317       continue;
318     }
319
320     if (current_name && strcmp(current_name, pathname) == 0)
321       continue;
322
323     current_name = pathname;
324     if (not(reg.prot & PROT_READ) && (reg.prot & PROT_EXEC))
325       continue;
326
327     const bool is_executable = not i;
328     std::string libname;
329     if (not is_executable) {
330       libname = get_lib_name(pathname);
331       if (is_filtered_lib(libname)) {
332         continue;
333       }
334     }
335
336     std::shared_ptr<simgrid::mc::ObjectInformation> info =
337         simgrid::mc::createObjectInformation(this->memory_map_, pathname);
338     this->object_infos.push_back(info);
339     if (is_executable)
340       this->binary_info = info;
341   }
342
343   xbt_assert(this->maestro_stack_start_, "Did not find maestro_stack_start");
344   xbt_assert(this->maestro_stack_end_, "Did not find maestro_stack_end");
345
346   XBT_DEBUG("Get debug information done !");
347 }
348
349 std::shared_ptr<simgrid::mc::ObjectInformation> RemoteProcess::find_object_info(RemotePtr<void> addr) const
350 {
351   for (auto const& object_info : this->object_infos)
352     if (addr.address() >= (std::uint64_t)object_info->start && addr.address() <= (std::uint64_t)object_info->end)
353       return object_info;
354   return nullptr;
355 }
356
357 std::shared_ptr<ObjectInformation> RemoteProcess::find_object_info_exec(RemotePtr<void> addr) const
358 {
359   for (std::shared_ptr<ObjectInformation> const& info : this->object_infos)
360     if (addr.address() >= (std::uint64_t)info->start_exec && addr.address() <= (std::uint64_t)info->end_exec)
361       return info;
362   return nullptr;
363 }
364
365 std::shared_ptr<ObjectInformation> RemoteProcess::find_object_info_rw(RemotePtr<void> addr) const
366 {
367   for (std::shared_ptr<ObjectInformation> const& info : this->object_infos)
368     if (addr.address() >= (std::uint64_t)info->start_rw && addr.address() <= (std::uint64_t)info->end_rw)
369       return info;
370   return nullptr;
371 }
372
373 simgrid::mc::Frame* RemoteProcess::find_function(RemotePtr<void> ip) const
374 {
375   std::shared_ptr<simgrid::mc::ObjectInformation> info = this->find_object_info_exec(ip);
376   return info ? info->find_function((void*)ip.address()) : nullptr;
377 }
378
379 /** Find (one occurrence of) the named variable definition
380  */
381 const simgrid::mc::Variable* RemoteProcess::find_variable(const char* name) const
382 {
383   // First lookup the variable in the executable shared object.
384   // A global variable used directly by the executable code from a library
385   // is reinstantiated in the executable memory .data/.bss.
386   // We need to look up the variable in the executable first.
387   if (this->binary_info) {
388     std::shared_ptr<simgrid::mc::ObjectInformation> const& info = this->binary_info;
389     const simgrid::mc::Variable* var                            = info->find_variable(name);
390     if (var)
391       return var;
392   }
393
394   for (std::shared_ptr<simgrid::mc::ObjectInformation> const& info : this->object_infos) {
395     const simgrid::mc::Variable* var = info->find_variable(name);
396     if (var)
397       return var;
398   }
399
400   return nullptr;
401 }
402
403 void RemoteProcess::read_variable(const char* name, void* target, size_t size) const
404 {
405   const simgrid::mc::Variable* var = this->find_variable(name);
406   xbt_assert(var, "Variable %s not found", name);
407   xbt_assert(var->address, "No simple location for this variable");
408
409   if (not var->type->full_type) // Try to resolve this type. The needed ObjectInfo was maybe (lazily) loaded recently
410     for (auto const& object_info : this->object_infos)
411       postProcessObjectInformation(this, object_info.get());
412   xbt_assert(var->type->full_type, "Partial type for %s (even after re-resolving types), cannot retrieve its size.",
413              name);
414   xbt_assert((size_t)var->type->full_type->byte_size == size, "Unexpected size for %s (expected %zu, received %zu).",
415              name, size, (size_t)var->type->full_type->byte_size);
416   this->read_bytes(target, size, remote(var->address));
417 }
418
419 std::string RemoteProcess::read_string(RemotePtr<char> address) const
420 {
421   if (not address)
422     return {};
423
424   std::vector<char> res(128);
425   off_t off = 0;
426
427   while (true) {
428     ssize_t c = pread(this->memory_file, res.data() + off, res.size() - off, (off_t)address.address() + off);
429     if (c == -1 && errno == EINTR)
430       continue;
431     xbt_assert(c > 0, "Could not read string from remote process");
432
433     const void* p = memchr(res.data() + off, '\0', c);
434     if (p)
435       return std::string(res.data());
436
437     off += c;
438     if (off == (off_t)res.size())
439       res.resize(res.size() * 2);
440   }
441 }
442
443 void* RemoteProcess::read_bytes(void* buffer, std::size_t size, RemotePtr<void> address, ReadOptions /*options*/) const
444 {
445   xbt_assert(pread_whole(this->memory_file, buffer, size, (size_t)address.address()) != -1,
446              "Read at %p from process %lli failed", (void*)address.address(), (long long)this->pid_);
447   return buffer;
448 }
449
450 /** Write data to a process memory
451  *
452  *  @param buffer   local memory address (source)
453  *  @param len      data size
454  *  @param address  target process memory address (target)
455  */
456 void RemoteProcess::write_bytes(const void* buffer, size_t len, RemotePtr<void> address) const
457 {
458   xbt_assert(pwrite_whole(this->memory_file, buffer, len, (size_t)address.address()) != -1,
459              "Write to process %lli failed", (long long)this->pid_);
460 }
461
462 static void zero_buffer_init(const void** zero_buffer, size_t zero_buffer_size)
463 {
464   int fd = open("/dev/zero", O_RDONLY);
465   xbt_assert(fd >= 0, "Could not open /dev/zero");
466   *zero_buffer = mmap(nullptr, zero_buffer_size, PROT_READ, MAP_SHARED, fd, 0);
467   xbt_assert(*zero_buffer != MAP_FAILED, "Could not map the zero buffer");
468   close(fd);
469 }
470
471 void RemoteProcess::clear_bytes(RemotePtr<void> address, size_t len) const
472 {
473   static constexpr size_t zero_buffer_size = 10 * 4096;
474   static const void* zero_buffer;
475   static std::once_flag zero_buffer_flag;
476
477   std::call_once(zero_buffer_flag, zero_buffer_init, &zero_buffer, zero_buffer_size);
478   while (len) {
479     size_t s = len > zero_buffer_size ? zero_buffer_size : len;
480     this->write_bytes(zero_buffer, s, address);
481     address = remote((char*)address.address() + s);
482     len -= s;
483   }
484 }
485
486 void RemoteProcess::ignore_region(std::uint64_t addr, std::size_t size)
487 {
488   IgnoredRegion region;
489   region.addr = addr;
490   region.size = size;
491
492   auto pos = std::lower_bound(ignored_regions_.begin(), ignored_regions_.end(), region,
493                               [](auto const& reg1, auto const& reg2) {
494                                 return reg1.addr < reg2.addr || (reg1.addr == reg2.addr && reg1.size < reg2.size);
495                               });
496   if (pos == ignored_regions_.end() || pos->addr != addr || pos->size != size)
497     ignored_regions_.insert(pos, region);
498 }
499
500 void RemoteProcess::ignore_heap(IgnoredHeapRegion const& region)
501 {
502   // Binary search the position of insertion:
503   auto pos = std::lower_bound(ignored_heap_.begin(), ignored_heap_.end(), region.address,
504                               [](auto const& reg, auto const* addr) { return reg.address < addr; });
505   if (pos == ignored_heap_.end() || pos->address != region.address) {
506     // Insert it:
507     ignored_heap_.insert(pos, region);
508   }
509 }
510
511 void RemoteProcess::unignore_heap(void* address, size_t size)
512 {
513   // Binary search:
514   auto pos = std::lower_bound(ignored_heap_.begin(), ignored_heap_.end(), address,
515                               [](auto const& reg, auto const* addr) { return reg.address < addr; });
516   if (pos != ignored_heap_.end() && static_cast<char*>(pos->address) <= static_cast<char*>(address) + size)
517     ignored_heap_.erase(pos);
518 }
519
520 void RemoteProcess::ignore_local_variable(const char* var_name, const char* frame_name) const
521 {
522   if (frame_name != nullptr && strcmp(frame_name, "*") == 0)
523     frame_name = nullptr;
524   for (std::shared_ptr<simgrid::mc::ObjectInformation> const& info : this->object_infos)
525     info->remove_local_variable(var_name, frame_name);
526 }
527
528 std::vector<simgrid::mc::ActorInformation>& RemoteProcess::actors()
529 {
530   this->refresh_simix();
531   return smx_actors_infos;
532 }
533
534 std::vector<simgrid::mc::ActorInformation>& RemoteProcess::dead_actors()
535 {
536   this->refresh_simix();
537   return smx_dead_actors_infos;
538 }
539
540 void RemoteProcess::dump_stack() const
541 {
542   unw_addr_space_t as = unw_create_addr_space(&_UPT_accessors, BYTE_ORDER);
543   if (as == nullptr) {
544     XBT_ERROR("Could not initialize ptrace address space");
545     return;
546   }
547
548   void* context = _UPT_create(this->pid_);
549   if (context == nullptr) {
550     unw_destroy_addr_space(as);
551     XBT_ERROR("Could not initialize ptrace context");
552     return;
553   }
554
555   unw_cursor_t cursor;
556   if (unw_init_remote(&cursor, as, context) != 0) {
557     _UPT_destroy(context);
558     unw_destroy_addr_space(as);
559     XBT_ERROR("Could not initialiez ptrace cursor");
560     return;
561   }
562
563   simgrid::mc::dumpStack(stderr, &cursor);
564
565   _UPT_destroy(context);
566   unw_destroy_addr_space(as);
567 }
568 } // namespace mc
569 } // namespace simgrid