Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Resize(0) is redundant.
[simgrid.git] / src / mc / remote / RemoteProcess.cpp
1 /* Copyright (c) 2014-2021. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #define _FILE_OFFSET_BITS 64 /* needed for pread_whole to work as expected on 32bits */
7
8 #include "src/mc/remote/RemoteProcess.hpp"
9
10 #include "src/mc/sosp/Snapshot.hpp"
11 #include "xbt/file.hpp"
12 #include "xbt/log.h"
13
14 #include <fcntl.h>
15 #include <libunwind-ptrace.h>
16 #include <sys/mman.h> // PROT_*
17
18 #include <algorithm>
19 #include <cerrno>
20 #include <cstring>
21 #include <memory>
22 #include <mutex>
23 #include <string>
24
25 using simgrid::mc::remote;
26
27 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_process, mc, "MC process information");
28
29 namespace simgrid {
30 namespace mc {
31
32 // ***** Helper stuff
33
/** Names of the libraries whose memory segments are not considered.
 *
 *  The entries are compared verbatim (exact, case-sensitive match) by is_filtered_lib().
 */
static const std::vector<std::string> filtered_libraries = {
#ifdef __linux__
    "ld",
#elif defined __FreeBSD__
    "ld-elf",
    "ld-elf32",
    "libkvm",      /* kernel data access library */
    "libprocstat", /* process and file information retrieval */
    "libthr",      /* thread library */
    "libutil",
#endif
    "libargp", /* workarounds for glibc-less systems */
    "libasan", /* gcc sanitizers */
    "libasn1",
    "libboost_chrono",
    "libboost_context",
    "libboost_context-mt",
    "libboost_stacktrace_addr2line",
    "libboost_stacktrace_backtrace",
    "libboost_system",
    "libboost_thread",
    "libboost_timer",
    "libbrotlicommon",
    "libbrotlidec",
    "libbz2",
    "libc",
    "libc++",
    "libcdt",
    "libcgraph",
    "libcom_err",
    "libcrypt",
    "libcrypto",
    "libcurl",
    "libcurl-gnutls",
    "libcxxrt",
    "libdebuginfod",
    "libdl",
    "libdw",
    "libelf",
    "libevent",
    "libexecinfo",
    "libffi",
    "libflang",
    "libflangrti",
    "libgcc_s",
    "libgmp",
    "libgnutls",
    "libgcrypt",
    "libgfortran",
    "libgpg-error",
    "libgssapi",
    "libgssapi_krb5",
    "libhcrypto",
    "libheimbase",
    "libheimntlm",
    "libhx509",
    "libhogweed",
    "libidn2",
    "libimf",
    "libintlc",
    "libirng",
    "libk5crypto",
    "libkeyutils",
    "libkrb5",
    "libkrb5support", /* odd behaviour on fedora rawhide ... remove these when fixed */
    "liblber",
    "libldap",
    "libldap_r",
    "liblua5.1",
    "liblua5.3",
    "liblzma",
    "libm",
    "libmd",
    "libnettle",
    "libnghttp2",
    "libomp",
    "libp11-kit",
    "libpapi",
    "libpcre2",
    "libpfm",
    "libpgmath",
    "libpsl",
    "libpthread",
    "libquadmath",
    "libresolv",
    "libroken",
    "librt",
    "librtmp",
    "libsasl2",
    "libselinux",
    "libsqlite3",
    "libssh",
    "libssh2",
    "libssl",
    "libstdc++",
    "libsvml",
    "libtasn1",
    "libtsan",  /* gcc sanitizers */
    "libubsan", /* gcc sanitizers */
    "libunistring",
    "libunwind",
    "libunwind-ptrace",
    "libunwind-x86",
    "libunwind-x86_64",
    "libwind",
    "libz",
    "libzstd"};

/** Tell whether @p libname is one of the entries of filtered_libraries.
 *
 *  @param libname library name to look up
 *  @return true if the name is listed, false otherwise
 */
static bool is_filtered_lib(const std::string& libname)
{
  return std::any_of(filtered_libraries.cbegin(), filtered_libraries.cend(),
                     [&libname](const std::string& candidate) { return candidate == libname; });
}
147
148 static std::string get_lib_name(const std::string& pathname)
149 {
150   std::string map_basename = simgrid::xbt::Path(pathname).get_base_name();
151   std::string libname;
152
153   size_t pos = map_basename.rfind(".so");
154   if (pos != std::string::npos) {
155     // strip the extension (matching regex "\.so.*$")
156     libname.assign(map_basename, 0, pos);
157
158     // strip the version suffix (matching regex "-[.0-9-]*$")
159     while (true) {
160       pos = libname.rfind('-');
161       if (pos == std::string::npos || libname.find_first_not_of(".0123456789", pos + 1) != std::string::npos)
162         break;
163       libname.erase(pos);
164     }
165   }
166
167   return libname;
168 }
169
170 static ssize_t pread_whole(int fd, void* buf, size_t count, off_t offset)
171 {
172   auto* buffer       = static_cast<char*>(buf);
173   ssize_t real_count = count;
174   while (count) {
175     ssize_t res = pread(fd, buffer, count, offset);
176     if (res > 0) {
177       count -= res;
178       buffer += res;
179       offset += res;
180     } else if (res == 0)
181       return -1;
182     else if (errno != EINTR) {
183       XBT_ERROR("pread_whole: %s", strerror(errno));
184       return -1;
185     }
186   }
187   return real_count;
188 }
189
190 static ssize_t pwrite_whole(int fd, const void* buf, size_t count, off_t offset)
191 {
192   const auto* buffer = static_cast<const char*>(buf);
193   ssize_t real_count = count;
194   while (count) {
195     ssize_t res = pwrite(fd, buffer, count, offset);
196     if (res > 0) {
197       count -= res;
198       buffer += res;
199       offset += res;
200     } else if (res == 0)
201       return -1;
202     else if (errno != EINTR) {
203       XBT_ERROR("pwrite_whole: %s", strerror(errno));
204       return -1;
205     }
206   }
207   return real_count;
208 }
209
/** Open the "/proc/<pid>/mem" file of the given process.
 *
 *  @param pid   process whose memory file is opened
 *  @param flags flags forwarded to open()
 *  @return the value returned by open(): a file descriptor, or -1 on failure
 */
int open_vm(pid_t pid, int flags)
{
  std::string mem_path("/proc/");
  mem_path += std::to_string(pid);
  mem_path += "/mem";
  return open(mem_path.c_str(), flags);
}
215
216 // ***** RemoteProcess
217
218 RemoteProcess::RemoteProcess(pid_t pid) : AddressSpace(this), pid_(pid), running_(true) {}
219
220 void RemoteProcess::init(xbt_mheap_t mmalloc_default_mdp, unsigned long* maxpid, xbt_dynar_t actors,
221                          xbt_dynar_t dead_actors)
222 {
223   this->heap_address      = remote(mmalloc_default_mdp);
224   this->maxpid_addr_      = remote(maxpid);
225   this->actors_addr_      = remote(actors);
226   this->dead_actors_addr_ = remote(dead_actors);
227
228   this->memory_map_ = simgrid::xbt::get_memory_map(this->pid_);
229   this->init_memory_map_info();
230
231   int fd = open_vm(this->pid_, O_RDWR);
232   xbt_assert(fd >= 0, "Could not open file for process virtual address space");
233   this->memory_file = fd;
234
235   this->smx_actors_infos.clear();
236   this->smx_dead_actors_infos.clear();
237   this->unw_addr_space            = simgrid::mc::UnwindContext::createUnwindAddressSpace();
238   this->unw_underlying_addr_space = simgrid::unw::create_addr_space();
239   this->unw_underlying_context    = simgrid::unw::create_context(this->unw_underlying_addr_space, this->pid_);
240 }
241
242 RemoteProcess::~RemoteProcess()
243 {
244   if (this->memory_file >= 0)
245     close(this->memory_file);
246
247   if (this->unw_underlying_addr_space != unw_local_addr_space) {
248     if (this->unw_underlying_addr_space)
249       unw_destroy_addr_space(this->unw_underlying_addr_space);
250     if (this->unw_underlying_context)
251       _UPT_destroy(this->unw_underlying_context);
252   }
253
254   unw_destroy_addr_space(this->unw_addr_space);
255 }
256
257 /** Refresh the information about the process
258  *
259  *  Do not use directly, this is used by the getters when appropriate
260  *  in order to have fresh data.
261  */
262 void RemoteProcess::refresh_heap()
263 {
264   // Read/dereference/refresh the std_heap pointer:
265   if (not this->heap)
266     this->heap = std::make_unique<s_xbt_mheap_t>();
267   this->read(this->heap.get(), this->heap_address);
268   this->cache_flags_ |= RemoteProcess::cache_heap;
269 }
270
271 /** Refresh the information about the process
272  *
273  *  Do not use directly, this is used by the getters when appropriate
274  *  in order to have fresh data.
275  * */
276 void RemoteProcess::refresh_malloc_info()
277 {
278   // Refresh process->heapinfo:
279   if (this->cache_flags_ & RemoteProcess::cache_malloc)
280     return;
281   size_t count = this->heap->heaplimit + 1;
282   if (this->heap_info.size() < count)
283     this->heap_info.resize(count);
284   this->read_bytes(this->heap_info.data(), count * sizeof(malloc_info), remote(this->heap->heapinfo));
285   this->cache_flags_ |= RemoteProcess::cache_malloc;
286 }
287
288 /** @brief Finds the range of the different memory segments and binary paths */
289 void RemoteProcess::init_memory_map_info()
290 {
291   XBT_DEBUG("Get debug information ...");
292   this->maestro_stack_start_ = nullptr;
293   this->maestro_stack_end_   = nullptr;
294   this->object_infos.clear();
295   this->binary_info = nullptr;
296
297   std::vector<simgrid::xbt::VmMap> const& maps = this->memory_map_;
298
299   const char* current_name = nullptr;
300
301   for (size_t i = 0; i < maps.size(); i++) {
302     simgrid::xbt::VmMap const& reg = maps[i];
303     const char* pathname           = maps[i].pathname.c_str();
304
305     // Nothing to do
306     if (maps[i].pathname.empty()) {
307       current_name = nullptr;
308       continue;
309     }
310
311     // [stack], [vvar], [vsyscall], [vdso] ...
312     if (pathname[0] == '[') {
313       if ((reg.prot & PROT_WRITE) && not memcmp(pathname, "[stack]", 7)) {
314         this->maestro_stack_start_ = remote(reg.start_addr);
315         this->maestro_stack_end_   = remote(reg.end_addr);
316       }
317       current_name = nullptr;
318       continue;
319     }
320
321     if (current_name && strcmp(current_name, pathname) == 0)
322       continue;
323
324     current_name = pathname;
325     if (not(reg.prot & PROT_READ) && (reg.prot & PROT_EXEC))
326       continue;
327
328     const bool is_executable = not i;
329     std::string libname;
330     if (not is_executable) {
331       libname = get_lib_name(pathname);
332       if (is_filtered_lib(libname)) {
333         continue;
334       }
335     }
336
337     std::shared_ptr<simgrid::mc::ObjectInformation> info =
338         simgrid::mc::createObjectInformation(this->memory_map_, pathname);
339     this->object_infos.push_back(info);
340     if (is_executable)
341       this->binary_info = info;
342   }
343
344   xbt_assert(this->maestro_stack_start_, "Did not find maestro_stack_start");
345   xbt_assert(this->maestro_stack_end_, "Did not find maestro_stack_end");
346
347   XBT_DEBUG("Get debug information done !");
348 }
349
350 std::shared_ptr<simgrid::mc::ObjectInformation> RemoteProcess::find_object_info(RemotePtr<void> addr) const
351 {
352   for (auto const& object_info : this->object_infos)
353     if (addr.address() >= (std::uint64_t)object_info->start && addr.address() <= (std::uint64_t)object_info->end)
354       return object_info;
355   return nullptr;
356 }
357
358 std::shared_ptr<ObjectInformation> RemoteProcess::find_object_info_exec(RemotePtr<void> addr) const
359 {
360   for (std::shared_ptr<ObjectInformation> const& info : this->object_infos)
361     if (addr.address() >= (std::uint64_t)info->start_exec && addr.address() <= (std::uint64_t)info->end_exec)
362       return info;
363   return nullptr;
364 }
365
366 std::shared_ptr<ObjectInformation> RemoteProcess::find_object_info_rw(RemotePtr<void> addr) const
367 {
368   for (std::shared_ptr<ObjectInformation> const& info : this->object_infos)
369     if (addr.address() >= (std::uint64_t)info->start_rw && addr.address() <= (std::uint64_t)info->end_rw)
370       return info;
371   return nullptr;
372 }
373
374 simgrid::mc::Frame* RemoteProcess::find_function(RemotePtr<void> ip) const
375 {
376   std::shared_ptr<simgrid::mc::ObjectInformation> info = this->find_object_info_exec(ip);
377   return info ? info->find_function((void*)ip.address()) : nullptr;
378 }
379
380 /** Find (one occurrence of) the named variable definition
381  */
382 const simgrid::mc::Variable* RemoteProcess::find_variable(const char* name) const
383 {
384   // First lookup the variable in the executable shared object.
385   // A global variable used directly by the executable code from a library
386   // is reinstantiated in the executable memory .data/.bss.
387   // We need to look up the variable in the executable first.
388   if (this->binary_info) {
389     std::shared_ptr<simgrid::mc::ObjectInformation> const& info = this->binary_info;
390     const simgrid::mc::Variable* var                            = info->find_variable(name);
391     if (var)
392       return var;
393   }
394
395   for (std::shared_ptr<simgrid::mc::ObjectInformation> const& info : this->object_infos) {
396     const simgrid::mc::Variable* var = info->find_variable(name);
397     if (var)
398       return var;
399   }
400
401   return nullptr;
402 }
403
404 void RemoteProcess::read_variable(const char* name, void* target, size_t size) const
405 {
406   const simgrid::mc::Variable* var = this->find_variable(name);
407   xbt_assert(var, "Variable %s not found", name);
408   xbt_assert(var->address, "No simple location for this variable");
409
410   if (not var->type->full_type) // Try to resolve this type. The needed ObjectInfo was maybe (lazily) loaded recently
411     for (auto const& object_info : this->object_infos)
412       postProcessObjectInformation(this, object_info.get());
413   xbt_assert(var->type->full_type, "Partial type for %s (even after re-resolving types), cannot retrieve its size.",
414              name);
415   xbt_assert((size_t)var->type->full_type->byte_size == size, "Unexpected size for %s (expected %zu, received %zu).",
416              name, size, (size_t)var->type->full_type->byte_size);
417   this->read_bytes(target, size, remote(var->address));
418 }
419
420 std::string RemoteProcess::read_string(RemotePtr<char> address) const
421 {
422   if (not address)
423     return {};
424
425   std::vector<char> res(128);
426   off_t off = 0;
427
428   while (true) {
429     ssize_t c = pread(this->memory_file, res.data() + off, res.size() - off, (off_t)address.address() + off);
430     if (c == -1 && errno == EINTR)
431       continue;
432     xbt_assert(c > 0, "Could not read string from remote process");
433
434     const void* p = memchr(res.data() + off, '\0', c);
435     if (p)
436       return std::string(res.data());
437
438     off += c;
439     if (off == (off_t)res.size())
440       res.resize(res.size() * 2);
441   }
442 }
443
444 void* RemoteProcess::read_bytes(void* buffer, std::size_t size, RemotePtr<void> address, ReadOptions /*options*/) const
445 {
446   xbt_assert(pread_whole(this->memory_file, buffer, size, (size_t)address.address()) != -1,
447              "Read at %p from process %lli failed", (void*)address.address(), (long long)this->pid_);
448   return buffer;
449 }
450
451 /** Write data to a process memory
452  *
453  *  @param buffer   local memory address (source)
454  *  @param len      data size
455  *  @param address  target process memory address (target)
456  */
457 void RemoteProcess::write_bytes(const void* buffer, size_t len, RemotePtr<void> address) const
458 {
459   xbt_assert(pwrite_whole(this->memory_file, buffer, len, (size_t)address.address()) != -1,
460              "Write to process %lli failed", (long long)this->pid_);
461 }
462
463 static void zero_buffer_init(const void** zero_buffer, size_t zero_buffer_size)
464 {
465   int fd = open("/dev/zero", O_RDONLY);
466   xbt_assert(fd >= 0, "Could not open /dev/zero");
467   *zero_buffer = mmap(nullptr, zero_buffer_size, PROT_READ, MAP_SHARED, fd, 0);
468   xbt_assert(*zero_buffer != MAP_FAILED, "Could not map the zero buffer");
469   close(fd);
470 }
471
472 void RemoteProcess::clear_bytes(RemotePtr<void> address, size_t len) const
473 {
474   static constexpr size_t zero_buffer_size = 10 * 4096;
475   static const void* zero_buffer;
476   static std::once_flag zero_buffer_flag;
477
478   std::call_once(zero_buffer_flag, zero_buffer_init, &zero_buffer, zero_buffer_size);
479   while (len) {
480     size_t s = len > zero_buffer_size ? zero_buffer_size : len;
481     this->write_bytes(zero_buffer, s, address);
482     address = remote((char*)address.address() + s);
483     len -= s;
484   }
485 }
486
487 void RemoteProcess::ignore_region(std::uint64_t addr, std::size_t size)
488 {
489   IgnoredRegion region;
490   region.addr = addr;
491   region.size = size;
492
493   auto pos = std::lower_bound(ignored_regions_.begin(), ignored_regions_.end(), region,
494                               [](auto const& reg1, auto const& reg2) {
495                                 return reg1.addr < reg2.addr || (reg1.addr == reg2.addr && reg1.size < reg2.size);
496                               });
497   if (pos == ignored_regions_.end() || pos->addr != addr || pos->size != size)
498     ignored_regions_.insert(pos, region);
499 }
500
501 void RemoteProcess::ignore_heap(IgnoredHeapRegion const& region)
502 {
503   // Binary search the position of insertion:
504   auto pos = std::lower_bound(ignored_heap_.begin(), ignored_heap_.end(), region.address,
505                               [](auto const& reg, auto const* addr) { return reg.address < addr; });
506   if (pos == ignored_heap_.end() || pos->address != region.address) {
507     // Insert it:
508     ignored_heap_.insert(pos, region);
509   }
510 }
511
512 void RemoteProcess::unignore_heap(void* address, size_t size)
513 {
514   // Binary search:
515   auto pos = std::lower_bound(ignored_heap_.begin(), ignored_heap_.end(), address,
516                               [](auto const& reg, auto const* addr) { return reg.address < addr; });
517   if (pos != ignored_heap_.end() && static_cast<char*>(pos->address) <= static_cast<char*>(address) + size)
518     ignored_heap_.erase(pos);
519 }
520
521 void RemoteProcess::ignore_local_variable(const char* var_name, const char* frame_name) const
522 {
523   if (frame_name != nullptr && strcmp(frame_name, "*") == 0)
524     frame_name = nullptr;
525   for (std::shared_ptr<simgrid::mc::ObjectInformation> const& info : this->object_infos)
526     info->remove_local_variable(var_name, frame_name);
527 }
528
529 std::vector<simgrid::mc::ActorInformation>& RemoteProcess::actors()
530 {
531   this->refresh_simix();
532   return smx_actors_infos;
533 }
534
535 std::vector<simgrid::mc::ActorInformation>& RemoteProcess::dead_actors()
536 {
537   this->refresh_simix();
538   return smx_dead_actors_infos;
539 }
540
541 void RemoteProcess::dump_stack() const
542 {
543   unw_addr_space_t as = unw_create_addr_space(&_UPT_accessors, BYTE_ORDER);
544   if (as == nullptr) {
545     XBT_ERROR("Could not initialize ptrace address space");
546     return;
547   }
548
549   void* context = _UPT_create(this->pid_);
550   if (context == nullptr) {
551     unw_destroy_addr_space(as);
552     XBT_ERROR("Could not initialize ptrace context");
553     return;
554   }
555
556   unw_cursor_t cursor;
557   if (unw_init_remote(&cursor, as, context) != 0) {
558     _UPT_destroy(context);
559     unw_destroy_addr_space(as);
560     XBT_ERROR("Could not initialiez ptrace cursor");
561     return;
562   }
563
564   simgrid::mc::dumpStack(stderr, &cursor);
565
566   _UPT_destroy(context);
567   unw_destroy_addr_space(as);
568 }
569 } // namespace mc
570 } // namespace simgrid