1 #include <unistd.h> // pread, pwrite
3 #include "mc_page_store.h"
5 #include "mc_private.h"
6 #include "mc_snapshot.h"
8 #include <xbt/mmalloc.h>
10 #define SOFT_DIRTY_BIT_NUMBER 55
11 #define SOFT_DIRTY (((uint64_t)1) << SOFT_DIRTY_BIT_NUMBER)
15 // ***** Region management:
17 /** @brief Take a per-page snapshot of a region
19 * @param data The start of the region (must be at the beginning of a page)
20 * @param page_count Number of pages of the region
21 * @param pagemap Linux kernel pagemap values for this region (or NULL)
22 * @param reference_pages Snapshot page numbers of the previous soft_dirty_reset (or NULL)
23 * @return Snapshot page numbers of this new snapshot
25 size_t* mc_take_page_snapshot_region(mc_process_t process,
26 void* data, size_t page_count, uint64_t* pagemap, size_t* reference_pages)
// NOTE(review): this listing is gapped (embedded line numbers jump); the
// opening brace, the declaration of `temp`, the computation of `page_data`,
// the cleanup of `temp`, the `return pagenos;` and several closing braces
// are in elided lines — confirm against the full source.
// Result array: one page-store page number per page of the region.
// Ownership of this array transfers to the caller.
28 size_t* pagenos = (size_t*) malloc(page_count * sizeof(size_t));
// Whether the snapshotted process is the model checker's own process.
30 const bool is_self = MC_process_is_self(process);
// One-page scratch buffer used when the page must be copied out.
34 temp = malloc(xbt_pagesize);
36 for (size_t i=0; i!=page_count; ++i) {
// A page is "soft clean" when its kernel pagemap entry has the soft-dirty
// bit unset, i.e. it was not written to since the last soft-dirty reset.
37 bool softclean = pagemap && !(pagemap[i] & SOFT_DIRTY);
38 if (softclean && reference_pages) {
39 // The page is softclean, it is the same page as the reference page:
// Share the reference snapshot's page instead of storing a duplicate;
// bump its refcount so both snapshots keep it alive.
40 pagenos[i] = reference_pages[i];
41 mc_model_checker->pages->ref_page(reference_pages[i]);
43 // Otherwise, we need to store the page the hard way
44 // (by reading its content):
45 void* page = (char*) data + (i << xbt_pagebits);
46 xbt_assert(mc_page_offset(page)==0, "Not at the beginning of a page");
51 /* Adding another copy (and a syscall) will probably slow things a lot.
52 TODO, optimize this somehow (at least by grouping the syscalls)
54 - reduce the number of syscalls;
55 - let the application snapshot itself;
56 - move the segments in shared memory (this will break `fork` however).
// Copy the page content from the (possibly remote) process into `temp`.
59 MC_process_read(process, MC_ADDRESS_SPACE_READ_FLAGS_NONE,
60 temp, page, xbt_pagesize, MC_PROCESS_INDEX_DISABLED);
// Store into the deduplicating page store. NOTE(review): `page_data` is
// presumably set in elided lines (likely `page` when `is_self`, `temp`
// otherwise) — TODO confirm.
62 pagenos[i] = mc_model_checker->pages->store_page(page_data);
/** @brief Release the page-store references held by a region snapshot.
 *
 *  @param pagenos    Snapshot page numbers, as returned by
 *                    mc_take_page_snapshot_region()
 *  @param page_count Number of entries in pagenos
 */
70 void mc_free_page_snapshot_region(size_t* pagenos, size_t page_count)
72 for (size_t i=0; i!=page_count; ++i) {
// Drop one reference on each stored page; the page store decides when
// the underlying data can actually be reclaimed.
73 mc_model_checker->pages->unref_page(pagenos[i]);
77 /** @brief Restore a snapshot of a region
79 * If possible, the restoration will be incremental
80 * (the modified pages will not be touched).
83 * @param page_count Number of pages of the region
85 * @param pagemap Linux kernel pagemap values for this region (or NULL)
86 * @param reference_pagenos Snapshot page numbers of the previous soft_dirty_reset (or NULL)
88 void mc_restore_page_snapshot_region(mc_process_t process,
89 void* start_addr, size_t page_count, size_t* pagenos, uint64_t* pagemap, size_t* reference_pagenos)
// NOTE(review): gapped listing — the opening brace, the skip (continue/else)
// for the softclean branch and the closing braces are in elided lines.
91 for (size_t i=0; i!=page_count; ++i) {
// Page unchanged since the last soft-dirty reset?
93 bool softclean = pagemap && !(pagemap[i] & SOFT_DIRTY);
// Restoring is a no-op when the live page was not modified AND the
// snapshot shares the exact same stored page as the reference snapshot.
94 if (softclean && reference_pagenos && pagenos[i] == reference_pagenos[i]) {
95 // The page is softclean and is the same as the reference one:
96 // the page is already in the target state.
100 // Otherwise, copy the page:
101 void* target_page = mc_page_from_number(start_addr, i);
102 const void* source_page = mc_model_checker->pages->get_page(pagenos[i]);
// Write the stored page back into the (possibly remote) process memory.
103 MC_process_write(process, source_page, target_page, xbt_pagesize);
107 // ***** Soft dirty tracking
109 /** @brief Like pread() but without partial reads */
109 /** @brief Like pread() but without partial reads */
// NOTE(review): gapped listing — the retry loop, the updates of
// `data`/`count`/`offset` and the return statement are in elided lines.
110 static size_t pread_whole(int fd, void* buf, size_t count, off_t offset) {
// Cursor into the output buffer, advanced as chunks arrive.
113 char* data = (char*) buf;
// NOTE(review): the visible call reads into `buf` rather than `data`;
// verify against the full source that a retry after a partial read does
// not overwrite the beginning of the buffer.
115 ssize_t n = pread(fd, buf, count, offset);
128 // It might be a partial read:
// Lazily open `path` into `*fd` on first use; aborts the model checker on
// failure. NOTE(review): gapped listing — the guard testing whether *fd is
// already open and the check of open()'s return value are in elided lines.
138 static inline __attribute__ ((always_inline))
139 void mc_ensure_fd(int* fd, const char* path, int flags) {
142 *fd = open(path, flags);
// open(2) failed:
144 xbt_die("Could not open file %s", path);
148 /** @brief Reset the soft-dirty bits
150 * This is done after checkpointing and after checkpoint restoration
151 * (if per-page checkpointing is used) in order to know which pages were
154 * See https://www.kernel.org/doc/Documentation/vm/soft-dirty.txt
156 void mc_softdirty_reset() {
157 mc_ensure_fd(&mc_model_checker->fd_clear_refs, "/proc/self/clear_refs", O_WRONLY|O_CLOEXEC);
// Writing "4" to /proc/self/clear_refs clears the soft-dirty bit on every
// page of the process (see Documentation/vm/soft-dirty.txt).
158 if( ::write(mc_model_checker->fd_clear_refs, "4\n", 2) != 2) {
159 xbt_die("Could not reset softdirty bits");
163 /** @brief Read memory page information
165 * For each virtual memory page of the process,
166 * /proc/self/pagemap provides a 64 bit field of information.
167 * We are interested in the soft-dirty bit: with this we can track which
168 * pages were modified between snapshots/restorations and avoid
169 * copying data which was not modified.
171 * See https://www.kernel.org/doc/Documentation/vm/pagemap.txt
173 * @param pagemap Output buffer for pagemap information
174 * @param page_start Page number of the first page
175 * @param page_count Number of pages
177 static void mc_read_pagemap(uint64_t* pagemap, size_t page_start, size_t page_count)
179 mc_ensure_fd(&mc_model_checker->fd_pagemap, "/proc/self/pagemap", O_RDONLY|O_CLOEXEC);
// /proc/self/pagemap holds one 64-bit record per virtual page, so both
// the size and the offset are scaled by sizeof(uint64_t).
180 size_t bytesize = sizeof(uint64_t) * page_count;
181 off_t offset = sizeof(uint64_t) * page_start;
182 if (pread_whole(mc_model_checker->fd_pagemap, pagemap, bytesize, offset) != bytesize) {
183 xbt_die("Could not read pagemap");
187 // ***** High level API
// Build a chunked (per-page) snapshot region.
// NOTE(review): gapped listing — the opening brace, the assignment of
// region->size, `return region;` and closing braces are in elided lines.
189 mc_mem_region_t mc_region_new_sparse(mc_region_type_t region_type,
190 void *start_addr, void* permanent_addr, size_t size,
191 mc_mem_region_t ref_reg)
193 mc_process_t process = &mc_model_checker->process;
195 mc_mem_region_t region = xbt_new(s_mc_mem_region_t, 1);
196 region->region_type = region_type;
// Chunked storage: the region content lives as per-page references into
// the model checker's page store.
197 region->storage_type = MC_REGION_STORAGE_TYPE_CHUNKED;
198 region->start_addr = start_addr;
199 region->permanent_addr = permanent_addr;
// Per-page snapshots require page-aligned region boundaries:
202 xbt_assert((((uintptr_t)start_addr) & (xbt_pagesize-1)) == 0,
203 "Not at the beginning of a page");
204 xbt_assert((((uintptr_t)permanent_addr) & (xbt_pagesize-1)) == 0,
205 "Not at the beginning of a page");
206 size_t page_count = mc_page_count(size);
208 uint64_t* pagemap = NULL;
// Soft-dirty bits are only read when tracking is enabled, a parent
// snapshot exists to compare against, and we snapshot our own process.
209 if (_sg_mc_soft_dirty && mc_model_checker->parent_snapshot &&
210 MC_process_is_self(process)) {
211 pagemap = (uint64_t*) malloc_no_memset(sizeof(uint64_t) * page_count);
212 mc_read_pagemap(pagemap, mc_page_number(NULL, permanent_addr), page_count);
// Page numbers of the reference snapshot, when it is chunked as well:
215 size_t* reg_page_numbers = NULL;
216 if (ref_reg!=NULL && ref_reg->storage_type == MC_REGION_STORAGE_TYPE_CHUNKED)
217 reg_page_numbers = ref_reg->chunked.page_numbers;
219 // Take incremental snapshot:
220 region->chunked.page_numbers = mc_take_page_snapshot_region(process,
221 permanent_addr, page_count, pagemap, reg_page_numbers);
// NOTE(review): pagemap is allocated with malloc_no_memset above but
// released with mfree(mc_heap, ...) — confirm both operate on the same
// heap in the full source.
224 mfree(mc_heap, pagemap);
// Restore a chunked region snapshot into the process, incrementally when
// soft-dirty information is available.
// NOTE(review): gapped listing — the opening brace, the release of
// `pagemap` and the closing braces are in elided lines.
229 void mc_region_restore_sparse(mc_process_t process, mc_mem_region_t reg, mc_mem_region_t ref_reg)
231 xbt_assert((((uintptr_t)reg->permanent_addr) & (xbt_pagesize-1)) == 0,
232 "Not at the beginning of a page");
233 size_t page_count = mc_page_count(reg->size);
235 uint64_t* pagemap = NULL;
237 // Read soft-dirty bits if necessary in order to know which pages have changed:
238 if (_sg_mc_soft_dirty && mc_model_checker->parent_snapshot
239 && MC_process_is_self(process)) {
240 pagemap = (uint64_t*) malloc_no_memset(sizeof(uint64_t) * page_count);
241 mc_read_pagemap(pagemap, mc_page_number(NULL, reg->permanent_addr), page_count);
244 // Incremental per-page snapshot restoration:
245 size_t* reg_page_numbers = NULL;
// Reuse the reference snapshot's page numbers to skip unchanged pages:
246 if (ref_reg && ref_reg->storage_type == MC_REGION_STORAGE_TYPE_CHUNKED)
247 reg_page_numbers = ref_reg->chunked.page_numbers;
249 mc_restore_page_snapshot_region(process,
250 reg->permanent_addr, page_count, reg->chunked.page_numbers,
251 pagemap, reg_page_numbers);