2 * Copyright (c) 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
3 * Copyright (c) 2022 The FreeBSD Foundation
5 * Portions of this software were developed by Mark Johnston under sponsorship
6 * from the FreeBSD Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * Test behavior when a mapping of a shared shadow vm object is
32 * invalidated by COW from another mapping. In particular, when
33 * minherit(INHERIT_SHARE) is applied to a COW mapping, a subsequently
34 * forked child process will share the parent's shadow object. Thus,
35 * pages already mapped into one sharing process may be written from
36 * another, triggering a copy into the shadow object. The VM system
37 * expects that a fully shadowed page is unmapped, but at one point the
38 * use of a shared shadow object could break this invariant.
40 * This is a regression test for an issue isolated by rlibby@FreeBSD.org
41 * from an issue detected by stress2's collapse.sh by jeff@FreeBSD.org.
42 * The issue became CVE-2021-29626.
44 * This file is written as an ATF test suite but may be compiled as a
45 * standalone program with -DSTANDALONE (and optionally -DDEBUG).
48 #include <sys/param.h>
50 #include <sys/procctl.h>
51 #include <sys/resource.h>
52 #include <sys/sysctl.h>
55 #include <machine/atomic.h>
66 #define ATF_REQUIRE(x) do { \
75 #define dprintf(...) printf(__VA_ARGS__)
82 #define FLAG_COLLAPSE 0x1
83 #define FLAG_BLOCK_XFER 0x2
84 #define FLAG_FULLMOD 0x4
85 #define FLAG_MASK (FLAG_COLLAPSE | FLAG_BLOCK_XFER | FLAG_FULLMOD)
96 volatile bool exiting[DEPTH];
98 volatile bool p3_did_write;
102 * Program flow. There are three or four processes that are descendants
103 * of the process running the test (P0), where arrows go from parents to
104 * children, and thicker arrows indicate sharing a certain memory region
105 * without COW semantics:
106 *     P0 -> P1 -> P2 => P3
 *       \-> P4   (optional; see below)
108 * The main idea is that P1 maps a memory region, and that region is
109 * shared with P2/P3, but with COW semantics. When P3 modifies the
110 * memory, P2 ought to see that modification. P4 optionally exists to
111 * defeat a COW optimization.
114 #define child_err(...) do { \
116 err(1, __VA_ARGS__); \
119 #define child_errx(...) do { \
121 errx(1, __VA_ARGS__); \
124 #define SLEEP_TIME_US 1000
126 static void child(struct shared_state *ss, int depth);
129 child_fork(struct shared_state *ss, int depth)
140 child_fault(struct shared_state *ss)
144 for (i = 0; i < ss->len; i += ss->pagesize)
145 (void)((volatile char *)ss->p)[i];
149 child_write(struct shared_state *ss, int val, size_t len)
153 for (i = 0; i < len; i += ss->pagesize)
154 ((int *)ss->p)[i / sizeof(int)] = val;
155 atomic_thread_fence_rel();
159 child_wait_p3_write(struct shared_state *ss)
161 while (!ss->p3_did_write) {
164 usleep(SLEEP_TIME_US);
166 atomic_thread_fence_acq();
170 child_verify(struct shared_state *ss, int depth, int newval, int oldval)
173 int expectval, foundval;
175 for (i = 0; i < ss->len; i += ss->pagesize) {
176 expectval = i < ss->modlen ? newval : oldval;
177 foundval = ((int *)ss->p)[i / sizeof(int)];
178 if (foundval == expectval)
180 child_errx("P%d saw %d but expected %d, %d was the old value",
181 depth, foundval, expectval, oldval);
186 child(struct shared_state *ss, int depth)
188 pid_t mypid, oldval, pid;
190 if (depth < 1 || depth >= DEPTH)
191 child_errx("Bad depth %d", depth);
193 dprintf("P%d (pid %d) started\n", depth, mypid);
196 /* Shared memory undergoing test. */
197 ss->p = mmap(NULL, ss->len, PROT_READ | PROT_WRITE,
198 MAP_SHARED | MAP_ANON, -1, 0);
199 if (ss->p == MAP_FAILED)
202 /* P1 stamps the shared memory. */
203 child_write(ss, mypid, ss->len);
205 if (mlock(ss->p, ss->len) == -1)
207 if (mprotect(ss->p, ss->len, PROT_READ) == -1)
208 child_err("mprotect");
210 if (ss->block_xfer) {
212 * P4 is forked so that its existence blocks a page COW
213 * path where the page is simply transferred between
214 * objects, rather than being copied.
219 * P1 specifies that modifications from its child processes not
220 * be shared with P1. Child process reads can be serviced from
221 * pages in P1's object, but writes must be COW'd.
223 if (minherit(ss->p, ss->len, INHERIT_COPY) != 0)
224 child_err("minherit");
226 child_fork(ss, depth + 1);
227 /* P1 and P4 wait for P3's writes before exiting. */
228 child_wait_p3_write(ss);
229 child_verify(ss, depth, mypid, mypid);
231 /* Hang around to prevent collapse. */
233 usleep(SLEEP_TIME_US);
235 /* Exit so the P2 -> P1/P4 shadow chain can collapse. */
239 * P2 now specifies that modifications from its child processes
240 * be shared. P2 and P3 will share a shadow object.
242 if (minherit(ss->p, ss->len, INHERIT_SHARE) != 0)
243 child_err("minherit");
246 * P2 faults a page in P1's object before P1 exits and the
247 * shadow chain is collapsed. This may be redundant if the
248 * (read-only) mappings were copied by fork(), but it doesn't
252 oldval = atomic_load_acq_int(ss->p);
255 pid = child_fork(ss, depth + 1);
257 /* Wait for P1 and P4 to exit, triggering collapse. */
258 while (!ss->exiting[1] ||
259 (ss->block_xfer && !ss->exiting[4]))
260 usleep(SLEEP_TIME_US);
262 * This is racy, just guess at how long it may take
263 * them to finish exiting.
267 /* P2 waits for P3's modification. */
268 child_wait_p3_write(ss);
269 child_verify(ss, depth, pid, oldval);
275 * Use mlock()+mprotect() to trigger the COW. This
276 * exercises a different COW handler than the one used
280 if (mlock(ss->p, ss->len) == -1)
282 if (mprotect(ss->p, ss->len, PROT_READ | PROT_WRITE) ==
284 child_err("mprotect");
288 * P3 writes the memory. A page is faulted into the shared
289 * P2/P3 shadow object. P2's mapping of the page in P1's
290 * object must now be shot down, or else P2 will wrongly
291 * continue to have that page mapped.
293 child_write(ss, mypid, ss->modlen);
294 ss->p3_did_write = true;
295 dprintf("P3 (pid %d) wrote its pid\n", mypid);
298 /* Just hang around until P3 is done writing. */
299 oldval = atomic_load_acq_int(ss->p);
300 child_wait_p3_write(ss);
301 child_verify(ss, depth, oldval, oldval);
304 child_errx("Bad depth %d", depth);
307 dprintf("P%d (pid %d) exiting\n", depth, mypid);
308 ss->exiting[depth] = true;
313 do_one_shared_shadow_inval(bool lazy_cow, size_t pagesize, size_t len,
316 struct shared_state *ss;
322 dprintf("P0 (pid %d) %s(collapse=%d, block_xfer=%d, full_mod=%d)\n",
323 pid, __func__, (int)collapse, (int)block_xfer, (int)full_mod);
325 ATF_REQUIRE(procctl(P_PID, pid, PROC_REAP_ACQUIRE, NULL) == 0);
327 /* Shared memory for coordination. */
328 ss = mmap(NULL, sizeof(*ss), PROT_READ | PROT_WRITE,
329 MAP_SHARED | MAP_ANON, -1, 0);
330 ATF_REQUIRE(ss != MAP_FAILED);
333 ss->modlen = (flags & FLAG_FULLMOD) ? ss->len : ss->len / 2;
334 ss->pagesize = pagesize;
335 ss->collapse = (flags & FLAG_COLLAPSE) != 0;
336 ss->block_xfer = (flags & FLAG_BLOCK_XFER) != 0;
337 ss->lazy_cow = lazy_cow;
340 ATF_REQUIRE(pid != -1);
344 /* Wait for all descendants to exit. */
347 ATF_REQUIRE(WIFEXITED(status));
348 } while (pid != -1 || errno != ECHILD);
350 atomic_thread_fence_acq();
351 ATF_REQUIRE(ss->okay);
353 ATF_REQUIRE(munmap(ss, sizeof(*ss)) == 0);
354 ATF_REQUIRE(procctl(P_PID, getpid(), PROC_REAP_RELEASE, NULL) == 0);
358 do_shared_shadow_inval(bool lazy_cow)
360 size_t largepagesize, pagesize, pagesizes[MAXPAGESIZES], sysctllen;
362 sysctllen = sizeof(pagesizes);
363 ATF_REQUIRE(sysctlbyname("hw.pagesizes", pagesizes, &sysctllen, NULL,
365 ATF_REQUIRE(sysctllen >= sizeof(size_t));
367 pagesize = pagesizes[0];
368 largepagesize = MAXPAGESIZES >= 2 &&
369 sysctllen >= 2 * sizeof(size_t) && pagesizes[1] != 0 ?
370 pagesizes[1] : 2 * 1024 * 1024;
372 for (unsigned int i = 0; i <= FLAG_MASK; i++) {
373 do_one_shared_shadow_inval(lazy_cow, pagesize,
375 do_one_shared_shadow_inval(lazy_cow, pagesize,
377 do_one_shared_shadow_inval(lazy_cow, pagesize,
378 largepagesize - pagesize, i);
379 do_one_shared_shadow_inval(lazy_cow, pagesize,
381 do_one_shared_shadow_inval(lazy_cow, pagesize,
382 largepagesize + pagesize, i);
387 do_shared_shadow_inval_eager(void)
391 rl.rlim_cur = rl.rlim_max = RLIM_INFINITY;
392 ATF_REQUIRE(setrlimit(RLIMIT_MEMLOCK, &rl) == 0);
394 do_shared_shadow_inval(false);
398 do_shared_shadow_inval_lazy(void)
400 do_shared_shadow_inval(true);
407 do_shared_shadow_inval_lazy();
408 do_shared_shadow_inval_eager();
412 ATF_TC_WITHOUT_HEAD(shared_shadow_inval__lazy_cow);
413 ATF_TC_BODY(shared_shadow_inval__lazy_cow, tc)
415 do_shared_shadow_inval_lazy();
418 ATF_TC(shared_shadow_inval__eager_cow);
419 ATF_TC_HEAD(shared_shadow_inval__eager_cow, tc)
421 /* Needed to raise the mlock() limit. */
422 atf_tc_set_md_var(tc, "require.user", "root");
424 ATF_TC_BODY(shared_shadow_inval__eager_cow, tc)
426 do_shared_shadow_inval_eager();
431 ATF_TP_ADD_TC(tp, shared_shadow_inval__lazy_cow);
432 ATF_TP_ADD_TC(tp, shared_shadow_inval__eager_cow);
433 return (atf_no_error());
435 #endif /* !STANDALONE */