1 // Tests of Linux-specific functionality
6 #include <sys/socket.h>
7 #include <sys/timerfd.h>
8 #include <sys/signalfd.h>
9 #include <sys/eventfd.h>
10 #include <sys/epoll.h>
11 #include <sys/inotify.h>
12 #include <sys/fanotify.h>
14 #include <sys/capability.h> // Requires e.g. libcap-dev package for POSIX.1e capabilities headers
15 #include <linux/aio_abi.h>
16 #include <linux/filter.h>
17 #include <linux/seccomp.h>
18 #include <linux/version.h>
29 #include "capsicum-test.h"
// Linux.TimerFD: dup() one timerfd into descriptors limited to different
// rights sets, then check timerfd_settime(), timerfd_gettime() and poll()
// against each of them.
31 TEST(Linux, TimerFD) {
32 int fd = timerfd_create(CLOCK_MONOTONIC, 0);
35 cap_rights_init(&r_ro, CAP_READ);
37 cap_rights_init(&r_wo, CAP_WRITE);
39 cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
40 cap_rights_t r_rwpoll;
41 cap_rights_init(&r_rwpoll, CAP_READ, CAP_WRITE, CAP_EVENT);
// One duplicate per rights set; only cap_fd_all also carries CAP_EVENT.
43 int cap_fd_ro = dup(fd);
45 EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_ro));
46 int cap_fd_wo = dup(fd);
48 EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_wo));
49 int cap_fd_rw = dup(fd);
51 EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rw));
52 int cap_fd_all = dup(fd);
53 EXPECT_OK(cap_fd_all);
54 EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwpoll));
56 struct itimerspec old_ispec;
57 struct itimerspec ispec;
// One-shot timer: no interval, first expiry after 100ms.
58 ispec.it_interval.tv_sec = 0;
59 ispec.it_interval.tv_nsec = 0;
60 ispec.it_value.tv_sec = 0;
61 ispec.it_value.tv_nsec = 100000000; // 100ms
// Setting the timer is refused on the CAP_READ-only descriptor. On the
// CAP_WRITE-only descriptor it is refused only when the old setting is also
// requested (non-NULL last argument).
62 EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_ro, 0, &ispec, NULL));
63 EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_wo, 0, &ispec, &old_ispec));
64 EXPECT_OK(timerfd_settime(cap_fd_wo, 0, &ispec, NULL));
65 EXPECT_OK(timerfd_settime(cap_fd_rw, 0, &ispec, NULL));
66 EXPECT_OK(timerfd_settime(cap_fd_all, 0, &ispec, NULL));
// Reading the current setting is refused on the CAP_WRITE-only descriptor.
68 EXPECT_NOTCAPABLE(timerfd_gettime(cap_fd_wo, &old_ispec));
69 EXPECT_OK(timerfd_gettime(cap_fd_ro, &old_ispec));
70 EXPECT_OK(timerfd_gettime(cap_fd_rw, &old_ispec));
71 EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec));
73 // To be able to poll() for the timer pop, still need CAP_EVENT.
74 struct pollfd poll_fd;
// Try each descriptor that lacks CAP_EVENT in turn.
75 for (int ii = 0; ii < 3; ii++) {
77 poll_fd.events = POLLIN;
79 case 0: poll_fd.fd = cap_fd_ro; break;
80 case 1: poll_fd.fd = cap_fd_wo; break;
81 case 2: poll_fd.fd = cap_fd_rw; break;
83 // Poll immediately returns with POLLNVAL
84 EXPECT_OK(poll(&poll_fd, 1, 400));
85 EXPECT_EQ(0, (poll_fd.revents & POLLIN));
86 EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
// cap_fd_all has CAP_EVENT, so POLLIN (and not POLLNVAL) is expected.
89 poll_fd.fd = cap_fd_all;
90 EXPECT_OK(poll(&poll_fd, 1, 400));
91 EXPECT_NE(0, (poll_fd.revents & POLLIN));
92 EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));
// After the poll above, the setting read back is expected to be all zeros.
94 EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec));
95 EXPECT_EQ(0, old_ispec.it_value.tv_sec);
96 EXPECT_EQ(0, old_ispec.it_value.tv_nsec);
97 EXPECT_EQ(0, old_ispec.it_interval.tv_sec);
98 EXPECT_EQ(0, old_ispec.it_interval.tv_nsec);
// Linux.SignalFD: checks which rights are needed to read from a signalfd,
// to change its signal mask, and to poll() it.
107 FORK_TEST(Linux, SignalFD) {
// NOTE(review): the condition guarding this skip is not visible in this listing.
109 TEST_SKIPPED("multi-threaded run clashes with signals");
115 sigaddset(&mask, SIGUSR1);
117 // Block signals before registering against a new signal FD.
118 EXPECT_OK(sigprocmask(SIG_BLOCK, &mask, NULL));
119 int fd = signalfd(-1, &mask, 0);
123 cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
125 cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
127 cap_rights_init(&r_sig, CAP_FSIGNAL);
128 cap_rights_t r_rssig;
129 cap_rights_init(&r_rssig, CAP_FSIGNAL, CAP_READ, CAP_SEEK);
130 cap_rights_t r_rssig_poll;
131 cap_rights_init(&r_rssig_poll, CAP_FSIGNAL, CAP_READ, CAP_SEEK, CAP_EVENT);
133 // Various capability variants.
// Despite its name, cap_fd_none is limited to r_ws (CAP_WRITE, CAP_SEEK):
// it has neither CAP_READ nor CAP_FSIGNAL.
134 int cap_fd_none = dup(fd);
135 EXPECT_OK(cap_fd_none);
136 EXPECT_OK(cap_rights_limit(cap_fd_none, &r_ws));
137 int cap_fd_read = dup(fd);
138 EXPECT_OK(cap_fd_read);
139 EXPECT_OK(cap_rights_limit(cap_fd_read, &r_rs));
140 int cap_fd_sig = dup(fd);
141 EXPECT_OK(cap_fd_sig);
142 EXPECT_OK(cap_rights_limit(cap_fd_sig, &r_sig));
143 int cap_fd_sig_read = dup(fd);
144 EXPECT_OK(cap_fd_sig_read);
145 EXPECT_OK(cap_rights_limit(cap_fd_sig_read, &r_rssig));
146 int cap_fd_all = dup(fd);
147 EXPECT_OK(cap_fd_all);
148 EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rssig_poll));
150 struct signalfd_siginfo fdsi;
152 // Need CAP_READ to read the signal information
154 EXPECT_NOTCAPABLE(read(cap_fd_none, &fdsi, sizeof(struct signalfd_siginfo)));
155 EXPECT_NOTCAPABLE(read(cap_fd_sig, &fdsi, sizeof(struct signalfd_siginfo)));
156 int len = read(cap_fd_read, &fdsi, sizeof(struct signalfd_siginfo));
// A full siginfo record describing SIGUSR1 is expected.
158 EXPECT_EQ(sizeof(struct signalfd_siginfo), (size_t)len);
159 EXPECT_EQ(SIGUSR1, (int)fdsi.ssi_signo);
161 // Need CAP_FSIGNAL to modify the signal mask.
163 sigaddset(&mask, SIGUSR1);
164 sigaddset(&mask, SIGUSR2);
165 EXPECT_OK(sigprocmask(SIG_BLOCK, &mask, NULL));
166 EXPECT_NOTCAPABLE(signalfd(cap_fd_none, &mask, 0));
167 EXPECT_NOTCAPABLE(signalfd(cap_fd_read, &mask, 0));
// Re-masking an existing signalfd is expected to return that same descriptor.
168 EXPECT_EQ(cap_fd_sig, signalfd(cap_fd_sig, &mask, 0));
170 // Need CAP_EVENT to get notification of a signal in poll(2).
173 struct pollfd poll_fd;
175 poll_fd.events = POLLIN;
// cap_fd_sig_read lacks CAP_EVENT: expect POLLNVAL, no POLLIN.
176 poll_fd.fd = cap_fd_sig_read;
177 EXPECT_OK(poll(&poll_fd, 1, 400));
178 EXPECT_EQ(0, (poll_fd.revents & POLLIN));
179 EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
// cap_fd_all includes CAP_EVENT: expect POLLIN, no POLLNVAL.
181 poll_fd.fd = cap_fd_all;
182 EXPECT_OK(poll(&poll_fd, 1, 400));
183 EXPECT_NE(0, (poll_fd.revents & POLLIN));
184 EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));
// Linux.EventFD: checks the rights needed to write, poll and read an eventfd,
// using a forked child as the writer.
187 TEST(Linux, EventFD) {
188 int fd = eventfd(0, 0);
192 cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
194 cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
196 cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
197 cap_rights_t r_rwspoll;
198 cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
// Duplicates of the eventfd, each limited to one of the rights sets above.
200 int cap_ro = dup(fd);
202 EXPECT_OK(cap_rights_limit(cap_ro, &r_rs));
203 int cap_wo = dup(fd);
205 EXPECT_OK(cap_rights_limit(cap_wo, &r_ws));
206 int cap_rw = dup(fd);
208 EXPECT_OK(cap_rights_limit(cap_rw, &r_rws));
209 int cap_all = dup(fd);
211 EXPECT_OK(cap_rights_limit(cap_all, &r_rwspoll));
213 pid_t child = fork();
215 // Child: write counter to eventfd
// Writing is refused through the read-only duplicate and allowed through the
// write-only one.
217 EXPECT_NOTCAPABLE(write(cap_ro, &u, sizeof(u)));
218 EXPECT_OK(write(cap_wo, &u, sizeof(u)));
222 sleep(1); // Allow child to write
224 struct pollfd poll_fd;
226 poll_fd.events = POLLIN;
// NOTE(review): the line selecting the descriptor for this first poll is not
// visible in this listing; POLLNVAL is expected for it.
228 EXPECT_OK(poll(&poll_fd, 1, 400));
229 EXPECT_EQ(0, (poll_fd.revents & POLLIN));
230 EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
// cap_all includes CAP_EVENT: expect POLLIN, no POLLNVAL.
232 poll_fd.fd = cap_all;
233 EXPECT_OK(poll(&poll_fd, 1, 400));
234 EXPECT_NE(0, (poll_fd.revents & POLLIN));
235 EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));
// Reading is refused through the write-only duplicate; the read-only one is
// expected to yield the value 42.
238 EXPECT_NOTCAPABLE(read(cap_wo, &u, sizeof(u)));
239 EXPECT_OK(read(cap_ro, &u, sizeof(u)));
240 EXPECT_EQ(42, (int)u);
242 // Wait for the child.
244 EXPECT_EQ(child, waitpid(child, &status, 0));
// rc is the child's exit status, or -1 if it did not exit normally.
245 int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
// Linux.epoll: after entering capability mode, checks that epoll_ctl() on an
// epoll descriptor needs CAP_EPOLL_CTL and that epoll_pwait() needs CAP_EVENT.
255 FORK_TEST(Linux, epoll) {
257 EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds));
259 char buffer[4] = {1, 2, 3, 4};
// Data is written to sock_fds[1] up front; the epoll_pwait() below later
// expects EPOLLIN for the monitored sock_fds[0].
260 EXPECT_OK(write(sock_fds[1], buffer, sizeof(buffer)));
262 EXPECT_OK(cap_enter()); // Enter capability mode.
264 int epoll_fd = epoll_create(1);
268 cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
270 cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
272 cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
273 cap_rights_t r_rwspoll;
274 cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
275 cap_rights_t r_epoll;
276 cap_rights_init(&r_epoll, CAP_EPOLL_CTL);
// Duplicates of the epoll descriptor, one per rights set.
278 int cap_epoll_wo = dup(epoll_fd);
279 EXPECT_OK(cap_epoll_wo);
280 EXPECT_OK(cap_rights_limit(cap_epoll_wo, &r_ws));
281 int cap_epoll_ro = dup(epoll_fd);
282 EXPECT_OK(cap_epoll_ro);
283 EXPECT_OK(cap_rights_limit(cap_epoll_ro, &r_rs));
284 int cap_epoll_rw = dup(epoll_fd);
285 EXPECT_OK(cap_epoll_rw);
286 EXPECT_OK(cap_rights_limit(cap_epoll_rw, &r_rws));
287 int cap_epoll_poll = dup(epoll_fd);
288 EXPECT_OK(cap_epoll_poll);
289 EXPECT_OK(cap_rights_limit(cap_epoll_poll, &r_rwspoll));
290 int cap_epoll_ctl = dup(epoll_fd);
291 EXPECT_OK(cap_epoll_ctl);
292 EXPECT_OK(cap_rights_limit(cap_epoll_ctl, &r_epoll));
294 // Can only modify the FDs being monitored if the CAP_EPOLL_CTL right is present.
295 struct epoll_event eev;
296 memset(&eev, 0, sizeof(eev));
297 eev.events = EPOLLIN|EPOLLOUT|EPOLLPRI;
298 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_ADD, sock_fds[0], &eev));
299 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_ADD, sock_fds[0], &eev));
300 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_ADD, sock_fds[0], &eev));
301 EXPECT_OK(epoll_ctl(cap_epoll_ctl, EPOLL_CTL_ADD, sock_fds[0], &eev));
// Same pattern for modifying the registration, with a reduced event mask.
302 eev.events = EPOLLIN|EPOLLOUT;
303 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_MOD, sock_fds[0], &eev));
304 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_MOD, sock_fds[0], &eev));
305 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_MOD, sock_fds[0], &eev));
306 EXPECT_OK(epoll_ctl(cap_epoll_ctl, EPOLL_CTL_MOD, sock_fds[0], &eev));
308 // Running epoll_pwait(2) requires CAP_EVENT.
310 EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_ro, &eev, 1, 100, NULL));
311 EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_wo, &eev, 1, 100, NULL));
312 EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_rw, &eev, 1, 100, NULL));
313 EXPECT_OK(epoll_pwait(cap_epoll_poll, &eev, 1, 100, NULL));
314 EXPECT_EQ(EPOLLIN, eev.events & EPOLLIN);
// Deleting the registration is likewise refused without CAP_EPOLL_CTL.
316 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_DEL, sock_fds[0], &eev));
317 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_DEL, sock_fds[0], &eev));
318 EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_DEL, sock_fds[0], &eev));
// NOTE(review): the successful delete goes through the unrestricted epoll_fd,
// not cap_epoll_ctl as the ADD/MOD cases do; confirm this is intended.
319 EXPECT_OK(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, sock_fds[0], &eev));
321 close(cap_epoll_ctl);
322 close(cap_epoll_poll);
// Linux.fstatat: fstatat() needs CAP_FSTAT, both when applied to a file
// descriptor itself (empty path with AT_EMPTY_PATH) and when given a name
// relative to a directory descriptor.
331 TEST(Linux, fstatat) {
332 int fd = open(TmpFile("cap_fstatat"), O_CREAT|O_RDWR, 0644);
334 unsigned char buffer[] = {1, 2, 3, 4};
335 EXPECT_OK(write(fd, buffer, sizeof(buffer)));
// File duplicates: cap_rf has CAP_READ+CAP_FSTAT, cap_ro has CAP_READ only.
337 int cap_rf = dup(fd);
339 EXPECT_OK(cap_rights_limit(cap_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT)));
340 int cap_ro = dup(fd);
342 EXPECT_OK(cap_rights_limit(cap_ro, cap_rights_init(&rights, CAP_READ)));
// Stat the descriptor itself: allowed on the unrestricted fd and on cap_rf,
// refused on cap_ro which lacks CAP_FSTAT.
345 EXPECT_OK(fstatat(fd, "", &info, AT_EMPTY_PATH));
346 EXPECT_NOTCAPABLE(fstatat(cap_ro, "", &info, AT_EMPTY_PATH));
347 EXPECT_OK(fstatat(cap_rf, "", &info, AT_EMPTY_PATH));
// Same check with directory descriptors and a relative file name.
353 int dir = open(tmpdir.c_str(), O_RDONLY);
355 int dir_rf = dup(dir);
357 EXPECT_OK(cap_rights_limit(dir_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT)));
// dir_ro must duplicate the directory (it previously duplicated the regular
// file fd), so the NOTCAPABLE check below really exercises a directory
// descriptor that lacks CAP_FSTAT.
358 int dir_ro = dup(dir);
360 EXPECT_OK(cap_rights_limit(dir_ro, cap_rights_init(&rights, CAP_READ)));
362 EXPECT_OK(fstatat(dir, "cap_fstatat", &info, AT_EMPTY_PATH));
363 EXPECT_NOTCAPABLE(fstatat(dir_ro, "cap_fstatat", &info, AT_EMPTY_PATH));
364 EXPECT_OK(fstatat(dir_rf, "cap_fstatat", &info, AT_EMPTY_PATH));
// Remove the temporary file created at the start of the test.
370 unlink(TmpFile("cap_fstatat"));
373 // fanotify support may not be available at compile-time
374 #ifdef __NR_fanotify_init
// Linux.fanotify: checks the rights needed on a fanotify descriptor (to mark,
// poll and read) and on the directory descriptor being monitored. Returns
// early when fanotify_init() fails.
375 TEST(Linux, fanotify) {
377 int fa_fd = fanotify_init(FAN_CLASS_NOTIF, O_RDWR);
379 if (fa_fd < 0) return; // May not be enabled
// Rights sets for the fanotify descriptor duplicates.
382 cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
384 cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
386 cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
387 cap_rights_t r_rwspoll;
388 cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
389 cap_rights_t r_rwsnotify;
390 cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY);
// Rights sets for the monitored directory duplicates.
392 cap_rights_init(&r_rsl, CAP_READ, CAP_SEEK, CAP_LOOKUP);
393 cap_rights_t r_rslstat;
394 cap_rights_init(&r_rslstat, CAP_READ, CAP_SEEK, CAP_LOOKUP, CAP_FSTAT);
395 cap_rights_t r_rsstat;
396 cap_rights_init(&r_rsstat, CAP_READ, CAP_SEEK, CAP_FSTAT);
398 int cap_fd_ro = dup(fa_fd);
399 EXPECT_OK(cap_fd_ro);
400 EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs));
401 int cap_fd_wo = dup(fa_fd);
402 EXPECT_OK(cap_fd_wo);
403 EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws));
404 int cap_fd_rw = dup(fa_fd);
405 EXPECT_OK(cap_fd_rw);
406 EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws));
407 int cap_fd_poll = dup(fa_fd);
408 EXPECT_OK(cap_fd_poll);
409 EXPECT_OK(cap_rights_limit(cap_fd_poll, &r_rwspoll));
410 int cap_fd_not = dup(fa_fd);
411 EXPECT_OK(cap_fd_not);
412 EXPECT_OK(cap_rights_limit(cap_fd_not, &r_rwsnotify));
// The directory may be left over from an earlier run, so EEXIST is accepted.
414 int rc = mkdir(TmpFile("cap_notify"), 0755);
415 EXPECT_TRUE(rc == 0 || errno == EEXIST);
416 int dfd = open(TmpFile("cap_notify"), O_RDONLY);
418 int fd = open(TmpFile("cap_notify/file"), O_CREAT|O_RDWR, 0644);
// cap_dfd has the full directory rights set (read, seek, lookup, fstat).
420 int cap_dfd = dup(dfd);
422 EXPECT_OK(cap_rights_limit(cap_dfd, &r_rslstat));
424 int cap_dfd_rs = dup(dfd);
425 EXPECT_OK(cap_dfd_rs);
426 EXPECT_OK(cap_rights_limit(cap_dfd_rs, &r_rs));
427 EXPECT_OK(cap_dfd_rs);
428 int cap_dfd_rsstat = dup(dfd);
429 EXPECT_OK(cap_dfd_rsstat);
430 EXPECT_OK(cap_rights_limit(cap_dfd_rsstat, &r_rsstat));
431 EXPECT_OK(cap_dfd_rsstat);
432 int cap_dfd_rsl = dup(dfd);
433 EXPECT_OK(cap_dfd_rsl);
434 EXPECT_OK(cap_rights_limit(cap_dfd_rsl, &r_rsl));
435 EXPECT_OK(cap_dfd_rsl);
437 // Need CAP_NOTIFY to change what's monitored.
438 EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_ro, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
439 EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_wo, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
440 EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_rw, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
441 EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
443 // Need CAP_FSTAT on the thing monitored.
444 EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rs, NULL));
445 EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rsstat, NULL));
447 // To add monitoring of a file under a dfd, need CAP_LOOKUP|CAP_FSTAT on the dfd.
448 EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsstat, "file"));
449 EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsl, "file"));
450 EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd, "file"));
452 pid_t child = fork();
454 // Child: Perform activity in the directory under notify.
456 unlink(TmpFile("cap_notify/temp"));
457 int fd = open(TmpFile("cap_notify/temp"), O_CREAT|O_RDWR, 0644);
462 // Need CAP_EVENT to poll.
463 struct pollfd poll_fd;
465 poll_fd.events = POLLIN;
466 poll_fd.fd = cap_fd_rw;
467 EXPECT_OK(poll(&poll_fd, 1, 1400));
468 EXPECT_EQ(0, (poll_fd.revents & POLLIN));
469 EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
// cap_fd_not has CAP_NOTIFY but not CAP_EVENT, so POLLNVAL is expected too.
471 poll_fd.fd = cap_fd_not;
472 EXPECT_OK(poll(&poll_fd, 1, 1400));
473 EXPECT_EQ(0, (poll_fd.revents & POLLIN));
474 EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
// cap_fd_poll includes CAP_EVENT: expect POLLIN, no POLLNVAL.
476 poll_fd.fd = cap_fd_poll;
477 EXPECT_OK(poll(&poll_fd, 1, 1400));
478 EXPECT_NE(0, (poll_fd.revents & POLLIN));
479 EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));
481 // Need CAP_READ to read.
482 struct fanotify_event_metadata ev;
483 memset(&ev, 0, sizeof(ev));
484 EXPECT_NOTCAPABLE(read(cap_fd_wo, &ev, sizeof(ev)));
// The successful read uses the unrestricted fa_fd; a single event record
// attributed to the child's pid is expected.
485 rc = read(fa_fd, &ev, sizeof(ev));
487 EXPECT_EQ((int)sizeof(struct fanotify_event_metadata), rc);
488 EXPECT_EQ(child, ev.pid);
491 // TODO(drysdale): reinstate if/when capsicum-linux propagates rights
492 // to fanotify-generated FDs.
494 // fanotify(7) gives us a FD for the changed file. This should
495 // only have rights that are a subset of those for the original
496 // monitored directory file descriptor.
498 CAP_SET_ALL(&rights);
499 EXPECT_OK(cap_rights_get(ev.fd, &rights));
500 EXPECT_RIGHTS_IN(&rights, &r_rslstat);
503 // Wait for the child.
505 EXPECT_EQ(child, waitpid(child, &status, 0));
// rc is the child's exit status, or -1 if it did not exit normally.
506 rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
509 close(cap_dfd_rsstat);
// Remove the files and directory created for the test.
514 unlink(TmpFile("cap_notify/file"));
515 unlink(TmpFile("cap_notify/temp"));
516 rmdir(TmpFile("cap_notify"));
// Linux.inotify: checks the rights needed on an inotify descriptor to add or
// remove watches and to read events.
526 TEST(Linux, inotify) {
527 int i_fd = inotify_init();
531 cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
533 cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
535 cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
536 cap_rights_t r_rwsnotify;
537 cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY);
// Duplicates of the inotify descriptor; only cap_fd_all carries CAP_NOTIFY.
539 int cap_fd_ro = dup(i_fd);
540 EXPECT_OK(cap_fd_ro);
541 EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs));
542 int cap_fd_wo = dup(i_fd);
543 EXPECT_OK(cap_fd_wo);
544 EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws));
545 int cap_fd_rw = dup(i_fd);
546 EXPECT_OK(cap_fd_rw);
547 EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws));
548 int cap_fd_all = dup(i_fd);
549 EXPECT_OK(cap_fd_all);
550 EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwsnotify));
552 int fd = open(TmpFile("cap_inotify"), O_CREAT|O_RDWR, 0644);
// Adding a watch is refused on cap_fd_rw (no CAP_NOTIFY); the watch actually
// used below is added through the unrestricted i_fd.
553 EXPECT_NOTCAPABLE(inotify_add_watch(cap_fd_rw, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY));
554 int wd = inotify_add_watch(i_fd, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY);
// Modify the watched file to generate an event.
557 unsigned char buffer[] = {1, 2, 3, 4};
558 EXPECT_OK(write(fd, buffer, sizeof(buffer)));
560 struct inotify_event iev;
561 memset(&iev, 0, sizeof(iev));
// Reading events is refused on the write-only duplicate; the read-only one
// is expected to return one event for watch descriptor wd.
562 EXPECT_NOTCAPABLE(read(cap_fd_wo, &iev, sizeof(iev)));
563 int rc = read(cap_fd_ro, &iev, sizeof(iev));
565 EXPECT_EQ((int)sizeof(iev), rc);
566 EXPECT_EQ(wd, iev.wd);
// Removing the watch is refused on cap_fd_wo and allowed on cap_fd_all.
568 EXPECT_NOTCAPABLE(inotify_rm_watch(cap_fd_wo, wd));
569 EXPECT_OK(inotify_rm_watch(cap_fd_all, wd));
577 unlink(TmpFile("cap_inotify"));
// Linux.ArchChange: for each candidate mini-me binary that can be opened,
// fork a child that enters capability mode and runs the binary via fexecve_()
// with the --capmode argument; the parent waits for each child.
580 TEST(Linux, ArchChange) {
581 const char* prog_candidates[] = {"./mini-me.32", "./mini-me.x32", "./mini-me.64"};
582 const char* progs[] = {NULL, NULL, NULL};
// argv_pass[0] is a placeholder, overwritten with the program path before exec.
583 char* argv_pass[] = {(char*)"to-come", (char*)"--capmode", NULL};
584 char* null_envp[] = {NULL};
// Collect descriptors and names of the candidates that open successfully.
588 for (int ii = 0; ii < 3; ii++) {
589 fds[count] = open(prog_candidates[ii], O_RDONLY);
590 if (fds[count] >= 0) {
591 progs[count] = prog_candidates[ii];
596 TEST_SKIPPED("no different-architecture programs available");
600 for (int ii = 0; ii < count; ii++) {
601 // Fork-and-exec a binary of this architecture.
602 pid_t child = fork();
604 EXPECT_OK(cap_enter()); // Enter capability mode
605 if (verbose) fprintf(stderr, "[%d] call fexecve(%s, %s)\n",
606 getpid_(), progs[ii], argv_pass[1]);
607 argv_pass[0] = (char *)progs[ii];
608 int rc = fexecve_(fds[ii], argv_pass, null_envp);
// Only reached if fexecve_() returned, i.e. the exec did not happen.
609 fprintf(stderr, "fexecve(%s) returned %d errno %d\n", progs[ii], rc, errno);
610 exit(99); // Should not reach here.
613 EXPECT_EQ(child, waitpid(child, &status, 0));
// rc is the child's exit status, or -1 if it did not exit normally.
614 int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
// Linux.Namespace: setns() on a namespace descriptor needs CAP_SETNS; once in
// capability mode setns() is refused but unshare() is still allowed.
620 FORK_TEST(Linux, Namespace) {
622 pid_t me = getpid_();
624 // Create a new UTS namespace.
625 EXPECT_OK(unshare(CLONE_NEWUTS));
626 // Open an FD to its symlink.
// NOTE(review): the declaration of buffer is not visible in this listing, so
// its size cannot be checked against this unbounded sprintf().
628 sprintf(buffer, "/proc/%d/ns/uts", me);
629 int ns_fd = open(buffer, O_RDONLY);
// Two rights sets that differ only in CAP_SETNS.
631 cap_rights_t r_rwlstat;
632 cap_rights_init(&r_rwlstat, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT);
633 cap_rights_t r_rwlstatns;
634 cap_rights_init(&r_rwlstatns, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT, CAP_SETNS);
636 int cap_fd = dup(ns_fd);
638 EXPECT_OK(cap_rights_limit(cap_fd, &r_rwlstat));
639 int cap_fd_setns = dup(ns_fd);
640 EXPECT_OK(cap_fd_setns);
641 EXPECT_OK(cap_rights_limit(cap_fd_setns, &r_rwlstatns));
// Without CAP_SETNS the call is refused; with it the call succeeds.
642 EXPECT_NOTCAPABLE(setns(cap_fd, CLONE_NEWUTS));
643 EXPECT_OK(setns(cap_fd_setns, CLONE_NEWUTS));
645 EXPECT_OK(cap_enter()); // Enter capability mode.
647 // No setns(2) but unshare(2) is allowed.
648 EXPECT_CAPMODE(setns(ns_fd, CLONE_NEWUTS));
649 EXPECT_OK(unshare(CLONE_NEWUTS));
// Send the file descriptor `fd` over the socket `over` with sendmsg(),
// carried as SOL_SOCKET/SCM_RIGHTS ancillary data alongside a small data
// buffer.
652 static void SendFD(int fd, int over) {
654 mh.msg_name = NULL; // No address needed
// Regular payload of the message.
658 iov[0].iov_base = buffer1;
659 iov[0].iov_len = sizeof(buffer1);
// Control area: a single control message sized to hold one int.
663 mh.msg_control = buffer2;
664 mh.msg_controllen = CMSG_LEN(sizeof(int));
665 struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh);
666 cmptr->cmsg_level = SOL_SOCKET;
667 cmptr->cmsg_type = SCM_RIGHTS;
668 cmptr->cmsg_len = CMSG_LEN(sizeof(int));
// The descriptor being passed is the control message's data.
669 *(int *)CMSG_DATA(cmptr) = fd;
672 int rc = sendmsg(over, &mh, 0);
// Receive one message on the socket `over` with recvmsg() and pull an int
// (the passed file descriptor) out of its first control message. Exactly one
// control message of CMSG_LEN(sizeof(int)) is expected.
// NOTE(review): the return statement is not visible in this listing;
// presumably the extracted fd is returned.
676 static int ReceiveFD(int over) {
678 mh.msg_name = NULL; // No address needed
// Regular payload buffer.
682 iov[0].iov_base = buffer1;
683 iov[0].iov_len = sizeof(buffer1);
// Control buffer that receives the ancillary data.
687 mh.msg_control = buffer2;
688 mh.msg_controllen = sizeof(buffer2);
689 int rc = recvmsg(over, &mh, 0);
691 EXPECT_LE(CMSG_LEN(sizeof(int)), mh.msg_controllen);
692 struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh);
693 int fd = *(int*)CMSG_DATA(cmptr);
694 EXPECT_EQ(CMSG_LEN(sizeof(int)), cmptr->cmsg_len);
// There must be no second control message.
695 cmptr = CMSG_NXTHDR(&mh, cmptr);
696 EXPECT_TRUE(cmptr == NULL);
// File-scope state shared between the tests below and the functions they run
// via clone(): a process descriptor filled in by pdfork(), and the socketpair
// used with SendFD()/ReceiveFD().
700 static int shared_pd = -1;
701 static int shared_sock_fds[2];
// Entry point passed to clone() with CLONE_NEWPID by PidNamespacePdFork.
// It kills the process behind shared_pd, pdfork()s a child of its own, sends
// that child's process descriptor over shared_sock_fds[1], and then waits for
// the child to die.
703 static int ChildFunc(void *arg) {
704 // This function is running in a new PID namespace, and so is pid 1.
705 if (verbose) fprintf(stderr, " ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid());
706 EXPECT_EQ(1, getpid_());
707 EXPECT_EQ(0, getppid());
709 // The shared process descriptor is outside our namespace, so we cannot
711 if (verbose) fprintf(stderr, " ChildFunc: shared_pd=%d\n", shared_pd);
712 pid_t shared_child = -1;
// pdgetpid() succeeds but is expected to report pid 0 for that process.
713 EXPECT_OK(pdgetpid(shared_pd, &shared_child));
714 if (verbose) fprintf(stderr, " ChildFunc: corresponding pid=%d\n", shared_child);
715 EXPECT_EQ(0, shared_child);
717 // But we can pdkill() it even so.
718 if (verbose) fprintf(stderr, " ChildFunc: call pdkill(pd=%d)\n", shared_pd);
719 EXPECT_OK(pdkill(shared_pd, SIGINT));
722 pid_t child = pdfork(&pd, 0);
725 // Child: expect pid 2.
726 if (verbose) fprintf(stderr, " child of ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid());
727 EXPECT_EQ(2, getpid_());
728 EXPECT_EQ(1, getppid());
730 if (verbose) fprintf(stderr, " child of ChildFunc: \"I aten't dead\"\n");
736 EXPECT_PID_ALIVE(child);
737 if (verbose) fprintf(stderr, " ChildFunc: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n",
738 pd, child, ProcessState(child));
// The pid reported for the new process descriptor must match pdfork()'s result.
741 EXPECT_OK(pdgetpid(pd, &pid));
742 EXPECT_EQ(child, pid);
746 // Send the process descriptor over UNIX domain socket back to parent.
747 SendFD(pd, shared_sock_fds[1]);
749 // Wait for death of (grand)child, killed by our parent.
750 if (verbose) fprintf(stderr, " ChildFunc: wait on pid=%d\n", child);
752 EXPECT_EQ(child, wait4(child, &status, __WALL, NULL));
754 if (verbose) fprintf(stderr, " ChildFunc: return 0\n");
// Static 1 MiB buffer used as the stack for clone()d children; callers pass
// child_stack + STACK_SIZE as the stack argument.
758 #define STACK_SIZE (1024 * 1024)
759 static char child_stack[STACK_SIZE];
761 // TODO(drysdale): fork into a user namespace first so REQUIRE_ROOT can be removed.
// Linux.PidNamespacePdFork: exchanges process descriptors with a child running
// ChildFunc in a new PID namespace, and checks pdgetpid()/pdkill() on both
// sides of the namespace boundary.
762 TEST(Linux, PidNamespacePdFork) {
764 // Pass process descriptors in both directions across a PID namespace boundary.
765 // pdfork() off a child before we start, holding its process descriptor in a global
766 // variable that's accessible to children.
767 pid_t firstborn = pdfork(&shared_pd, 0);
768 EXPECT_OK(firstborn);
769 if (firstborn == 0) {
771 if (verbose) fprintf(stderr, " Firstborn: \"I aten't dead\"\n");
776 EXPECT_PID_ALIVE(firstborn);
777 if (verbose) fprintf(stderr, "Parent: pre-pdfork()ed pd=%d, pid=%d state='%c'\n",
778 shared_pd, firstborn, ProcessState(firstborn));
781 // Prepare sockets to communicate with child process.
782 EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));
784 // Clone into a child process with a new pid namespace.
// CLONE_FILES is among the flags, so the child shares this process's file
// descriptor table.
785 pid_t child = clone(ChildFunc, child_stack + STACK_SIZE,
786 CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
788 EXPECT_PID_ALIVE(child);
789 if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));
791 // Ensure the child runs. First thing it does is to kill our firstborn, using shared_pd.
793 EXPECT_PID_DEAD(firstborn);
795 // But we can still retrieve firstborn's PID, as it's not been reaped yet.
797 EXPECT_OK(pdgetpid(shared_pd, &child0));
798 EXPECT_EQ(firstborn, child0);
799 if (verbose) fprintf(stderr, "Parent: check on firstborn: pdgetpid(pd=%d) -> child=%d state='%c'\n",
800 shared_pd, child0, ProcessState(child0));
// Reap the firstborn.
804 EXPECT_EQ(firstborn, waitpid(firstborn, &status, __WALL));
806 // Get the process descriptor of the child-of-child via socket transfer.
807 int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
809 // Our notion of the pid associated with the grandchild is in the main PID namespace.
811 EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
// Inside its own namespace the grandchild is pid 2 (checked in ChildFunc);
// here a different value is expected.
812 EXPECT_NE(2, grandchild);
813 if (verbose) fprintf(stderr, "Parent: pre-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n",
814 grandchild_pd, grandchild, ProcessState(grandchild));
815 EXPECT_PID_ALIVE(grandchild);
817 // Kill the grandchild via the process descriptor.
818 EXPECT_OK(pdkill(grandchild_pd, SIGINT));
820 if (verbose) fprintf(stderr, "Parent: post-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n",
821 grandchild_pd, grandchild, ProcessState(grandchild));
822 EXPECT_PID_DEAD(grandchild);
826 // Wait for the child.
// WNOHANG: the child is expected to have terminated already by this point.
827 EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
828 int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
831 close(shared_sock_fds[0]);
832 close(shared_sock_fds[1]);
834 close(grandchild_pd);
// Entry point passed to clone() with CLONE_NEWPID by the DeadNSInit tests.
// It pdfork()s a child, sends that child's process descriptor over
// shared_sock_fds[1], then blocks reading a value from the same socket before
// returning.
837 int NSInit(void *data) {
838 // This function is running in a new PID namespace, and so is pid 1.
839 if (verbose) fprintf(stderr, " NSInit: pid=%d, ppid=%d\n", getpid_(), getppid());
840 EXPECT_EQ(1, getpid_());
841 EXPECT_EQ(0, getppid());
844 pid_t child = pdfork(&pd, 0);
847 // Child: loop forever until terminated.
848 if (verbose) fprintf(stderr, " child of NSInit: pid=%d, ppid=%d\n", getpid_(), getppid());
850 if (verbose) fprintf(stderr, " child of NSInit: \"I aten't dead\"\n");
856 EXPECT_PID_ALIVE(child);
857 if (verbose) fprintf(stderr, " NSInit: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n",
858 pd, child, ProcessState(child));
861 // Send the process descriptor over UNIX domain socket back to parent.
862 SendFD(pd, shared_sock_fds[1]);
865 // Wait for a byte back in the other direction.
867 if (verbose) fprintf(stderr, " NSInit: block waiting for value\n");
// The return value of read() is not checked here.
868 read(shared_sock_fds[1], &value, sizeof(value));
870 if (verbose) fprintf(stderr, " NSInit: return 0\n");
// Linux.DeadNSInit: runs NSInit as the first process of a new PID namespace,
// then lets it terminate and checks that its pdfork()ed child goes away too,
// even though the parent still holds that child's process descriptor.
874 TEST(Linux, DeadNSInit) {
877 // Prepare sockets to communicate with child process.
878 EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));
880 // Clone into a child process with a new pid namespace.
881 pid_t child = clone(NSInit, child_stack + STACK_SIZE,
882 CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
885 EXPECT_PID_ALIVE(child);
886 if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));
888 // Get the process descriptor of the child-of-child via socket transfer.
889 int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
891 EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
892 if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild));
894 // Send an int to the child to trigger its termination. Grandchild should also
895 // go, as its init process is gone.
897 if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n");
// The channel is the socketpair created above; write()'s result is not checked.
898 write(shared_sock_fds[0], &zero, sizeof(zero));
899 EXPECT_PID_ZOMBIE(child);
900 EXPECT_PID_GONE(grandchild);
902 // Wait for the child.
904 EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
// rc is the child's exit status, or -1 if it did not exit normally.
905 int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
907 EXPECT_PID_GONE(child);
909 close(shared_sock_fds[0]);
910 close(shared_sock_fds[1]);
911 close(grandchild_pd);
914 fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child));
915 fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild));
// Linux.DeadNSInit2: like DeadNSInit, but the grandchild is killed with
// pdkill() and its process descriptor is closed before the namespace's first
// process (NSInit) is told to terminate.
919 TEST(Linux, DeadNSInit2) {
922 // Prepare sockets to communicate with child process.
923 EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));
925 // Clone into a child process with a new pid namespace.
926 pid_t child = clone(NSInit, child_stack + STACK_SIZE,
927 CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
930 EXPECT_PID_ALIVE(child);
931 if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));
933 // Get the process descriptor of the child-of-child via socket transfer.
934 int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
936 EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
937 if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild));
939 // Kill the grandchild
940 EXPECT_OK(pdkill(grandchild_pd, SIGINT));
// The killed grandchild is expected to be a zombie at this point.
942 EXPECT_PID_ZOMBIE(grandchild);
943 // Close the process descriptor, so there are now no procdesc references to grandchild.
944 close(grandchild_pd);
946 // Send an int to the child to trigger its termination. Grandchild should also
947 // go, as its init process is gone.
949 if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n");
// The channel is the socketpair created above; write()'s result is not checked.
950 write(shared_sock_fds[0], &zero, sizeof(zero));
951 EXPECT_PID_ZOMBIE(child);
952 EXPECT_PID_GONE(grandchild);
954 // Wait for the child.
956 EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
// rc is the child's exit status, or -1 if it did not exit normally.
957 int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
960 close(shared_sock_fds[0]);
961 close(shared_sock_fds[1]);
964 fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child));
965 fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild));
// Linux.CheckHighWord: in capability mode, a prctl() command that is normally
// allowed is expected to be refused when bits above the low 32 of the command
// argument are set.
970 FORK_TEST(Linux, CheckHighWord) {
971 EXPECT_OK(cap_enter()); // Enter capability mode.
// The plain command works in capability mode and reports no_new_privs set.
973 int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
975 EXPECT_EQ(1, rc); // no_new_privs = 1
977 // Set some of the high 32-bits of argument zero.
978 uint64_t big_cmd = PR_GET_NO_NEW_PRIVS | 0x100000000LL;
// Issued through syscall() so the 64-bit value is passed as given.
979 EXPECT_CAPMODE(syscall(__NR_prctl, big_cmd, 0, 0, 0, 0));
// Linux.PrctlOpenatBeneath: the openat-beneath flag can be set and cleared via
// prctl() outside capability mode; after cap_enter() it is expected to read
// as 1 and attempts to clear it are refused.
983 FORK_TEST(Linux, PrctlOpenatBeneath) {
984 // Set no_new_privs = 1
985 EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
986 int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
988 EXPECT_EQ(1, rc); // no_new_privs = 1
990 // Set openat-beneath mode
991 EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 1, 0, 0, 0));
992 rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
994 EXPECT_EQ(1, rc); // openat_beneath = 1
996 // Clear openat-beneath mode
997 EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0));
998 rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
1000 EXPECT_EQ(0, rc); // openat_beneath = 0
// The flag was cleared just above, so the value of 1 checked below comes from
// entering capability mode.
1002 EXPECT_OK(cap_enter()); // Enter capability mode
1004 // Expect to be in openat_beneath mode
1005 rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
1007 EXPECT_EQ(1, rc); // openat_beneath = 1
1009 // Expect this to be immutable.
1010 EXPECT_CAPMODE(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0));
1011 rc = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
1013 EXPECT_EQ(1, rc); // openat_beneath = 1
// Linux.NoNewPrivs: installing a seccomp-bpf filter is expected to fail with
// EACCES while no_new_privs is 0 (and CAP_SYS_ADMIN is not held), and to
// succeed once no_new_privs has been set to 1.
1017 FORK_TEST(Linux, NoNewPrivs) {
1018 if (getuid() == 0) {
1019 // If root, drop CAP_SYS_ADMIN POSIX.1e capability.
1020 struct __user_cap_header_struct hdr;
1021 hdr.version = _LINUX_CAPABILITY_VERSION_3;
1022 hdr.pid = getpid_();
1023 struct __user_cap_data_struct data[3];
1024 EXPECT_OK(capget(&hdr, &data[0]));
// Clear the CAP_SYS_ADMIN bit in all three sets of the first data word.
1025 data[0].effective &= ~(1 << CAP_SYS_ADMIN);
1026 data[0].permitted &= ~(1 << CAP_SYS_ADMIN);
1027 data[0].inheritable &= ~(1 << CAP_SYS_ADMIN);
1028 EXPECT_OK(capset(&hdr, &data[0]));
1030 int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
1032 EXPECT_EQ(0, rc); // no_new_privs == 0
1034 // Can't enter seccomp-bpf mode with no_new_privs == 0
// Minimal filter: a single instruction that allows every syscall.
1035 struct sock_filter filter[] = {
1036 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
1038 struct sock_fprog bpf;
1039 bpf.len = (sizeof(filter) / sizeof(filter[0]));
1040 bpf.filter = filter;
1041 rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0);
1043 EXPECT_EQ(EACCES, errno);
1045 // Set no_new_privs = 1
1046 EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
1047 rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
1049 EXPECT_EQ(1, rc); // no_new_privs = 1
1051 // Can now turn on seccomp mode
1052 EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0));
// Helper macros that expand to struct sock_filter initializers, used to build
// seccomp-bpf filter programs in the tests below.
// NOTE(review): some #define and continuation lines of this group are not
// visible in this listing.
1055 /* Macros for BPF generation */
// Return instruction that makes the filtered syscall fail with errno `err`.
// `err` is parenthesized so that expression arguments (e.g. `a | b`) are
// masked as a whole rather than by operator precedence.
1056 #define BPF_RETURN_ERRNO(err) \
1057 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO | ((err) & 0xFFFF))
// Return instruction with the SECCOMP_RET_KILL action.
1058 #define BPF_KILL_PROCESS \
1059 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
1061 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
// Load the syscall number (seccomp_data.nr) into the accumulator.
1062 #define EXAMINE_SYSCALL \
1063 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr))
// Each *_SYSCALL(name) macro compares the loaded syscall number with
// __NR_<name>; on a match the following instruction runs, otherwise it is
// skipped.
1064 #define ALLOW_SYSCALL(name) \
1065 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
1067 #define KILL_SYSCALL(name) \
1068 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
1070 #define FAIL_SYSCALL(name, err) \
1071 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
1072 BPF_RETURN_ERRNO(err)
// Exercises Capsicum capability mode and a seccomp-bpf filter in the same
// process, checking which mechanism determines the outcome of each syscall.
// The work happens in a forked process that is expected to die from SIGSYS;
// the waiting side checks the termination status and removes the temp file.
1074 TEST(Linux, CapModeWithBPF) {
1075 pid_t child = fork();
// Open the scratch file and restrict the descriptor to read/write/seek/fsync.
// Neither CAP_FCHMOD nor CAP_FSTAT is included in this rights set.
1078 int fd = open(TmpFile("cap_bpf_capmode"), O_CREAT|O_RDWR, 0644);
1079 cap_rights_t rights;
1080 cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC);
1081 EXPECT_OK(cap_rights_limit(fd, &rights));
// Filter program: load the syscall number, then
//   fchmod -> fail with ENOMEM, fstat -> fail with ENOEXEC,
//   close  -> allow,            fsync -> kill.
1083 struct sock_filter filter[] = { EXAMINE_SYSCALL,
1084 FAIL_SYSCALL(fchmod, ENOMEM),
1085 FAIL_SYSCALL(fstat, ENOEXEC),
1086 ALLOW_SYSCALL(close),
1087 KILL_SYSCALL(fsync),
// Instruction count is derived from the array size.
1089 struct sock_fprog bpf = {.len = (sizeof(filter) / sizeof(filter[0])),
1091 // Set up seccomp-bpf first.
1092 EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
1093 EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0));
1095 EXPECT_OK(cap_enter()); // Enter capability mode.
1097 // fchmod is allowed by Capsicum, but failed by BPF.
1098 EXPECT_SYSCALL_FAIL(ENOMEM, fchmod(fd, 0644));
1099 // open is allowed by BPF, but failed by Capsicum
1100 EXPECT_SYSCALL_FAIL(ECAPMODE, open(TmpFile("cap_bpf_capmode"), O_RDONLY));
1101 // fstat is failed by both BPF and Capsicum; tie-break is on errno
// The expectation below is the filter's ENOEXEC, i.e. the BPF errno is the one
// this test expects to be reported.
1103 EXPECT_SYSCALL_FAIL(ENOEXEC, fstat(fd, &buf));
1104 // fsync is allowed by Capsicum, but BPF's SIGSYS generation takes precedence
1105 fsync(fd); // terminate with unhandled SIGSYS
// Reap the forked process and confirm it was terminated by SIGSYS.
1109 EXPECT_EQ(child, waitpid(child, &status, 0));
1110 EXPECT_TRUE(WIFSIGNALED(status));
1111 EXPECT_EQ(SIGSYS, WTERMSIG(status));
1112 unlink(TmpFile("cap_bpf_capmode"));
// Linux native AIO against capability-limited descriptors: io_submit is
// expected to enforce the rights of the descriptor named in each request.
// (The test's header line is not visible in this excerpt.)
1116 int fd = open(TmpFile("cap_aio"), O_CREAT|O_RDWR, 0644);
// Three rights sets: read+seek, write+seek, and read+write+seek+fsync.
1120 cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
1122 cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
1123 cap_rights_t r_rwssync;
1124 cap_rights_init(&r_rwssync, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC);
// One duplicate of fd per rights set.
1126 int cap_ro = dup(fd);
1128 EXPECT_OK(cap_rights_limit(cap_ro, &r_rs));
1130 int cap_wo = dup(fd);
1132 EXPECT_OK(cap_rights_limit(cap_wo, &r_ws));
1134 int cap_all = dup(fd);
1136 EXPECT_OK(cap_rights_limit(cap_all, &r_rwssync));
1139 // Linux: io_setup, io_submit, io_getevents, io_cancel, io_destroy
// Create an AIO context via the raw syscall, passing 10 as the event count.
1140 aio_context_t ctx = 0;
1141 EXPECT_OK(syscall(__NR_io_setup, 10, &ctx));
1143 unsigned char buffer[32] = {1, 2, 3, 4};
// Build a single request (a struct iocb, per the reqs[] element type below)
// that is reused for every submission; only the opcode and target descriptor
// are changed between cases.
1145 memset(&req, 0, sizeof(req));
1146 req.aio_reqprio = 0;
1147 req.aio_fildes = fd;
// The buffer address is carried in a 64-bit field, converted via uintptr_t.
1148 uintptr_t bufaddr = (uintptr_t)buffer;
1149 req.aio_buf = (__u64)bufaddr;
1152 struct iocb* reqs[1] = {&req};
// Write: refused on the read+seek descriptor, accepted on the write+seek one.
1155 req.aio_lio_opcode = IOCB_CMD_PWRITE;
1156 req.aio_fildes = cap_ro;
1157 EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
1158 req.aio_fildes = cap_wo;
1159 EXPECT_OK(syscall(__NR_io_submit, ctx, 1, reqs));
// fsync/fdsync: aio_fildes is still cap_wo here, which lacks CAP_FSYNC.
1162 req.aio_lio_opcode = IOCB_CMD_FSYNC;
1163 EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
1164 req.aio_lio_opcode = IOCB_CMD_FDSYNC;
1165 EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
1166 // Even with CAP_FSYNC, turns out fsync/fdsync aren't implemented
// So on cap_all the submissions are still expected to fail, but -- going by
// the macro name -- with an error other than "not capable". The first check
// runs with the opcode still set to IOCB_CMD_FDSYNC, the second with
// IOCB_CMD_FSYNC.
1167 req.aio_fildes = cap_all;
1168 EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
1169 req.aio_lio_opcode = IOCB_CMD_FSYNC;
1170 EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
// Read: refused on the write+seek descriptor, accepted on the read+seek one.
1173 req.aio_lio_opcode = IOCB_CMD_PREAD;
1174 req.aio_fildes = cap_wo;
1175 EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
1176 req.aio_fildes = cap_ro;
1177 EXPECT_OK(syscall(__NR_io_submit, ctx, 1, reqs));
// Tear down the AIO context and remove the scratch file.
1179 EXPECT_OK(syscall(__NR_io_destroy, ctx));
1185 unlink(TmpFile("cap_aio"));
// kcmp(2) versus Capsicum: a descriptor is expected to compare equal to its
// counterpart in another process until its rights are limited, after which
// kcmp(KCMP_FILE) is expected to report a difference.
// (The test's header line is not visible in this excerpt.)
1192 // This requires CONFIG_CHECKPOINT_RESTORE in kernel config.
1193 int fd = open("/etc/passwd", O_RDONLY);
1195 pid_t parent = getpid_();
// Probe by comparing fd with itself in this process; if the kernel reports
// ENOSYS the test is marked as skipped.
1198 int rc = syscall(__NR_kcmp, parent, parent, KCMP_FILE, fd, fd);
1199 if (rc == -1 && errno == ENOSYS) {
1200 TEST_SKIPPED("kcmp(2) gives -ENOSYS");
1204 pid_t child = fork();
1206 // Child: limit rights on FD.
// Before limiting, fd in both processes is expected to compare as the same file.
1208 EXPECT_OK(syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd));
1209 cap_rights_t rights;
1210 cap_rights_init(&rights, CAP_READ, CAP_WRITE);
1211 EXPECT_OK(cap_rights_limit(fd, &rights));
1212 // A capability wrapping a normal FD is different (from a kcmp(2) perspective)
1213 // than the original file.
1214 EXPECT_NE(0, syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd));
1217 // Wait for the child.
1219 EXPECT_EQ(child, waitpid(child, &status, 0));
// Reduce the wait status to the child's exit code, or -1 if it did not exit
// normally.
1220 rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
// Checks that /proc/<pid>/fdinfo/<fd> for a capability-limited descriptor
// reports both the file position of the underlying file and a "rights:" line.
1226 TEST(Linux, ProcFS) {
1227 cap_rights_t rights;
1228 cap_rights_init(&rights, CAP_READ, CAP_SEEK);
1229 int fd = open("/etc/passwd", O_RDONLY);
// Move to offset 4 so the position later reported by fdinfo is recognisable.
1231 lseek(fd, 4, SEEK_SET);
1234 EXPECT_OK(cap_rights_limit(cap, &rights));
1235 pid_t me = getpid_();
// Build the fdinfo path for the limited descriptor `cap`.
// NOTE(review): sprintf() is unbounded; the declaration of `buffer` is not
// visible here, so its size cannot be checked -- snprintf(buffer,
// sizeof(buffer), ...) would be the safer form.
1238 sprintf(buffer, "/proc/%d/fdinfo/%d", me, cap);
1239 int procfd = open(buffer, O_RDONLY);
1240 EXPECT_OK(procfd) << " failed to open " << buffer;
// Nothing further can be checked without the fdinfo file.
1241 if (procfd < 0) return;
// The fdinfo file itself is read through a descriptor limited to read+seek.
1242 int proccap = dup(procfd);
1244 EXPECT_OK(cap_rights_limit(proccap, &rights));
// NOTE(review): read() does not NUL-terminate, yet the strstr() calls below
// treat `buffer` as a C string. Here they would rely on the terminator left by
// the earlier sprintf() or on the read being short -- confirm, or read at most
// sizeof(buffer) - 1 bytes and terminate explicitly.
1246 EXPECT_OK(read(proccap, buffer, sizeof(buffer)));
1247 // The fdinfo should include the file pos of the underlying file
1248 EXPECT_NE((char*)NULL, strstr(buffer, "pos:\t4"));
1249 // ...and the rights of the Capsicum capability.
1250 EXPECT_NE((char*)NULL, strstr(buffer, "rights:\t0x"));
// Records that, from inside capability mode, clock_gettime() can still read
// the CPU-time clock of other processes (a forked child, and pid 1) when given
// a hand-built clock ID. The TODO below marks this as behaviour that should
// eventually be refused.
1258 FORK_TEST(Linux, ProcessClocks) {
1259 pid_t self = getpid_();
1260 pid_t child = fork();
1268 EXPECT_OK(cap_enter()); // Enter capability mode.
1270 // Nefariously build a clock ID for the child's CPU time.
1271 // This relies on knowledge of the internal layout of clock IDs.
// NOTE(review): the layout used is (~pid << 3) | clock-type, with type 0.
// `~child` is negative for a positive pid, and left-shifting a negative value
// is undefined behaviour in C and in C++ before C++20; doing the shift on an
// unsigned value would avoid that.
1272 clockid_t child_clock;
1273 child_clock = ((~child) << 3) | 0x0;
1275 memset(&ts, 0, sizeof(ts));
1277 // TODO(drysdale): Should not be possible to retrieve info about a
1278 // different process, as the PID global namespace should be locked
1280 EXPECT_OK(clock_gettime(child_clock, &ts));
1281 if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(child=%d->0x%08x) is %ld.%09ld \n",
1282 self, child, child_clock, (long)ts.tv_sec, (long)ts.tv_nsec);
// Same query, this time with the clock ID built for pid 1.
1284 child_clock = ((~1) << 3) | 0x0;
1285 memset(&ts, 0, sizeof(ts));
1286 EXPECT_OK(clock_gettime(child_clock, &ts));
1287 if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(init=1->0x%08x) is %ld.%09ld \n",
1288 self, child_clock, (long)ts.tv_sec, (long)ts.tv_nsec);
1290 // Orphan the child.
// File leases versus Capsicum rights: F_SETLEASE/F_GETLEASE are expected to be
// refused on a descriptor limited to CAP_READ|CAP_WRITE, and to work on one
// that also carries CAP_FLOCK and CAP_FSIGNAL.
1293 TEST(Linux, SetLease) {
1294 int fd_all = open(TmpFile("cap_lease"), O_CREAT|O_RDWR, 0644);
1296 int fd_rw = dup(fd_all);
// fd_all: read, write, flock and fsignal rights.
1300 cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_FLOCK, CAP_FSIGNAL);
1301 EXPECT_OK(cap_rights_limit(fd_all, &r_all));
// fd_rw: read and write only.
1304 cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
1305 EXPECT_OK(cap_rights_limit(fd_rw, &r_rw));
// Both lease operations are expected to be refused on the read/write-only
// descriptor.
1307 EXPECT_NOTCAPABLE(fcntl(fd_rw, F_SETLEASE, F_WRLCK));
1308 EXPECT_NOTCAPABLE(fcntl(fd_rw, F_GETLEASE));
1310 if (!tmpdir_on_tmpfs) { // tmpfs doesn't support leases
// Take a write lease and read it back, then release it and confirm that
// F_GETLEASE reports F_UNLCK.
1311 EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_WRLCK));
1312 EXPECT_EQ(F_WRLCK, fcntl(fd_all, F_GETLEASE));
1314 EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_UNLCK, 0));
1315 EXPECT_EQ(F_UNLCK, fcntl(fd_all, F_GETLEASE));
// Remove the scratch file.
1319 unlink(TmpFile("cap_lease"));
// Drives the raw cap_rights_limit syscall with a series of malformed argument
// combinations and checks the errno reported for each.
// Judging from the cases below, the arguments after the syscall number are:
// fd, primary rights pointer, fcntl sub-rights, ioctl count, ioctl array, and
// a final argument that is presumably a flags word -- confirm against the
// kernel ABI.
1322 TEST(Linux, InvalidRightsSyscall) {
1323 int fd = open(TmpFile("cap_invalid_rights"), O_RDONLY|O_CREAT, 0644);
1326 cap_rights_t rights;
1327 cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FCHMOD, CAP_FSTAT);
1329 // Use the raw syscall throughout.
// Baseline: a well-formed call is expected to return 0.
1330 EXPECT_EQ(0, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));
1332 // Directly access the syscall, and find all unseemly manner of use for it.
// - Non-zero value in the last argument
1334 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 1));
1335 EXPECT_EQ(EINVAL, errno);
1336 // - Specify an fcntl subright, but no CAP_FCNTL set
1337 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, CAP_FCNTL_GETFL, 0, NULL, 0));
1338 EXPECT_EQ(EINVAL, errno);
1339 // - Specify an ioctl subright, but no CAP_IOCTL set
1340 unsigned int ioctl1 = 1;
1341 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, &ioctl1, 0));
1342 EXPECT_EQ(EINVAL, errno);
1343 // - N ioctls, but null pointer passed
1344 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, NULL, 0));
1345 EXPECT_EQ(EINVAL, errno);
1346 // - Invalid nioctls
1347 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, -2, NULL, 0));
1348 EXPECT_EQ(EINVAL, errno);
1349 // - Null primary rights
// This is the only case here that expects EFAULT rather than EINVAL.
1350 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, NULL, 0, 0, NULL, 0));
1351 EXPECT_EQ(EFAULT, errno);
1352 // - Invalid index bitmask
1353 rights.cr_rights[0] |= 3ULL << 57;
1354 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));
1355 EXPECT_EQ(EINVAL, errno);
1356 // - Invalid version
// NOTE(review): the bits OR-ed in for the previous case are still set here, so
// this call carries both the invalid index bits and the version bits; the
// version check is not exercised in isolation.
1357 rights.cr_rights[0] |= 2ULL << 62;
1358 EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));
1359 EXPECT_EQ(EINVAL, errno);
// Remove the scratch file.
1362 unlink(TmpFile("cap_invalid_rights"));
// name_to_handle_at()/open_by_handle_at() round trip on a scratch file,
// followed by a check that open_by_handle_at() is refused in capability mode.
// NOTE(review): FORK_TEST_ON's third argument is presumably a file for the
// harness to remove afterwards -- confirm in capsicum-test.h.
1365 FORK_TEST_ON(Linux, OpenByHandleAt, TmpFile("cap_openbyhandle_testfile")) {
// Open the temp directory and create the test file relative to it.
1367 int dir = open(tmpdir.c_str(), O_RDONLY);
1369 int fd = openat(dir, "cap_openbyhandle_testfile", O_RDWR|O_CREAT, 0644);
1371 const char* message = "Saved text";
1372 EXPECT_OK(write(fd, message, strlen(message)));
// Allocate a handle with room for the largest handle payload (MAX_HANDLE_SZ).
// NOTE(review): the malloc() result is used without a NULL check, and no
// matching free() is visible in this excerpt.
1375 struct file_handle* fhandle = (struct file_handle*)malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
1376 fhandle->handle_bytes = MAX_HANDLE_SZ;
1378 EXPECT_OK(name_to_handle_at(dir, "cap_openbyhandle_testfile", fhandle, &mount_id, 0));
// Re-open the file via its handle and check that the saved text reads back.
1380 fd = open_by_handle_at(dir, fhandle, O_RDONLY);
// NOTE(review): read() does not NUL-terminate; the std::string(buffer)
// comparison below depends on `buffer` (declaration not visible here) having
// been zero-initialised -- confirm.
1383 EXPECT_OK(read(fd, buffer, 199));
1384 EXPECT_EQ(std::string(message), std::string(buffer));
1387 // Cannot issue open_by_handle_at after entering capability mode.
1389 EXPECT_CAPMODE(open_by_handle_at(dir, fhandle, O_RDONLY));
// Local wrapper for getrandom(2): when the syscall number is known at compile
// time, the raw syscall is invoked with the arguments passed through
// unchanged. The alternative branch is not visible in this excerpt.
1394 int getrandom_(void *buf, size_t buflen, unsigned int flags) {
1395 #ifdef __NR_getrandom
1396 return syscall(__NR_getrandom, buf, buflen, flags);
// Only compiled when the kernel headers are at least version 3.17.
1403 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
1404 #include <linux/random.h> // Requires 3.17 kernel
// getrandom() is expected to remain usable after entering capability mode.
1405 FORK_TEST(Linux, GetRandom) {
1406 EXPECT_OK(cap_enter());
1407 unsigned char buffer[1024];
1408 unsigned char buffer2[1024];
// Two separate non-blocking requests; the buffers are then expected to differ.
1409 EXPECT_OK(getrandom_(buffer, sizeof(buffer), GRND_NONBLOCK));
1410 EXPECT_OK(getrandom_(buffer2, sizeof(buffer2), GRND_NONBLOCK));
1411 EXPECT_NE(0, memcmp(buffer, buffer2, sizeof(buffer)));
// Local wrapper for memfd_create(2): when the syscall number is known at
// compile time, the raw syscall is invoked with `name` and `flags` passed
// through unchanged. The alternative branch is not visible in this excerpt.
1415 int memfd_create_(const char *name, unsigned int flags) {
1416 #ifdef __NR_memfd_create
1417 return syscall(__NR_memfd_create, name, flags);
1424 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
1425 #include <linux/memfd.h> // Requires 3.17 kernel
1426 TEST(Linux, MemFDDeathTest) {
1427 int memfd = memfd_create_("capsicum-test", MFD_ALLOW_SEALING);
1428 if (memfd == -1 && errno == ENOSYS) {
1429 TEST_SKIPPED("memfd_create(2) gives -ENOSYS");
1433 EXPECT_OK(ftruncate(memfd, LEN));
1434 int memfd_ro = dup(memfd);
1435 int memfd_rw = dup(memfd);
1436 EXPECT_OK(memfd_ro);
1437 EXPECT_OK(memfd_rw);
1438 cap_rights_t rights;
1439 EXPECT_OK(cap_rights_limit(memfd_ro, cap_rights_init(&rights, CAP_MMAP_R, CAP_FSTAT)));
1440 EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW, CAP_FCHMOD)));
1442 unsigned char *p_ro = (unsigned char *)mmap(NULL, LEN, PROT_READ, MAP_SHARED, memfd_ro, 0);
1443 EXPECT_NE((unsigned char *)MAP_FAILED, p_ro);
1444 unsigned char *p_rw = (unsigned char *)mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_rw, 0);
1445 EXPECT_NE((unsigned char *)MAP_FAILED, p_rw);
1446 EXPECT_EQ(MAP_FAILED,
1447 mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_ro, 0));
1450 EXPECT_EQ(42, *p_ro);
1451 EXPECT_DEATH(*p_ro = 42, "");
1454 // Hack for when libc6 does not yet include the updated linux/fcntl.h from kernel 3.17
1455 #define _F_LINUX_SPECIFIC_BASE F_SETLEASE
1456 #define F_ADD_SEALS (_F_LINUX_SPECIFIC_BASE + 9)
1457 #define F_GET_SEALS (_F_LINUX_SPECIFIC_BASE + 10)
1458 #define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
1459 #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
1460 #define F_SEAL_GROW 0x0004 /* prevent file from growing */
1461 #define F_SEAL_WRITE 0x0008 /* prevent writes */
1464 // Reading the seal information requires CAP_FSTAT.
1465 int seals = fcntl(memfd, F_GET_SEALS);
1467 if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals);
1468 int seals_ro = fcntl(memfd_ro, F_GET_SEALS);
1469 EXPECT_EQ(seals, seals_ro);
1470 if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro);
1471 int seals_rw = fcntl(memfd_rw, F_GET_SEALS);
1472 EXPECT_NOTCAPABLE(seals_rw);
1474 // Fail to seal as a writable mapping exists.
1475 EXPECT_EQ(-1, fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
1476 EXPECT_EQ(EBUSY, errno);
1479 // Seal the rw version; need to unmap first.
1482 EXPECT_OK(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
1484 seals = fcntl(memfd, F_GET_SEALS);
1486 if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals);
1487 seals_ro = fcntl(memfd_ro, F_GET_SEALS);
1488 EXPECT_EQ(seals, seals_ro);
1489 if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro);
1491 // Remove the CAP_FCHMOD right, can no longer add seals.
1492 EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW)));
1493 EXPECT_NOTCAPABLE(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));