2 * SPDX-License-Identifier: BSD-2-Clause
4 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
5 * Copyright (c) 2023 Jake Freeland <jfree@FreeBSD.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/callout.h>
32 #include <sys/fcntl.h>
34 #include <sys/filedesc.h>
35 #include <sys/filio.h>
36 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/mount.h>
40 #include <sys/mutex.h>
43 #include <sys/queue.h>
44 #include <sys/selinfo.h>
47 #include <sys/syscallsubr.h>
48 #include <sys/sysctl.h>
49 #include <sys/sysent.h>
50 #include <sys/sysproto.h>
51 #include <sys/timerfd.h>
52 #include <sys/timespec.h>
56 #include <security/audit/audit.h>
/* malloc(9) type used for every struct timerfd allocation in this file. */
58 static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures");
/* Held while timerfd_list is walked or modified. */
60 static struct mtx timerfd_list_lock;
/* All live timerfds, linked through their `entry` field. */
61 static LIST_HEAD(, timerfd) timerfd_list;
62 MTX_SYSINIT(timerfd, &timerfd_list_lock, "timerfd_list_lock", MTX_DEF);
/* Allocator that tfd_ino values are drawn from at creation time. */
64 static struct unrhdr64 tfdino_unr;
/*
 * Values stored in tfd_jumped.  TFD_JUMPED is a mask covering the two
 * states that the read path still has to report; once read, the state
 * becomes TFD_READ.
 */
66 #define TFD_NOJUMP 0 /* Realtime clock has not jumped. */
67 #define TFD_READ 1 /* Jumped, tfd has been read since. */
68 #define TFD_ZREAD 2 /* Jumped backwards, CANCEL_ON_SET=false. */
69 #define TFD_CANCELED 4 /* Jumped, CANCEL_ON_SET=true. */
70 #define TFD_JUMPED (TFD_ZREAD | TFD_CANCELED)
73 * One structure allocated per timerfd descriptor.
76 * (t) locked by tfd_lock mtx
77 * (l) locked by timerfd_list_lock mtx
78 * (c) const until freeing
/*
 * Timer state.  Once armed, tfd_time.it_value is compared against
 * nanouptime() (see timerfd_curval()), i.e. it is kept on the uptime
 * timeline rather than as the raw caller-supplied value.
 */
82 struct itimerspec tfd_time; /* (t) tfd timer */
83 clockid_t tfd_clockid; /* (c) timing base */
84 int tfd_flags; /* (c) creation flags */
85 int tfd_timflags; /* (t) timer flags */
87 /* Used internally. */
/* &tfd_count also serves as the sleep/wakeup channel for blocked readers. */
88 timerfd_t tfd_count; /* (t) expiration count since read */
89 bool tfd_expired; /* (t) true upon initial expiration */
90 struct mtx tfd_lock; /* tfd mtx lock */
91 struct callout tfd_callout; /* (t) expiration notification */
92 struct selinfo tfd_sel; /* (t) I/O alerts */
93 struct timespec tfd_boottim; /* (t) cached boottime */
/* Holds one of the TFD_* jump states (TFD_NOJUMP, TFD_READ, ...). */
94 int tfd_jumped; /* (t) timer jump status */
95 LIST_ENTRY(timerfd) entry; /* (l) entry in list */
/* The fields below are what timerfd_stat() reports. */
98 ino_t tfd_ino; /* (c) inode number */
99 struct timespec tfd_atim; /* (t) time of last read */
100 struct timespec tfd_mtim; /* (t) time of last settime */
101 struct timespec tfd_birthtim; /* (c) creation time */
/*
 * One-time setup: initialise tfdino_unr, the allocator that
 * kern_timerfd_create() draws tfd_ino values from.  The second
 * argument (1) is presumably the first number handed out -- confirm
 * against new_unrhdr64().  The data argument is not used by the
 * visible body; SYSINIT passes NULL for it.
 */
105 timerfd_init(void *data)
107 new_unrhdr64(&tfdino_unr, 1);
/* Register timerfd_init() to run at the SI_SUB_VFS startup stage. */
110 SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL);
/*
 * Store a boot-time value in *ts as a struct timespec.
 *
 * NOTE(review): how the intermediate struct timeval `tv` is obtained
 * is not visible in this excerpt; presumably it comes from the
 * kernel's boot-time accessor -- confirm.
 */
113 timerfd_getboottime(struct timespec *ts)
/* Convert the timeval form into the timespec the caller asked for. */
118 TIMEVAL_TO_TIMESPEC(&tv, ts);
122 * Call when a discontinuous jump has occurred in CLOCK_REALTIME and
123 * update timerfd's cached boottime. A jump can be triggered using
124 * functions like clock_settime(2) or settimeofday(2).
126 * Timer is marked TFD_CANCELED if TFD_TIMER_CANCEL_ON_SET is set
127 * and the realtime clock jumps.
128 * Timer is marked TFD_ZREAD if TFD_TIMER_CANCEL_ON_SET is not set,
129 * but the realtime clock jumps backwards.
/*
 * Body of the clock-jump handler: walks timerfd_list and, for every
 * absolute CLOCK_REALTIME timer whose cached boottime differs from the
 * current one, records the jump state and shifts the pending deadline.
 *
 * NOTE(review): the function's signature line and a few statements
 * (early return, `continue`, closing braces) are not visible in this
 * excerpt.
 */
135 struct timespec boottime, diff;
/* Checked before timerfd_list_lock is taken: nothing to do without timerfds. */
137 if (LIST_EMPTY(&timerfd_list))
140 timerfd_getboottime(&boottime);
141 mtx_lock(&timerfd_list_lock);
142 LIST_FOREACH(tfd, &timerfd_list, entry) {
143 mtx_lock(&tfd->tfd_lock);
/*
 * Only absolute CLOCK_REALTIME timers with a stale cached boottime are
 * affected; anything else is unlocked again (and presumably skipped).
 */
144 if (tfd->tfd_clockid != CLOCK_REALTIME ||
145 (tfd->tfd_timflags & TFD_TIMER_ABSTIME) == 0 ||
146 timespeccmp(&boottime, &tfd->tfd_boottim, ==)) {
147 mtx_unlock(&tfd->tfd_lock);
/* The jump state is only recorded for timers whose callout is active. */
151 if (callout_active(&tfd->tfd_callout)) {
152 if ((tfd->tfd_timflags & TFD_TIMER_CANCEL_ON_SET) != 0)
153 tfd->tfd_jumped = TFD_CANCELED;
/* New boottime earlier than the cached one: flag TFD_ZREAD. */
154 else if (timespeccmp(&boottime, &tfd->tfd_boottim, <))
155 tfd->tfd_jumped = TFD_ZREAD;
158 * Do not reschedule callout when
159 * inside interval time loop.
161 if (!tfd->tfd_expired) {
/*
 * diff = new boottime - cached boottime; the deadline is moved by the
 * same amount in the opposite direction.
 */
162 timespecsub(&boottime,
163 &tfd->tfd_boottim, &diff);
164 timespecsub(&tfd->tfd_time.it_value,
165 &diff, &tfd->tfd_time.it_value);
/*
 * Re-arm with the adjusted deadline only when callout_stop() returns 1
 * (per callout(9) that means a pending callout was cancelled -- confirm).
 */
166 if (callout_stop(&tfd->tfd_callout) == 1) {
167 callout_schedule_sbt(&tfd->tfd_callout,
168 tstosbt(tfd->tfd_time.it_value),
/* Remember the new boottime so the next jump is measured against it. */
174 tfd->tfd_boottim = boottime;
175 mtx_unlock(&tfd->tfd_lock);
177 mtx_unlock(&timerfd_list_lock);
/*
 * fo_read handler for timerfd descriptors: hands the expiration count
 * (a timerfd_t) to the caller through uiomove().
 *
 * NOTE(review): several statements of this function (return values,
 * the remaining mtx_sleep() arguments, and whatever resets tfd_count)
 * are not visible in this excerpt; the comments below cover only the
 * visible lines.
 */
181 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
182 int flags, struct thread *td)
184 struct timerfd *tfd = fp->f_data;
/* The caller's buffer must have room for one timerfd_t. */
188 if (uio->uio_resid < sizeof(timerfd_t))
191 mtx_lock(&tfd->tfd_lock);
/* Record the time of this read; timerfd_stat() reports it as st_atim. */
193 getnanotime(&tfd->tfd_atim);
/*
 * A clock jump has not been reported yet.  Mark it as read and drop
 * the lock; the TFD_CANCELED case takes an extra branch whose body is
 * not visible here (presumably an ECANCELED error -- confirm).
 */
194 if ((tfd->tfd_jumped & TFD_JUMPED) != 0) {
195 if (tfd->tfd_jumped == TFD_CANCELED)
197 tfd->tfd_jumped = TFD_READ;
199 mtx_unlock(&tfd->tfd_lock);
/* Reached when no jump is pending (assuming the branch above returns). */
202 tfd->tfd_jumped = TFD_NOJUMP;
/* Nothing has expired since the last read. */
204 if (tfd->tfd_count == 0) {
/* Non-blocking descriptors do not sleep; the lock is dropped instead. */
205 if ((fp->f_flag & FNONBLOCK) != 0) {
206 mtx_unlock(&tfd->tfd_lock);
/*
 * Snapshot rtc_generation into td_rtcgen, then sleep on &tfd_count;
 * timerfd_expire() issues the matching wakeup().
 */
209 td->td_rtcgen = atomic_load_acq_int(&rtc_generation);
210 error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock,
215 mtx_unlock(&tfd->tfd_lock);
/* Take a private copy under the lock so uiomove() can run unlocked. */
220 count = tfd->tfd_count;
222 mtx_unlock(&tfd->tfd_lock);
223 error = uiomove(&count, sizeof(timerfd_t), uio);
/*
 * fo_ioctl handler: toggles FASYNC and FNONBLOCK in fp->f_flag
 * according to the int that `data` points to (non-zero sets the flag,
 * zero clears it).
 *
 * NOTE(review): the switch on `cmd` and its case labels are not
 * visible in this excerpt; presumably FIOASYNC and FIONBIO -- confirm.
 */
229 timerfd_ioctl(struct file *fp, u_long cmd, void *data,
230 struct ucred *active_cred, struct thread *td)
/* f_flag is updated with atomic set/clear operations. */
234 if (*(int *)data != 0)
235 atomic_set_int(&fp->f_flag, FASYNC);
237 atomic_clear_int(&fp->f_flag, FASYNC);
240 if (*(int *)data != 0)
241 atomic_set_int(&fp->f_flag, FNONBLOCK);
243 atomic_clear_int(&fp->f_flag, FNONBLOCK);
/*
 * fo_poll handler: reports POLLIN/POLLRDNORM when at least one
 * expiration is pending and registers the thread on tfd_sel.
 *
 * NOTE(review): the line guarding selrecord() and the return statement
 * are not visible in this excerpt.
 */
250 timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
253 struct timerfd *tfd = fp->f_data;
256 mtx_lock(&tfd->tfd_lock);
/*
 * Readable only if a count is pending and the jump state is not
 * TFD_READ; only the read events the caller asked for are returned.
 */
257 if ((events & (POLLIN | POLLRDNORM)) != 0 &&
258 tfd->tfd_count > 0 && tfd->tfd_jumped != TFD_READ)
259 revents |= events & (POLLIN | POLLRDNORM);
/* timerfd_expire() calls selwakeup() on the same selinfo. */
261 selrecord(td, &tfd->tfd_sel);
262 mtx_unlock(&tfd->tfd_lock);
/*
 * kqueue f_detach callback: unhooks the knote from the timerfd's
 * knote list.  The timerfd is taken from kn->kn_hook.
 */
268 filt_timerfddetach(struct knote *kn)
270 struct timerfd *tfd = kn->kn_hook;
272 mtx_lock(&tfd->tfd_lock);
/*
 * tfd_lock is held here; the final argument 1 is presumably the
 * "list already locked" flag of knlist_remove() -- confirm.
 */
273 knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
274 mtx_unlock(&tfd->tfd_lock);
/*
 * kqueue f_event callback: publishes the pending expiration count in
 * kn_data and reports the knote as active when that count is non-zero.
 * The caller must hold tfd_lock (asserted below); `hint` is not used
 * by the visible lines.
 */
278 filt_timerfdread(struct knote *kn, long hint)
280 struct timerfd *tfd = kn->kn_hook;
282 mtx_assert(&tfd->tfd_lock, MA_OWNED);
283 kn->kn_data = (int64_t)tfd->tfd_count;
284 return (tfd->tfd_count > 0);
/*
 * Filter operations installed by timerfd_kqfilter() for EVFILT_READ
 * knotes.  NOTE(review): at least one initialiser line is not visible
 * in this excerpt.
 */
287 static struct filterops timerfd_rfiltops = {
289 .f_detach = filt_timerfddetach,
290 .f_event = filt_timerfdread,
/*
 * fo_kqfilter handler: attaches a knote to the timerfd.  Only
 * EVFILT_READ is handled by the visible code.
 *
 * NOTE(review): the error return for other filters and the assignment
 * of kn->kn_hook (which both filter callbacks read) are not visible in
 * this excerpt.
 */
294 timerfd_kqfilter(struct file *fp, struct knote *kn)
296 struct timerfd *tfd = fp->f_data;
298 if (kn->kn_filter != EVFILT_READ)
301 kn->kn_fop = &timerfd_rfiltops;
/*
 * tfd_lock is not held here; the final argument 0 is presumably the
 * "list not locked" flag of knlist_add() -- confirm.
 */
303 knlist_add(&tfd->tfd_sel.si_note, kn, 0);
/*
 * fo_stat handler: fills *sb from the file and timerfd state.  Every
 * field not assigned below is left zero by the initial bzero().
 */
309 timerfd_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
311 struct timerfd *tfd = fp->f_data;
313 bzero(sb, sizeof(*sb));
/* Link count is derived from the file's reference count minus one. */
314 sb->st_nlink = fp->f_count - 1;
/* Ownership comes from the credentials stored in the file. */
315 sb->st_uid = fp->f_cred->cr_uid;
316 sb->st_gid = fp->f_cred->cr_gid;
317 sb->st_blksize = PAGE_SIZE;
/* atim and mtim are updated under tfd_lock, so copy them under it. */
318 mtx_lock(&tfd->tfd_lock);
319 sb->st_atim = tfd->tfd_atim;
320 sb->st_mtim = tfd->tfd_mtim;
321 mtx_unlock(&tfd->tfd_lock);
/* No separate change time is tracked; ctim mirrors mtim. */
322 sb->st_ctim = sb->st_mtim;
/* tfd_ino and tfd_birthtim are marked constant until free; read unlocked. */
323 sb->st_ino = tfd->tfd_ino;
324 sb->st_birthtim = tfd->tfd_birthtim;
/*
 * fo_close handler: tears down and frees the timerfd attached to fp.
 * The return statement is not visible in this excerpt.
 */
330 timerfd_close(struct file *fp, struct thread *td)
332 struct timerfd *tfd = fp->f_data;
/* Unlink first so the list walker can no longer reach this timerfd. */
334 mtx_lock(&timerfd_list_lock);
335 LIST_REMOVE(tfd, entry);
336 mtx_unlock(&timerfd_list_lock);
/* Quiesce the callout and select/kqueue state before destroying the lock. */
338 callout_drain(&tfd->tfd_callout);
339 seldrain(&tfd->tfd_sel);
340 knlist_destroy(&tfd->tfd_sel.si_note);
341 mtx_destroy(&tfd->tfd_lock);
342 free(tfd, M_TIMERFD);
/* Point the file at badfileops now that its timerfd is gone. */
343 fp->f_ops = &badfileops;
/*
 * fo_fill_kinfo handler: describes the timerfd in *kif by exporting
 * its clock id, creation flags and the kernel address of the timerfd
 * structure.  `fdp` is not used by the visible lines.
 */
349 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif,
350 struct filedesc *fdp)
352 struct timerfd *tfd = fp->f_data;
354 kif->kf_type = KF_TYPE_TIMERFD;
355 kif->kf_un.kf_timerfd.kf_timerfd_clockid = tfd->tfd_clockid;
356 kif->kf_un.kf_timerfd.kf_timerfd_flags = tfd->tfd_flags;
/* The pointer is exported as an integer, not dereferenced. */
357 kif->kf_un.kf_timerfd.kf_timerfd_addr = (uintptr_t)tfd;
/*
 * File operations vector passed to finit() for DTYPE_TIMERFD files.
 * Operations with no timerfd-specific handler are wired to the generic
 * invfo_* handlers.
 */
362 static struct fileops timerfdops = {
363 .fo_read = timerfd_read,
364 .fo_write = invfo_rdwr,
365 .fo_truncate = invfo_truncate,
366 .fo_ioctl = timerfd_ioctl,
367 .fo_poll = timerfd_poll,
368 .fo_kqfilter = timerfd_kqfilter,
369 .fo_stat = timerfd_stat,
370 .fo_close = timerfd_close,
371 .fo_chmod = invfo_chmod,
372 .fo_chown = invfo_chown,
373 .fo_sendfile = invfo_sendfile,
374 .fo_fill_kinfo = timerfd_fill_kinfo,
375 .fo_cmp = file_kcmp_generic,
376 .fo_flags = DFLAG_PASSABLE,
/*
 * Report the timer's current setting in *old_value.  The caller must
 * hold tfd_lock (asserted below).
 *
 * The stored itimerspec is copied as-is; if the timer is armed
 * (it_value set), it_value is then replaced by the stored deadline
 * minus the current uptime.
 */
380 timerfd_curval(struct timerfd *tfd, struct itimerspec *old_value)
382 struct timespec curr_value;
384 mtx_assert(&tfd->tfd_lock, MA_OWNED);
385 *old_value = tfd->tfd_time;
386 if (timespecisset(&tfd->tfd_time.it_value)) {
/* The stored deadline is on the uptime timeline, so subtract uptime. */
387 nanouptime(&curr_value);
388 timespecsub(&tfd->tfd_time.it_value, &curr_value,
389 &old_value->it_value);
/*
 * Callout handler installed by kern_timerfd_settime(); `arg` is the
 * timerfd.  The callout is initialised with tfd_lock
 * (callout_init_mtx()), so this presumably runs with tfd_lock held,
 * which matches the KNOTE_LOCKED() call below -- confirm.
 *
 * NOTE(review): a few statements (the basic count increment, the
 * uptime sample, the remaining callout_schedule_sbt() arguments) are
 * not visible in this excerpt.
 */
394 timerfd_expire(void *arg)
396 struct timerfd *tfd = (struct timerfd *)arg;
397 struct timespec uptime;
400 tfd->tfd_expired = true;
/* Periodic timer: a non-zero interval means the callout is re-armed. */
401 if (timespecisset(&tfd->tfd_time.it_interval)) {
402 /* Count missed events. */
/*
 * If uptime is already past the deadline, add one count for every whole
 * interval that fits into the overshoot.
 */
404 if (timespeccmp(&uptime, &tfd->tfd_time.it_value, >)) {
405 timespecsub(&uptime, &tfd->tfd_time.it_value, &uptime);
406 tfd->tfd_count += tstosbt(uptime) /
407 tstosbt(tfd->tfd_time.it_interval);
/* Advance the deadline by one interval and schedule the next firing. */
409 timespecadd(&tfd->tfd_time.it_value,
410 &tfd->tfd_time.it_interval, &tfd->tfd_time.it_value);
411 callout_schedule_sbt(&tfd->tfd_callout,
412 tstosbt(tfd->tfd_time.it_value),
415 /* Single shot timer. */
/* Mark the callout inactive and clear it_value so the timer reads as disarmed. */
416 callout_deactivate(&tfd->tfd_callout);
417 timespecclear(&tfd->tfd_time.it_value);
/* Notify sleeping readers, poll/select waiters and kqueue listeners. */
420 wakeup(&tfd->tfd_count);
421 selwakeup(&tfd->tfd_sel);
422 KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
/*
 * Create a timerfd on clock `clockid` with creation flags `flags`.
 * On the visible success path the new descriptor number is stored in
 * td->td_retval[0].
 *
 * NOTE(review): the error returns, most clock-id case labels and the
 * fflags assignments are not visible in this excerpt.
 */
426 kern_timerfd_create(struct thread *td, int clockid, int flags)
430 int error, fd, fflags;
/* Record both arguments for the audit trail. */
432 AUDIT_ARG_VALUE(clockid);
433 AUDIT_ARG_FFLAGS(flags);
/* Clock-id validation; only this case label is visible here. */
438 case CLOCK_MONOTONIC:
442 * CLOCK_BOOTTIME should be added once different from
/* Any flag other than TFD_CLOEXEC and TFD_NONBLOCK is rejected. */
449 if ((flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) != 0)
/* Translate TFD_* creation flags into fflags for falloc()/finit(). */
453 if ((flags & TFD_CLOEXEC) != 0)
455 if ((flags & TFD_NONBLOCK) != 0)
458 error = falloc(td, &fp, &fd, fflags);
/* Zero-filled allocation; every field not set below starts as 0. */
462 tfd = malloc(sizeof(*tfd), M_TIMERFD, M_WAITOK | M_ZERO);
463 tfd->tfd_clockid = (clockid_t)clockid;
464 tfd->tfd_flags = flags;
465 tfd->tfd_ino = alloc_unr64(&tfdino_unr);
/* tfd_lock backs both the callout and the knote list. */
466 mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
467 callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
468 knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
/* Cache the boot time and stamp the creation time. */
469 timerfd_getboottime(&tfd->tfd_boottim);
470 getnanotime(&tfd->tfd_birthtim);
/* Publish on the global list, then attach the timerfd to the file. */
471 mtx_lock(&timerfd_list_lock);
472 LIST_INSERT_HEAD(&timerfd_list, tfd, entry);
473 mtx_unlock(&timerfd_list_lock);
475 finit(fp, fflags, DTYPE_TIMERFD, tfd, &timerfdops);
479 td->td_retval[0] = fd;
/*
 * Look up `fd`, check that it is a DTYPE_TIMERFD file, and fill
 * *curr_value with timerfd_curval() under tfd_lock.
 *
 * NOTE(review): this read-only query requests cap_write_rights, the
 * same rights kern_timerfd_settime() asks for -- confirm that is
 * intended.  The error returns and the file reference release are not
 * visible in this excerpt.
 */
484 kern_timerfd_gettime(struct thread *td, int fd, struct itimerspec *curr_value)
490 error = fget(td, fd, &cap_write_rights, &fp);
/* Descriptors of any other file type are rejected. */
493 if (fp->f_type != DTYPE_TIMERFD) {
499 mtx_lock(&tfd->tfd_lock);
500 timerfd_curval(tfd, curr_value);
501 mtx_unlock(&tfd->tfd_lock);
/*
 * Arm or disarm the timerfd behind `fd` according to *new_value and
 * `flags`.  When old_value is non-NULL it receives the previous
 * setting as computed by timerfd_curval().
 *
 * NOTE(review): the error returns, the source of `ts`, the `else`
 * keywords and the file reference release are not visible in this
 * excerpt.
 */
508 kern_timerfd_settime(struct thread *td, int fd, int flags,
509 const struct itimerspec *new_value, struct itimerspec *old_value)
/* Only TFD_TIMER_ABSTIME and TFD_TIMER_CANCEL_ON_SET are accepted. */
516 if ((flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) != 0)
/* Both the initial value and the interval must pass validation. */
518 if (!timespecvalid_interval(&new_value->it_value) ||
519 !timespecvalid_interval(&new_value->it_interval))
522 error = fget(td, fd, &cap_write_rights, &fp);
525 if (fp->f_type != DTYPE_TIMERFD) {
531 mtx_lock(&tfd->tfd_lock);
/* Stamp the modification time (reported by timerfd_stat()) and keep the flags. */
532 getnanotime(&tfd->tfd_mtim);
533 tfd->tfd_timflags = flags;
535 /* Store old itimerspec, if applicable. */
/* Must happen before tfd_time is overwritten below. */
536 if (old_value != NULL)
537 timerfd_curval(tfd, old_value);
539 /* Set new expiration. */
540 tfd->tfd_time = *new_value;
541 if (timespecisset(&tfd->tfd_time.it_value)) {
/*
 * Relative timer: offset the value by `ts` (presumably the current
 * uptime -- its assignment is not visible here).
 */
542 if ((flags & TFD_TIMER_ABSTIME) == 0) {
544 timespecadd(&tfd->tfd_time.it_value, &ts,
545 &tfd->tfd_time.it_value);
546 } else if (tfd->tfd_clockid == CLOCK_REALTIME) {
547 /* ECANCELED if unread jump is pending. */
548 if (tfd->tfd_jumped == TFD_CANCELED)
550 /* Convert from CLOCK_REALTIME to CLOCK_BOOTTIME. */
/* Subtracting the cached boottime moves the deadline to the uptime timeline. */
551 timespecsub(&tfd->tfd_time.it_value, &tfd->tfd_boottim,
552 &tfd->tfd_time.it_value);
/* Arm the callout at the absolute deadline; timerfd_expire() is the handler. */
554 callout_reset_sbt(&tfd->tfd_callout,
555 tstosbt(tfd->tfd_time.it_value),
556 0, timerfd_expire, tfd, C_ABSOLUTE);
/* Presumably the it_value == 0 branch: stopping the callout disarms the timer. */
558 callout_stop(&tfd->tfd_callout);
/* A new setting clears the expiration and clock-jump state. */
561 tfd->tfd_expired = false;
562 tfd->tfd_jumped = TFD_NOJUMP;
563 mtx_unlock(&tfd->tfd_lock);
/*
 * timerfd_create system call entry point: forwards the user-supplied
 * clockid and flags to kern_timerfd_create() and returns its result.
 */
570 sys_timerfd_create(struct thread *td, struct timerfd_create_args *uap)
572 return (kern_timerfd_create(td, uap->clockid, uap->flags));
/*
 * timerfd_gettime system call entry point: obtains the current setting
 * in a kernel-side itimerspec via kern_timerfd_gettime() and copies it
 * out to the user pointer uap->curr_value.
 *
 * NOTE(review): the check between the two calls and the return
 * statement are not visible in this excerpt.
 */
576 sys_timerfd_gettime(struct thread *td, struct timerfd_gettime_args *uap)
578 struct itimerspec curr_value;
581 error = kern_timerfd_gettime(td, uap->fd, &curr_value);
583 error = copyout(&curr_value, uap->curr_value,
590 sys_timerfd_settime(struct thread *td, struct timerfd_settime_args *uap)
592 struct itimerspec new_value, old_value;
595 error = copyin(uap->new_value, &new_value, sizeof(new_value));
598 if (uap->old_value == NULL) {
599 error = kern_timerfd_settime(td, uap->fd, uap->flags,
602 error = kern_timerfd_settime(td, uap->fd, uap->flags,
603 &new_value, &old_value);
605 error = copyout(&old_value, uap->old_value,