2 * Copyright (c) 2002 Alfred Perlstein <alfred@FreeBSD.org>
3 * Copyright (c) 2005 Robert N. M. Watson
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
31 #include "opt_posix.h"
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/sysproto.h>
36 #include <sys/eventhandler.h>
37 #include <sys/kernel.h>
40 #include <sys/mutex.h>
41 #include <sys/module.h>
42 #include <sys/condvar.h>
45 #include <sys/syscall.h>
47 #include <sys/sysent.h>
48 #include <sys/sysctl.h>
50 #include <sys/malloc.h>
51 #include <sys/fcntl.h>
53 #include <posix4/ksem.h>
54 #include <posix4/posix4.h>
55 #include <posix4/semaphore.h>
56 #include <posix4/_semaphore.h>
/*
 * Forward declarations for the internal helpers and for the kern_*
 * backends of the ksem_* system calls (the kern_* variants take a
 * 'dir' argument selecting user vs. kernel address space for copies).
 * NOTE(review): this excerpt is a sparse sampling of the original file;
 * some prototype continuation lines are missing here.
 */
58 static int sem_count_proc(struct proc *p);
59 static struct ksem *sem_lookup_byname(const char *name);
60 static int sem_create(struct thread *td, const char *name,
61 struct ksem **ksret, mode_t mode, unsigned int value);
62 static void sem_free(struct ksem *ksnew);
63 static int sem_perm(struct thread *td, struct ksem *ks);
64 static void sem_enter(struct proc *p, struct ksem *ks);
65 static int sem_leave(struct proc *p, struct ksem *ks);
66 static void sem_exithook(void *arg, struct proc *p);
67 static void sem_forkhook(void *arg, struct proc *p1, struct proc *p2,
69 static int sem_hasopen(struct thread *td, struct ksem *ks);
71 static int kern_sem_close(struct thread *td, semid_t id);
72 static int kern_sem_post(struct thread *td, semid_t id);
73 static int kern_sem_wait(struct thread *td, semid_t id, int tryflag,
74 struct timespec *abstime);
75 static int kern_sem_init(struct thread *td, int dir, unsigned int value,
77 static int kern_sem_open(struct thread *td, int dir, const char *name,
78 int oflag, mode_t mode, unsigned int value, semid_t *idp);
79 static int kern_sem_unlink(struct thread *td, const char *name);
/* Maximum length of a semaphore name (not counting the NUL). */
85 #define SEM_MAX_NAMELEN 14
/*
 * A semid_t handed to userland is the raw kernel pointer of the ksem;
 * ID_TO_SEM validates it by walking the live list (see id_to_sem()).
 */
87 #define SEM_TO_ID(x) ((intptr_t)(x))
88 #define ID_TO_SEM(x) id_to_sem(x)
91 * available semaphores go here, this includes sem_init and any semaphores
92 * created via sem_open that have not yet been unlinked.
94 LIST_HEAD(, ksem) ksem_head = LIST_HEAD_INITIALIZER(&ksem_head);
96 * semaphores still in use but have been sem_unlink()'d go here.
98 LIST_HEAD(, ksem) ksem_deadhead = LIST_HEAD_INITIALIZER(&ksem_deadhead);
/* Single global lock protecting both lists and all ksem state. */
100 static struct mtx sem_lock;
101 static MALLOC_DEFINE(M_SEM, "sems", "semaphore data");
/* Count of existing semaphores, exported read-only via sysctl. */
103 static int nsems = 0;
104 SYSCTL_DECL(_p1003_1b);
105 SYSCTL_INT(_p1003_1b, OID_AUTO, nsems, CTLFLAG_RD, &nsems, 0, "");
/* Eventhandler tags so sem_modload() can deregister on unload. */
107 static eventhandler_tag sem_exit_tag, sem_exec_tag, sem_fork_tag;
/* Debug printf; presumably compiled out unless debugging is enabled
 * (the conditional is in lines missing from this excerpt). */
110 #define DP(x) printf x
/*
 * sem_ref: take a reference on a semaphore.  Caller must hold sem_lock.
 * NOTE(review): the ks_ref increment itself is in lines missing from
 * this excerpt — confirm against the full source.
 */
117 sem_ref(struct ksem *ks)
120 mtx_assert(&sem_lock, MA_OWNED);
122 DP(("sem_ref: ks = %p, ref = %d\n", ks, ks->ks_ref));
/*
 * sem_rel: drop a reference; when the count reaches zero the semaphore
 * is destroyed (presumably via sem_free() — the call is in lines
 * missing from this excerpt).  Caller must hold sem_lock.
 */
127 sem_rel(struct ksem *ks)
130 mtx_assert(&sem_lock, MA_OWNED);
131 DP(("sem_rel: ks = %p, ref = %d\n", ks, ks->ks_ref - 1));
132 if (--ks->ks_ref == 0)
/*
 * id_to_sem: translate a user-supplied semid_t back to a ksem pointer.
 * Since ids are raw pointers (SEM_TO_ID), the live list is scanned to
 * reject stale or forged ids.  Caller must hold sem_lock.
 */
136 static __inline struct ksem *id_to_sem(semid_t id);
145 mtx_assert(&sem_lock, MA_OWNED);
146 DP(("id_to_sem: id = %0x,%p\n", id, (struct ksem *)id));
147 LIST_FOREACH(ks, &ksem_head, ks_entry) {
148 DP(("id_to_sem: ks = %p\n", ks));
149 if (ks == (struct ksem *)id)
/*
 * sem_lookup_byname: find a named (non-anonymous) semaphore on the
 * live list by exact name match.  Unnamed semaphores (ks_name == NULL)
 * are skipped.  Caller must hold sem_lock.
 */
156 sem_lookup_byname(name)
161 mtx_assert(&sem_lock, MA_OWNED);
162 LIST_FOREACH(ks, &ksem_head, ks_entry)
163 if (ks->ks_name != NULL && strcmp(ks->ks_name, name) == 0)
/*
 * sem_create: allocate and initialize a new ksem.
 * Validates value <= SEM_VALUE_MAX and, for named semaphores, that the
 * name fits SEM_MAX_NAMELEN and has the form "/name" (leading '/' and
 * no further '/').  Ownership is taken from the creating thread's
 * credentials; the creating process is registered via sem_enter() and
 * the global SEM_NSEMS_MAX resource limit is enforced.
 * NOTE(review): many interior lines (error unwinding, nsems update,
 * *ksret assignment) are missing from this excerpt.
 */
169 sem_create(td, name, ksret, mode, value)
182 DP(("sem_create\n"));
185 if (value > SEM_VALUE_MAX)
187 ret = malloc(sizeof(*ret), M_SEM, M_WAITOK | M_ZERO);
190 if (len > SEM_MAX_NAMELEN) {
192 return (ENAMETOOLONG);
194 /* name must start with a '/' but not contain one. */
195 if (*name != '/' || len < 2 || index(name + 1, '/') != NULL) {
199 ret->ks_name = malloc(len + 1, M_SEM, M_WAITOK);
200 strcpy(ret->ks_name, name);
205 ret->ks_value = value;
/* Owner uid/gid come from the creating thread's credentials. */
208 ret->ks_uid = uc->cr_uid;
209 ret->ks_gid = uc->cr_gid;
211 cv_init(&ret->ks_cv, "sem");
212 LIST_INIT(&ret->ks_users);
214 sem_enter(td->td_proc, ret);
/* Enforce the global limit on the number of semaphores. */
217 if (nsems >= p31b_getcfg(CTL_P1003_1B_SEM_NSEMS_MAX)) {
218 sem_leave(td->td_proc, ret);
225 mtx_unlock(&sem_lock);
229 #ifndef _SYS_SYSPROTO_H_
230 struct ksem_init_args {
234 int ksem_init(struct thread *td, struct ksem_init_args *uap);
/*
 * ksem_init syscall: thin wrapper that forwards to kern_sem_init()
 * with UIO_USERSPACE so the resulting id is copied out to userland.
 */
239 struct ksem_init_args *uap;
243 error = kern_sem_init(td, UIO_USERSPACE, uap->value, uap->idp);
/*
 * kern_sem_init: create an unnamed (sem_init-style) semaphore.
 * The semaphore has no name (NULL) and owner rwx permissions for user
 * and group; the id is copied out when dir == UIO_USERSPACE, then the
 * semaphore is linked onto the live list.
 * NOTE(review): the error-unwind path between the copyout and the list
 * insertion is in lines missing from this excerpt.
 */
248 kern_sem_init(td, dir, value, idp)
258 error = sem_create(td, NULL, &ks, S_IRWXU | S_IRWXG, value);
262 if (dir == UIO_USERSPACE) {
263 error = copyout(&id, idp, sizeof(id));
267 mtx_unlock(&sem_lock);
274 LIST_INSERT_HEAD(&ksem_head, ks, ks_entry);
276 mtx_unlock(&sem_lock);
280 #ifndef _SYS_SYSPROTO_H_
281 struct ksem_open_args {
288 int ksem_open(struct thread *td, struct ksem_open_args *uap);
/*
 * ksem_open syscall: copy the name in from userland (bounded by
 * SEM_MAX_NAMELEN + 1, so over-long names fail in copyinstr) and
 * forward to kern_sem_open().
 */
293 struct ksem_open_args *uap;
295 char name[SEM_MAX_NAMELEN + 1];
299 error = copyinstr(uap->name, name, SEM_MAX_NAMELEN + 1, &done);
302 DP((">>> sem_open start\n"));
303 error = kern_sem_open(td, UIO_USERSPACE,
304 name, uap->oflag, uap->mode, uap->value, uap->idp);
305 DP(("<<< sem_open end\n"));
/*
 * kern_sem_open: open (and possibly create) a named semaphore.
 *
 * Lookup/creation protocol, as visible in this excerpt:
 *  - existing + O_EXCL         -> error (EEXIST, presumably);
 *  - missing  + !O_CREAT       -> error;
 *  - missing  + O_CREAT        -> drop sem_lock (creation may block in
 *    malloc), sem_create(), copy the id out, then RE-CHECK the name
 *    under the lock: another thread may have created the same name
 *    while the lock was dropped.  If we lost the race, back out our
 *    new semaphore and either fail (O_EXCL) or fall through to open
 *    the winner's semaphore;
 *  - existing, not creator     -> enforce sem_perm(), copy id out, and
 *    register the process via sem_enter().
 *
 * NOTE(review): substantial interior lines (error returns, reference
 * counting around the re-lookup) are missing from this excerpt.
 */
310 kern_sem_open(td, dir, name, oflag, mode, value, idp)
319 struct ksem *ksnew, *ks;
325 ks = sem_lookup_byname(name);
327 * If we found it but O_EXCL is set, error.
329 if (ks != NULL && (oflag & O_EXCL) != 0) {
330 mtx_unlock(&sem_lock);
334 * If we didn't find it...
338 * didn't ask for creation? error.
340 if ((oflag & O_CREAT) == 0) {
341 mtx_unlock(&sem_lock);
345 * We may block during creation, so drop the lock.
347 mtx_unlock(&sem_lock);
348 error = sem_create(td, name, &ksnew, mode, value);
351 id = SEM_TO_ID(ksnew);
352 if (dir == UIO_USERSPACE) {
353 DP(("about to copyout! %d to %p\n", id, idp));
354 error = copyout(&id, idp, sizeof(id));
/* Copyout failed: undo our process's registration on the new sem. */
357 sem_leave(td->td_proc, ksnew);
359 mtx_unlock(&sem_lock);
363 DP(("about to set! %d to %p\n", id, idp));
367 * We need to make sure we haven't lost a race while
368 * allocating during creation.
371 ks = sem_lookup_byname(name);
/* Lost the creation race: discard our semaphore. */
374 sem_leave(td->td_proc, ksnew);
376 /* we lost and we can't loose... */
377 if ((oflag & O_EXCL) != 0) {
378 mtx_unlock(&sem_lock);
/* Won the race: publish the new semaphore on the live list. */
382 DP(("sem_create: about to add to list...\n"));
383 LIST_INSERT_HEAD(&ksem_head, ksnew, ks_entry);
384 DP(("sem_create: setting list bit...\n"));
385 ksnew->ks_onlist = 1;
386 DP(("sem_create: done, about to unlock...\n"));
388 mtx_unlock(&sem_lock);
391 * if we aren't the creator, then enforce permissions.
393 error = sem_perm(td, ks);
396 mtx_unlock(&sem_lock);
400 if (dir == UIO_USERSPACE) {
401 error = copyout(&id, idp, sizeof(id));
405 mtx_unlock(&sem_lock);
/* Record this process as a user of the existing semaphore. */
411 sem_enter(td->td_proc, ks);
414 mtx_unlock(&sem_lock);
/*
 * sem_perm: write-permission check against the semaphore's mode bits.
 * Grants access if the caller's uid matches the owner with S_IWUSR,
 * the gid matches with S_IWGRP, S_IWOTH is set, or the caller passes
 * the superuser check.  (The return statements are in lines missing
 * from this excerpt.)
 */
427 DP(("sem_perm: uc(%d,%d) ks(%d,%d,%o)\n",
428 uc->cr_uid, uc->cr_gid,
429 ks->ks_uid, ks->ks_gid, ks->ks_mode));
430 if ((uc->cr_uid == ks->ks_uid && (ks->ks_mode & S_IWUSR) != 0) ||
431 (uc->cr_gid == ks->ks_gid && (ks->ks_mode & S_IWGRP) != 0) ||
432 (ks->ks_mode & S_IWOTH) != 0 || suser(td) == 0)
/*
 * sem_free: tear down a semaphore — unlink it from whichever list it
 * is on, free the name (if named), destroy the condvar, and presumably
 * free the structure and decrement nsems (those lines are missing from
 * this excerpt).
 */
438 sem_free(struct ksem *ks)
443 LIST_REMOVE(ks, ks_entry);
444 if (ks->ks_name != NULL)
445 free(ks->ks_name, M_SEM);
446 cv_destroy(&ks->ks_cv);
/*
 * sem_getuser: find the kuser record linking process p to semaphore
 * ks (matched by pid), or NULL if p has no open reference to ks.
 */
450 static __inline struct kuser *sem_getuser(struct proc *p, struct ksem *ks);
452 static __inline struct kuser *
459 LIST_FOREACH(k, &ks->ks_users, ku_next)
460 if (k->ku_pid == p->p_pid)
/* Body of sem_hasopen (header lines missing from this excerpt): a
 * semaphore counts as "open" for td if it is unnamed and the caller
 * passes sem_perm(), or if td's process has a kuser record on it. */
471 return ((ks->ks_name == NULL && sem_perm(td, ks) == 0)
472 || sem_getuser(td->td_proc, ks) != NULL);
/*
 * sem_leave: remove process p's user record from semaphore ks and free
 * it; presumably also drops the reference p held (the surrounding
 * header and the free/sem_rel calls are in lines missing from this
 * excerpt — confirm against the full source).
 */
482 DP(("sem_leave: ks = %p\n", ks));
483 k = sem_getuser(p, ks);
484 DP(("sem_leave: ks = %p, k = %p\n", ks, k));
486 LIST_REMOVE(k, ku_next);
488 DP(("sem_leave: about to free k\n"));
490 DP(("sem_leave: returning\n"));
/*
 * sem_enter: register process p as a user of semaphore ks.
 * Allocates the kuser record before taking sem_lock (M_WAITOK may
 * sleep); if p already has a record (k != NULL after the re-check) the
 * new allocation is presumably discarded — that path is in lines
 * missing from this excerpt.
 */
501 struct kuser *ku, *k;
503 ku = malloc(sizeof(*ku), M_SEM, M_WAITOK);
504 ku->ku_pid = p->p_pid;
506 k = sem_getuser(p, ks);
508 mtx_unlock(&sem_lock);
512 LIST_INSERT_HEAD(&ks->ks_users, ku, ku_next);
514 mtx_unlock(&sem_lock);
517 #ifndef _SYS_SYSPROTO_H_
518 struct ksem_unlink_args {
521 int ksem_unlink(struct thread *td, struct ksem_unlink_args *uap);
/*
 * ksem_unlink syscall: copy the name in from userland and forward to
 * kern_sem_unlink().
 */
527 struct ksem_unlink_args *uap;
529 char name[SEM_MAX_NAMELEN + 1];
533 error = copyinstr(uap->name, name, SEM_MAX_NAMELEN + 1, &done);
534 return (error ? error :
535 kern_sem_unlink(td, name));
/*
 * kern_sem_unlink: unlink a named semaphore.  After a permission
 * check, the semaphore is moved from the live list to the dead list;
 * processes that still have it open keep using it until their last
 * reference is dropped (POSIX sem_unlink semantics, matching the
 * ksem_deadhead comment near the top of the file).
 */
539 kern_sem_unlink(td, name)
547 ks = sem_lookup_byname(name);
551 error = sem_perm(td, ks);
552 DP(("sem_unlink: '%s' ks = %p, error = %d\n", name, ks, error));
554 LIST_REMOVE(ks, ks_entry);
555 LIST_INSERT_HEAD(&ksem_deadhead, ks, ks_entry);
558 mtx_unlock(&sem_lock);
562 #ifndef _SYS_SYSPROTO_H_
563 struct ksem_close_args {
566 int ksem_close(struct thread *td, struct ksem_close_args *uap);
/* ksem_close syscall: trivial forwarder to kern_sem_close(). */
570 ksem_close(struct thread *td, struct ksem_close_args *uap)
573 return (kern_sem_close(td, uap->id));
/*
 * kern_sem_close: drop this process's use of a NAMED semaphore.
 * Closing is rejected for unnamed (sem_init-created) semaphores, which
 * must be destroyed with ksem_destroy() instead.
 * NOTE(review): the id lookup and default-error assignment are in
 * lines missing from this excerpt.
 */
577 kern_sem_close(td, id)
587 /* this is not a valid operation for unnamed sems */
588 if (ks != NULL && ks->ks_name != NULL)
589 error = sem_leave(td->td_proc, ks);
590 mtx_unlock(&sem_lock);
594 #ifndef _SYS_SYSPROTO_H_
595 struct ksem_post_args {
598 int ksem_post(struct thread *td, struct ksem_post_args *uap);
/* ksem_post syscall: trivial forwarder to kern_sem_post(). */
603 struct ksem_post_args *uap;
606 return (kern_sem_post(td, uap->id));
/*
 * kern_sem_post: increment a semaphore the caller has open.
 * Fails for an invalid/unopened id, and overflow at SEM_VALUE_MAX is
 * rejected rather than wrapped.  A waiter, if any, is woken with
 * cv_signal().  (The increment and error returns are in lines missing
 * from this excerpt.)
 */
610 kern_sem_post(td, id)
619 if (ks == NULL || !sem_hasopen(td, ks)) {
623 if (ks->ks_value == SEM_VALUE_MAX) {
628 if (ks->ks_waiters > 0)
629 cv_signal(&ks->ks_cv);
632 mtx_unlock(&sem_lock);
636 #ifndef _SYS_SYSPROTO_H_
637 struct ksem_wait_args {
640 int ksem_wait(struct thread *td, struct ksem_wait_args *uap);
/* ksem_wait syscall: blocking wait — no tryflag, no timeout. */
646 struct ksem_wait_args *uap;
649 return (kern_sem_wait(td, uap->id, 0, NULL));
652 #ifndef _SYS_SYSPROTO_H_
653 struct ksem_timedwait_args {
655 struct timespec *abstime;
657 int ksem_timedwait(struct thread *td, struct ksem_timedwait_args *uap);
/*
 * ksem_timedwait syscall: copy the absolute timeout in from userland,
 * validate tv_nsec, and forward to kern_sem_wait().  A NULL timespec
 * is accepted and means "wait forever".  (The assignment of ts to
 * &abstime is in lines missing from this excerpt.)
 */
660 ksem_timedwait(td, uap)
662 struct ksem_timedwait_args *uap;
664 struct timespec abstime;
668 /* We allow a null timespec (wait forever). */
669 if (uap->abstime == NULL)
672 error = copyin(uap->abstime, &abstime, sizeof(abstime));
/* Reject out-of-range nanoseconds before arming the timeout. */
675 if (abstime.tv_nsec >= 1000000000 || abstime.tv_nsec < 0)
679 return (kern_sem_wait(td, uap->id, 0, ts));
682 #ifndef _SYS_SYSPROTO_H_
683 struct ksem_trywait_args {
686 int ksem_trywait(struct thread *td, struct ksem_trywait_args *uap);
/* ksem_trywait syscall: non-blocking wait (tryflag = 1, no timeout). */
689 ksem_trywait(td, uap)
691 struct ksem_trywait_args *uap;
694 return (kern_sem_wait(td, uap->id, 1, NULL));
/*
 * kern_sem_wait: common backend for wait/trywait/timedwait.
 * With tryflag set, a zero-valued semaphore fails immediately
 * (EAGAIN, presumably); otherwise the thread sleeps on the condvar —
 * interruptibly (cv_wait_sig) when no timeout is given, or in a
 * retry loop with cv_timedwait_sig when abstime is set.  On each
 * timed iteration the remaining time (abstime minus "now") is
 * recomputed and converted to ticks via tvtohz().
 * NOTE(review): the lines fetching the current time into ts1/ts2, the
 * waiter-count bookkeeping, and the value decrement are missing from
 * this excerpt — confirm against the full source.
 */
698 kern_sem_wait(td, id, tryflag, abstime)
702 struct timespec *abstime;
704 struct timespec ts1, ts2;
709 DP((">>> kern_sem_wait entered!\n"));
713 DP(("kern_sem_wait ks == NULL\n"));
718 if (!sem_hasopen(td, ks)) {
719 DP(("kern_sem_wait hasopen failed\n"));
723 DP(("kern_sem_wait value = %d, tryflag %d\n", ks->ks_value, tryflag));
724 if (ks->ks_value == 0) {
728 else if (abstime == NULL)
729 error = cv_wait_sig(&ks->ks_cv, &sem_lock);
/* Timed wait: sleep for (abstime - now), retried on EWOULDBLOCK
 * until the deadline actually passes or another error occurs. */
734 timespecsub(&ts1, &ts2);
735 TIMESPEC_TO_TIMEVAL(&tv, &ts1);
740 error = cv_timedwait_sig(&ks->ks_cv,
741 &sem_lock, tvtohz(&tv));
742 if (error != EWOULDBLOCK)
755 mtx_unlock(&sem_lock);
756 DP(("<<< kern_sem_wait leaving, error = %d\n", error));
760 #ifndef _SYS_SYSPROTO_H_
761 struct ksem_getvalue_args {
765 int ksem_getvalue(struct thread *td, struct ksem_getvalue_args *uap);
/*
 * ksem_getvalue syscall: snapshot the semaphore's current value under
 * sem_lock and copy it out.  The value read is only a snapshot — it
 * may be stale by the time userland sees it.  (The line copying
 * ks_value into val is missing from this excerpt.)
 */
768 ksem_getvalue(td, uap)
770 struct ksem_getvalue_args *uap;
776 ks = ID_TO_SEM(uap->id);
777 if (ks == NULL || !sem_hasopen(td, ks)) {
778 mtx_unlock(&sem_lock);
782 mtx_unlock(&sem_lock);
783 error = copyout(&val, uap->val, sizeof(val));
787 #ifndef _SYS_SYSPROTO_H_
788 struct ksem_destroy_args {
791 int ksem_destroy(struct thread *td, struct ksem_destroy_args *uap);
/*
 * ksem_destroy syscall: destroy an UNNAMED semaphore.  Rejected for
 * named semaphores (ks_name != NULL — those use close/unlink), for
 * ids the caller hasn't opened, and while any thread is still waiting
 * on it.  (The actual teardown call is in lines missing from this
 * excerpt.)
 */
794 ksem_destroy(td, uap)
796 struct ksem_destroy_args *uap;
802 ks = ID_TO_SEM(uap->id);
803 if (ks == NULL || !sem_hasopen(td, ks) ||
804 ks->ks_name != NULL) {
808 if (ks->ks_waiters != 0) {
815 mtx_unlock(&sem_lock);
820 * Count the number of kusers associated with a proc, so as to guess at how
821 * many to allocate when forking.
/*
 * Walks both the live and dead lists and counts every kuser record
 * whose pid matches p.  Caller must hold sem_lock.  The result is
 * only a hint: the count can change once the lock is dropped (see
 * sem_forkhook's retry loop).
 */
831 mtx_assert(&sem_lock, MA_OWNED);
834 LIST_FOREACH(ks, &ksem_head, ks_entry) {
835 LIST_FOREACH(ku, &ks->ks_users, ku_next) {
836 if (ku->ku_pid == p->p_pid)
840 LIST_FOREACH(ks, &ksem_deadhead, ks_entry) {
841 LIST_FOREACH(ku, &ks->ks_users, ku_next) {
842 if (ku->ku_pid == p->p_pid)
850 * When a process forks, the child process must gain a reference to each open
851 * semaphore in the parent process, whether it is unlinked or not.  This
852 * requires allocating a kuser structure for each semaphore reference in the
853 * new process.  Because the set of semaphores in the parent can change while
854 * the fork is in progress, we have to handle races -- first we attempt to
855 * allocate enough storage to acquire references to each of the semaphores,
856 * then we enter the semaphores and release the temporary references.
859 sem_forkhook(arg, p1, p2, flags)
865 struct ksem *ks, **sem_array;
866 int count, i, new_count;
/*
 * Phase 1: size the temporary array.  The count is taken under the
 * lock, the lock is dropped to malloc (M_WAITOK may sleep), and the
 * count is re-taken; if it grew in the meantime we lost the race and
 * retry with a bigger allocation (the loop-back is in lines missing
 * from this excerpt).
 */
870 count = sem_count_proc(p1);
872 mtx_assert(&sem_lock, MA_OWNED);
873 mtx_unlock(&sem_lock);
874 sem_array = malloc(sizeof(struct ksem *) * count, M_TEMP, M_WAITOK);
876 new_count = sem_count_proc(p1);
877 if (count < new_count) {
878 /* Lost race, repeat and allocate more storage. */
879 free(sem_array, M_TEMP);
884 * Given an array capable of storing an adequate number of semaphore
885 * references, now walk the list of semaphores and acquire a new
886 * reference for any semaphore opened by p1.
890 LIST_FOREACH(ks, &ksem_head, ks_entry) {
891 LIST_FOREACH(ku, &ks->ks_users, ku_next) {
892 if (ku->ku_pid == p1->p_pid) {
900 LIST_FOREACH(ks, &ksem_deadhead, ks_entry) {
901 LIST_FOREACH(ku, &ks->ks_users, ku_next) {
902 if (ku->ku_pid == p1->p_pid) {
910 mtx_unlock(&sem_lock);
911 KASSERT(i == count, ("sem_forkhook: i != count (%d, %d)", i, count));
913 * Now cause p2 to enter each of the referenced semaphores, then
914 * release our temporary reference.  This is pretty inefficient.
915 * Finally, free our temporary array.
917 for (i = 0; i < count; i++) {
918 sem_enter(p2, sem_array[i]);
920 sem_rel(sem_array[i]);
921 mtx_unlock(&sem_lock);
923 free(sem_array, M_TEMP);
/*
 * Body of sem_exithook (header lines missing from this excerpt):
 * on process exit/exec, walk both semaphore lists with a safe
 * (ksnext-saved) iteration — presumably calling sem_leave() for the
 * exiting process on each, which can free entries mid-walk.
 */
931 struct ksem *ks, *ksnext;
934 ks = LIST_FIRST(&ksem_head);
936 ksnext = LIST_NEXT(ks, ks_entry);
940 ks = LIST_FIRST(&ksem_deadhead);
942 ksnext = LIST_NEXT(ks, ks_entry);
946 mtx_unlock(&sem_lock);
/*
 * sem_modload: module event handler.  On load: initialize sem_lock,
 * publish the POSIX.1b SEM_NSEMS_MAX / SEM_VALUE_MAX configuration
 * values, and hook process exit, exec (both via sem_exithook) and
 * fork (sem_forkhook).  On unload: deregister the hooks and destroy
 * the lock.  (The switch on cmd and the busy-check before unload are
 * in lines missing from this excerpt.)
 */
950 sem_modload(struct module *module, int cmd, void *arg)
956 mtx_init(&sem_lock, "sem", "semaphore", MTX_DEF);
957 p31b_setcfg(CTL_P1003_1B_SEM_NSEMS_MAX, SEM_MAX);
958 p31b_setcfg(CTL_P1003_1B_SEM_VALUE_MAX, SEM_VALUE_MAX);
959 sem_exit_tag = EVENTHANDLER_REGISTER(process_exit, sem_exithook,
960 NULL, EVENTHANDLER_PRI_ANY);
961 sem_exec_tag = EVENTHANDLER_REGISTER(process_exec, sem_exithook,
962 NULL, EVENTHANDLER_PRI_ANY);
963 sem_fork_tag = EVENTHANDLER_REGISTER(process_fork, sem_forkhook, NULL, EVENTHANDLER_PRI_ANY);
970 EVENTHANDLER_DEREGISTER(process_exit, sem_exit_tag);
971 EVENTHANDLER_DEREGISTER(process_exec, sem_exec_tag);
972 EVENTHANDLER_DEREGISTER(process_fork, sem_fork_tag);
973 mtx_destroy(&sem_lock);
/* Module glue: register the sem module and each ksem_* syscall. */
984 static moduledata_t sem_mod = {
990 SYSCALL_MODULE_HELPER(ksem_init);
991 SYSCALL_MODULE_HELPER(ksem_open);
992 SYSCALL_MODULE_HELPER(ksem_unlink);
993 SYSCALL_MODULE_HELPER(ksem_close);
994 SYSCALL_MODULE_HELPER(ksem_post);
995 SYSCALL_MODULE_HELPER(ksem_wait);
996 SYSCALL_MODULE_HELPER(ksem_timedwait);
997 SYSCALL_MODULE_HELPER(ksem_trywait);
998 SYSCALL_MODULE_HELPER(ksem_getvalue);
999 SYSCALL_MODULE_HELPER(ksem_destroy);
1001 DECLARE_MODULE(sem, sem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST);
1002 MODULE_VERSION(sem, 1);