/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.3 (Berkeley) 1/6/94
 */
#include "opt_debug_lockf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>

#include <machine/limits.h>
57 * This variable controls the maximum number of processes that will
58 * be checked in doing deadlock detection.
60 static int maxlockdepth = MAXDEPTH;
63 #include <sys/kernel.h>
64 #include <sys/sysctl.h>
66 #include <ufs/ufs/quota.h>
67 #include <ufs/ufs/inode.h>
70 static int lockf_debug = 0;
71 SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
74 MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
76 #define NOLOCKF (struct lockf *)0
79 static int lf_clearlock(struct lockf *);
80 static int lf_findoverlap(struct lockf *,
81 struct lockf *, int, struct lockf ***, struct lockf **);
83 lf_getblock(struct lockf *);
84 static int lf_getlock(struct lockf *, struct flock *);
85 static int lf_setlock(struct lockf *);
86 static void lf_split(struct lockf *, struct lockf *);
87 static void lf_wakelock(struct lockf *);
90 * Advisory record locking support
93 lf_advlock(ap, head, size)
94 struct vop_advlock_args /* {
104 register struct flock *fl = ap->a_fl;
105 register struct lockf *lock;
106 off_t start, end, oadd;
110 * Convert the flock structure into a start and end.
112 switch (fl->l_whence) {
117 * Caller is responsible for adding any necessary offset
118 * when SEEK_CUR is used.
124 if (size > OFF_MAX ||
125 (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
127 start = size + fl->l_start;
142 } else if (fl->l_len == 0)
145 oadd = fl->l_len - 1;
146 if (oadd > OFF_MAX - start)
151 * Avoid the common case of unlocking when inode has no locks.
153 if (*head == (struct lockf *)0) {
154 if (ap->a_op != F_SETLK) {
155 fl->l_type = F_UNLCK;
160 * Create the lockf structure
162 MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
163 lock->lf_start = start;
165 lock->lf_id = ap->a_id;
167 * XXX The problem is that VTOI is ufs specific, so it will
168 * break LOCKF_DEBUG for all other FS's other than UFS because
169 * it casts the vnode->data ptr to struct inode *.
171 /* lock->lf_inode = VTOI(ap->a_vp); */
172 lock->lf_inode = (struct inode *)0;
173 lock->lf_type = fl->l_type;
174 lock->lf_head = head;
175 lock->lf_next = (struct lockf *)0;
176 TAILQ_INIT(&lock->lf_blkhd);
177 lock->lf_flags = ap->a_flags;
179 * Do the requested operation.
183 return (lf_setlock(lock));
186 error = lf_clearlock(lock);
191 error = lf_getlock(lock, fl);
203 * Set a byte-range lock.
207 register struct lockf *lock;
209 register struct lockf *block;
210 struct lockf **head = lock->lf_head;
211 struct lockf **prev, *overlap, *ltmp;
212 static char lockstr[] = "lockf";
213 int ovcase, priority, needtolink, error;
217 lf_print("lf_setlock", lock);
218 #endif /* LOCKF_DEBUG */
224 if (lock->lf_type == F_WRLCK)
228 * Scan lock list for this file looking for locks that would block us.
230 while ((block = lf_getblock(lock))) {
232 * Free the structure and return if nonblocking.
234 if ((lock->lf_flags & F_WAIT) == 0) {
239 * We are blocked. Since flock style locks cover
240 * the whole file, there is no chance for deadlock.
241 * For byte-range locks we must check for deadlock.
243 * Deadlock detection is done by looking through the
244 * wait channels to see if there are any cycles that
245 * involve us. MAXDEPTH is set just to make sure we
246 * do not go off into neverland.
248 if ((lock->lf_flags & F_POSIX) &&
249 (block->lf_flags & F_POSIX)) {
250 register struct proc *wproc;
252 register struct lockf *waitblock;
255 /* The block is waiting on something */
256 /* XXXKSE this is not complete under threads */
257 wproc = (struct proc *)block->lf_id;
258 mtx_lock_spin(&sched_lock);
259 FOREACH_THREAD_IN_PROC(wproc, td) {
260 while (td->td_wchan &&
261 (td->td_wmesg == lockstr) &&
262 (i++ < maxlockdepth)) {
263 waitblock = (struct lockf *)td->td_wchan;
264 /* Get the owner of the blocking lock */
265 waitblock = waitblock->lf_next;
266 if ((waitblock->lf_flags & F_POSIX) == 0)
268 wproc = (struct proc *)waitblock->lf_id;
269 if (wproc == (struct proc *)lock->lf_id) {
270 mtx_unlock_spin(&sched_lock);
276 mtx_unlock_spin(&sched_lock);
279 * For flock type locks, we must first remove
280 * any shared locks that we hold before we sleep
281 * waiting for an exclusive lock.
283 if ((lock->lf_flags & F_FLOCK) &&
284 lock->lf_type == F_WRLCK) {
285 lock->lf_type = F_UNLCK;
286 (void) lf_clearlock(lock);
287 lock->lf_type = F_WRLCK;
290 * Add our lock to the blocked list and sleep until we're free.
291 * Remember who blocked us (for deadlock detection).
293 lock->lf_next = block;
294 TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
296 if (lockf_debug & 1) {
297 lf_print("lf_setlock: blocking on", block);
298 lf_printlist("lf_setlock", block);
300 #endif /* LOCKF_DEBUG */
301 error = tsleep((caddr_t)lock, priority, lockstr, 0);
303 * We may have been awakened by a signal and/or by a
304 * debugger continuing us (in which cases we must remove
305 * ourselves from the blocked list) and/or by another
306 * process releasing a lock (in which case we have
307 * already been removed from the blocked list and our
308 * lf_next field set to NOLOCKF).
311 TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
312 lock->lf_next = NOLOCKF;
320 * No blocks!! Add the lock. Note that we will
321 * downgrade or upgrade any overlapping locks this
322 * process already owns.
324 * Skip over locks owned by other processes.
325 * Handle any locks that overlap and are owned by ourselves.
331 ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
333 block = overlap->lf_next;
338 * 2) overlap contains lock
339 * 3) lock contains overlap
340 * 4) overlap starts before lock
341 * 5) overlap ends after lock
344 case 0: /* no overlap */
347 lock->lf_next = overlap;
351 case 1: /* overlap == lock */
353 * If downgrading lock, others may be
354 * able to acquire it.
356 if (lock->lf_type == F_RDLCK &&
357 overlap->lf_type == F_WRLCK)
358 lf_wakelock(overlap);
359 overlap->lf_type = lock->lf_type;
361 lock = overlap; /* for debug output below */
364 case 2: /* overlap contains lock */
366 * Check for common starting point and different types.
368 if (overlap->lf_type == lock->lf_type) {
370 lock = overlap; /* for debug output below */
373 if (overlap->lf_start == lock->lf_start) {
375 lock->lf_next = overlap;
376 overlap->lf_start = lock->lf_end + 1;
378 lf_split(overlap, lock);
379 lf_wakelock(overlap);
382 case 3: /* lock contains overlap */
384 * If downgrading lock, others may be able to
385 * acquire it, otherwise take the list.
387 if (lock->lf_type == F_RDLCK &&
388 overlap->lf_type == F_WRLCK) {
389 lf_wakelock(overlap);
391 while (!TAILQ_EMPTY(&overlap->lf_blkhd)) {
392 ltmp = TAILQ_FIRST(&overlap->lf_blkhd);
393 TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
395 TAILQ_INSERT_TAIL(&lock->lf_blkhd,
397 ltmp->lf_next = lock;
401 * Add the new lock if necessary and delete the overlap.
405 lock->lf_next = overlap->lf_next;
406 prev = &lock->lf_next;
409 *prev = overlap->lf_next;
410 free(overlap, M_LOCKF);
413 case 4: /* overlap starts before lock */
415 * Add lock after overlap on the list.
417 lock->lf_next = overlap->lf_next;
418 overlap->lf_next = lock;
419 overlap->lf_end = lock->lf_start - 1;
420 prev = &lock->lf_next;
421 lf_wakelock(overlap);
425 case 5: /* overlap ends after lock */
427 * Add the new lock before overlap.
431 lock->lf_next = overlap;
433 overlap->lf_start = lock->lf_end + 1;
434 lf_wakelock(overlap);
440 if (lockf_debug & 1) {
441 lf_print("lf_setlock: got the lock", lock);
442 lf_printlist("lf_setlock", lock);
444 #endif /* LOCKF_DEBUG */
449 * Remove a byte-range lock on an inode.
451 * Generally, find the lock (or an overlap to that lock)
452 * and remove it (or shrink it), then wakeup anyone we can.
456 register struct lockf *unlock;
458 struct lockf **head = unlock->lf_head;
459 register struct lockf *lf = *head;
460 struct lockf *overlap, **prev;
466 if (unlock->lf_type != F_UNLCK)
467 panic("lf_clearlock: bad type");
469 lf_print("lf_clearlock", unlock);
470 #endif /* LOCKF_DEBUG */
472 while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap))) {
474 * Wakeup the list of locks to be retried.
476 lf_wakelock(overlap);
480 case 1: /* overlap == lock */
481 *prev = overlap->lf_next;
482 FREE(overlap, M_LOCKF);
485 case 2: /* overlap contains lock: split it */
486 if (overlap->lf_start == unlock->lf_start) {
487 overlap->lf_start = unlock->lf_end + 1;
490 lf_split(overlap, unlock);
491 overlap->lf_next = unlock->lf_next;
494 case 3: /* lock contains overlap */
495 *prev = overlap->lf_next;
496 lf = overlap->lf_next;
497 free(overlap, M_LOCKF);
500 case 4: /* overlap starts before lock */
501 overlap->lf_end = unlock->lf_start - 1;
502 prev = &overlap->lf_next;
503 lf = overlap->lf_next;
506 case 5: /* overlap ends after lock */
507 overlap->lf_start = unlock->lf_end + 1;
514 lf_printlist("lf_clearlock", unlock);
515 #endif /* LOCKF_DEBUG */
520 * Check whether there is a blocking lock,
521 * and if so return its process identifier.
525 register struct lockf *lock;
526 register struct flock *fl;
528 register struct lockf *block;
532 lf_print("lf_getlock", lock);
533 #endif /* LOCKF_DEBUG */
535 if ((block = lf_getblock(lock))) {
536 fl->l_type = block->lf_type;
537 fl->l_whence = SEEK_SET;
538 fl->l_start = block->lf_start;
539 if (block->lf_end == -1)
542 fl->l_len = block->lf_end - block->lf_start + 1;
543 if (block->lf_flags & F_POSIX)
544 fl->l_pid = ((struct proc *)(block->lf_id))->p_pid;
548 fl->l_type = F_UNLCK;
554 * Walk the list of locks for an inode and
555 * return the first blocking lock.
557 static struct lockf *
559 register struct lockf *lock;
561 struct lockf **prev, *overlap, *lf = *(lock->lf_head);
564 prev = lock->lf_head;
565 while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap))) {
567 * We've found an overlap, see if it blocks us
569 if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
572 * Nope, point to the next one on the list and
573 * see if it blocks us
575 lf = overlap->lf_next;
581 * Walk the list of locks for an inode to
582 * find an overlapping lock (if any).
584 * NOTE: this returns only the FIRST overlapping lock. There
585 * may be more than one.
588 lf_findoverlap(lf, lock, type, prev, overlap)
589 register struct lockf *lf;
592 struct lockf ***prev;
593 struct lockf **overlap;
602 lf_print("lf_findoverlap: looking for overlap in", lock);
603 #endif /* LOCKF_DEBUG */
604 start = lock->lf_start;
606 while (lf != NOLOCKF) {
607 if (((type & SELF) && lf->lf_id != lock->lf_id) ||
608 ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
609 *prev = &lf->lf_next;
610 *overlap = lf = lf->lf_next;
615 lf_print("\tchecking", lf);
616 #endif /* LOCKF_DEBUG */
618 * OK, check for overlap
623 * 2) overlap contains lock
624 * 3) lock contains overlap
625 * 4) overlap starts before lock
626 * 5) overlap ends after lock
628 if ((lf->lf_end != -1 && start > lf->lf_end) ||
629 (end != -1 && lf->lf_start > end)) {
633 printf("no overlap\n");
634 #endif /* LOCKF_DEBUG */
635 if ((type & SELF) && end != -1 && lf->lf_start > end)
637 *prev = &lf->lf_next;
638 *overlap = lf = lf->lf_next;
641 if ((lf->lf_start == start) && (lf->lf_end == end)) {
645 printf("overlap == lock\n");
646 #endif /* LOCKF_DEBUG */
649 if ((lf->lf_start <= start) &&
651 ((lf->lf_end >= end) || (lf->lf_end == -1))) {
655 printf("overlap contains lock\n");
656 #endif /* LOCKF_DEBUG */
659 if (start <= lf->lf_start &&
661 (lf->lf_end != -1 && end >= lf->lf_end))) {
665 printf("lock contains overlap\n");
666 #endif /* LOCKF_DEBUG */
669 if ((lf->lf_start < start) &&
670 ((lf->lf_end >= start) || (lf->lf_end == -1))) {
674 printf("overlap starts before lock\n");
675 #endif /* LOCKF_DEBUG */
678 if ((lf->lf_start > start) &&
680 ((lf->lf_end > end) || (lf->lf_end == -1))) {
684 printf("overlap ends after lock\n");
685 #endif /* LOCKF_DEBUG */
688 panic("lf_findoverlap: default");
694 * Split a lock and a contained region into
695 * two or three locks as necessary.
698 lf_split(lock1, lock2)
699 register struct lockf *lock1;
700 register struct lockf *lock2;
702 register struct lockf *splitlock;
705 if (lockf_debug & 2) {
706 lf_print("lf_split", lock1);
707 lf_print("splitting from", lock2);
709 #endif /* LOCKF_DEBUG */
711 * Check to see if spliting into only two pieces.
713 if (lock1->lf_start == lock2->lf_start) {
714 lock1->lf_start = lock2->lf_end + 1;
715 lock2->lf_next = lock1;
718 if (lock1->lf_end == lock2->lf_end) {
719 lock1->lf_end = lock2->lf_start - 1;
720 lock2->lf_next = lock1->lf_next;
721 lock1->lf_next = lock2;
725 * Make a new lock consisting of the last part of
726 * the encompassing lock
728 MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
729 bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock);
730 splitlock->lf_start = lock2->lf_end + 1;
731 TAILQ_INIT(&splitlock->lf_blkhd);
732 lock1->lf_end = lock2->lf_start - 1;
736 splitlock->lf_next = lock1->lf_next;
737 lock2->lf_next = splitlock;
738 lock1->lf_next = lock2;
745 lf_wakelock(listhead)
746 struct lockf *listhead;
748 register struct lockf *wakelock;
750 while (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
751 wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
752 TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
753 wakelock->lf_next = NOLOCKF;
756 lf_print("lf_wakelock: awakening", wakelock);
757 #endif /* LOCKF_DEBUG */
758 wakeup((caddr_t)wakelock);
769 register struct lockf *lock;
772 printf("%s: lock %p for ", tag, (void *)lock);
773 if (lock->lf_flags & F_POSIX)
774 printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid);
776 printf("id %p", (void *)lock->lf_id);
777 if (lock->lf_inode != (struct inode *)0)
778 /* XXX no %qd in kernel. Truncate. */
779 printf(" in ino %lu on dev <%d, %d>, %s, start %ld, end %ld",
780 (u_long)lock->lf_inode->i_number,
781 major(lock->lf_inode->i_dev),
782 minor(lock->lf_inode->i_dev),
783 lock->lf_type == F_RDLCK ? "shared" :
784 lock->lf_type == F_WRLCK ? "exclusive" :
785 lock->lf_type == F_UNLCK ? "unlock" :
786 "unknown", (long)lock->lf_start, (long)lock->lf_end);
788 printf(" %s, start %ld, end %ld",
789 lock->lf_type == F_RDLCK ? "shared" :
790 lock->lf_type == F_WRLCK ? "exclusive" :
791 lock->lf_type == F_UNLCK ? "unlock" :
792 "unknown", (long)lock->lf_start, (long)lock->lf_end);
793 if (!TAILQ_EMPTY(&lock->lf_blkhd))
794 printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd));
800 lf_printlist(tag, lock)
804 register struct lockf *lf, *blk;
806 if (lock->lf_inode == (struct inode *)0)
809 printf("%s: Lock list for ino %lu on dev <%d, %d>:\n",
810 tag, (u_long)lock->lf_inode->i_number,
811 major(lock->lf_inode->i_dev),
812 minor(lock->lf_inode->i_dev));
813 for (lf = lock->lf_inode->i_lockf; lf; lf = lf->lf_next) {
814 printf("\tlock %p for ",(void *)lf);
815 if (lf->lf_flags & F_POSIX)
817 (long)((struct proc *)lf->lf_id)->p_pid);
819 printf("id %p", (void *)lf->lf_id);
820 /* XXX no %qd in kernel. Truncate. */
821 printf(", %s, start %ld, end %ld",
822 lf->lf_type == F_RDLCK ? "shared" :
823 lf->lf_type == F_WRLCK ? "exclusive" :
824 lf->lf_type == F_UNLCK ? "unlock" :
825 "unknown", (long)lf->lf_start, (long)lf->lf_end);
826 TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
827 printf("\n\t\tlock request %p for ", (void *)blk);
828 if (blk->lf_flags & F_POSIX)
830 (long)((struct proc *)blk->lf_id)->p_pid);
832 printf("id %p", (void *)blk->lf_id);
833 /* XXX no %qd in kernel. Truncate. */
834 printf(", %s, start %ld, end %ld",
835 blk->lf_type == F_RDLCK ? "shared" :
836 blk->lf_type == F_WRLCK ? "exclusive" :
837 blk->lf_type == F_UNLCK ? "unlock" :
838 "unknown", (long)blk->lf_start,
840 if (!TAILQ_EMPTY(&blk->lf_blkhd))
841 panic("lf_printlist: bad list");
846 #endif /* LOCKF_DEBUG */