2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Scooter Morris at Genentech Inc.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
38 #include "opt_debug_lockf.h"
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/limits.h>
45 #include <sys/mount.h>
46 #include <sys/mutex.h>
48 #include <sys/unistd.h>
49 #include <sys/vnode.h>
50 #include <sys/malloc.h>
51 #include <sys/fcntl.h>
52 #include <sys/lockf.h>
55 * This variable controls the maximum number of processes that will
56 * be checked in doing deadlock detection.
/* Initialized from MAXDEPTH; lf_setlock() compares its wait-chain step counter against it. */
58 static int maxlockdepth = MAXDEPTH;
61 #include <sys/sysctl.h>
63 #include <ufs/ufs/quota.h>
64 #include <ufs/ufs/inode.h>
/*
 * Debug trace mask, exported read-write as a sysctl under the _debug node.
 * The tests visible in this file check value 1 (lf_setlock tracing) and
 * value 2 (lf_split tracing).
 */
67 static int lockf_debug = 0;
68 SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
/* Malloc type passed to the struct lockf allocations and frees in this file. */
71 MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
/*
 * Null struct lockf pointer. Used as the end-of-list test in lf_findoverlap()
 * and as the value stored in lf_next once a request is no longer blocked.
 */
73 #define NOLOCKF (struct lockf *)0
/*
 * Forward declarations of the file-local helpers.
 * lf_print() and lf_printlist() are the debug printers.
 */
76 static int lf_clearlock(struct lockf *);
77 static int lf_findoverlap(struct lockf *,
78 struct lockf *, int, struct lockf ***, struct lockf **);
80 lf_getblock(struct lockf *);
81 static int lf_getlock(struct lockf *, struct flock *);
82 static int lf_setlock(struct lockf *);
83 static void lf_split(struct lockf *, struct lockf *);
84 static void lf_wakelock(struct lockf *);
86 static void lf_print(char *, struct lockf *);
87 static void lf_printlist(char *, struct lockf *);
91 * Advisory record locking support
/*
 * lf_advlock: common entry point for advisory lock requests.
 *
 * Parameters:
 *   ap   - advlock arguments; this function reads a_fl (the struct flock),
 *          a_op (the requested operation), a_id (stored as the lock owner)
 *          and a_flags (stored as the lock flags).
 *   head - address of the head pointer of the lock list for the file; it is
 *          stored in lf_head of the new request and tested for emptiness.
 *   size - added to l_start to form the start offset in one of the l_whence
 *          cases, after checking that the sum cannot exceed OFF_MAX.
 *
 * Steps:
 *   1. Convert the struct flock into an inclusive start and end offset.
 *      For a nonzero l_len the span is l_len - 1 (oadd), and the code checks
 *      that start + oadd cannot exceed OFF_MAX before using it.
 *   2. If the list at *head is empty and the operation is not F_SETLK,
 *      l_type of the struct flock is set to F_UNLCK.
 *   3. Allocate a struct lockf (M_LOCKF, M_WAITOK) and fill it in from the
 *      computed range and the caller arguments, with an empty blocked queue
 *      and a null lf_next.
 *   4. Dispatch to lf_setlock(), lf_clearlock() or lf_getlock(); the result
 *      of the chosen helper is stored in error.
 *
 * NOTE(review): error is presumably the return value of this function;
 * confirm against the return statements.
 */
94 lf_advlock(ap, head, size)
95 struct vop_advlock_args /* {
105 register struct flock *fl = ap->a_fl;
106 register struct lockf *lock;
107 off_t start, end, oadd;
112 * Convert the flock structure into a start and end.
114 switch (fl->l_whence) {
119 * Caller is responsible for adding any necessary offset
120 * when SEEK_CUR is used.
126 if (size > OFF_MAX ||
127 (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) {
131 start = size + fl->l_start;
153 } else if (fl->l_len == 0)
156 oadd = fl->l_len - 1;
157 if (oadd > OFF_MAX - start) {
164 * Avoid the common case of unlocking when inode has no locks.
166 if (*head == (struct lockf *)0) {
167 if (ap->a_op != F_SETLK) {
168 fl->l_type = F_UNLCK;
174 * Create the lockf structure
176 MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
177 lock->lf_start = start;
179 lock->lf_id = ap->a_id;
181 * XXX The problem is that VTOI is ufs specific, so it will
182 * break LOCKF_DEBUG for all other FS's other than UFS because
183 * it casts the vnode->data ptr to struct inode *.
185 /* lock->lf_inode = VTOI(ap->a_vp); */
/*
 * lf_inode is left null. lf_print() then omits the inode and device fields,
 * and lf_printlist() tests for a null lf_inode before it prints the list.
 */
186 lock->lf_inode = (struct inode *)0;
187 lock->lf_type = fl->l_type;
188 lock->lf_head = head;
189 lock->lf_next = (struct lockf *)0;
/* A new request starts with nobody blocked on it. */
190 TAILQ_INIT(&lock->lf_blkhd);
191 lock->lf_flags = ap->a_flags;
193 * Do the requested operation.
/* Each helper receives the request built above; lf_getlock() also gets fl to fill in. */
197 error = lf_setlock(lock);
201 error = lf_clearlock(lock);
206 error = lf_getlock(lock, fl);
222 * Set a byte-range lock.
/*
 * lf_setlock: install the request lock on the list at *lock->lf_head.
 *
 * Phase 1 - wait until nothing blocks the request.
 *   lf_getblock() is called in a loop. For each blocking lock (block):
 *   - requests without F_WAIT take the nonblocking branch;
 *   - when both the request and the blocker carry F_POSIX, the threads of
 *     the blocking process are walked to look for a wait cycle that leads
 *     back to the owner of the request, using at most maxlockdepth steps;
 *   - an F_FLOCK request for F_WRLCK first releases what the owner already
 *     holds by calling lf_clearlock() on the request;
 *   - the request is then queued on the blocked queue of the blocker,
 *     lf_next is set to the blocker, and the thread sleeps in tsleep() with
 *     the request as wait channel and lockstr as wait message;
 *   - after wakeup the request is removed from the blocked queue of
 *     lf_next and lf_next is reset to NOLOCKF.
 *
 * Phase 2 - merge with locks of the same owner.
 *   lf_findoverlap() with SELF classifies each overlap (cases 0 to 5) and
 *   the switch inserts, retypes, splits, absorbs or trims accordingly.
 *   lf_wakelock() is called on an overlap that is downgraded from F_WRLCK
 *   to F_RDLCK or that loses part of its range. In case 3 without a
 *   downgrade, the waiters of the absorbed overlap are moved onto the
 *   blocked queue of the new lock and their lf_next is repointed at it.
 *
 * NOTE(review): the return statements are not part of the visible lines;
 * the int result presumably carries 0 or an errno value - confirm.
 */
226 register struct lockf *lock;
228 register struct lockf *block;
229 struct lockf **head = lock->lf_head;
230 struct lockf **prev, *overlap, *ltmp;
/*
 * lockstr is static so that its address identifies sleepers of this
 * function: it is passed to tsleep() as the wait message and compared
 * by pointer with td_wmesg in the deadlock walk.
 */
231 static char lockstr[] = "lockf";
232 int ovcase, priority, needtolink, error;
236 lf_print("lf_setlock", lock);
237 #endif /* LOCKF_DEBUG */
243 if (lock->lf_type == F_WRLCK)
247 * Scan lock list for this file looking for locks that would block us.
249 while ((block = lf_getblock(lock))) {
251 * Free the structure and return if nonblocking.
253 if ((lock->lf_flags & F_WAIT) == 0) {
258 * We are blocked. Since flock style locks cover
259 * the whole file, there is no chance for deadlock.
260 * For byte-range locks we must check for deadlock.
262 * Deadlock detection is done by looking through the
263 * wait channels to see if there are any cycles that
264 * involve us. MAXDEPTH is set just to make sure we
265 * do not go off into neverland.
267 if ((lock->lf_flags & F_POSIX) &&
268 (block->lf_flags & F_POSIX)) {
269 register struct proc *wproc;
271 register struct lockf *waitblock;
274 /* The block is waiting on something */
275 /* XXXKSE this is not complete under threads */
/* For F_POSIX locks lf_id is treated as a struct proc pointer. */
276 wproc = (struct proc *)block->lf_id;
/* The thread walk below runs with the sched_lock spin mutex held. */
277 mtx_lock_spin(&sched_lock);
278 FOREACH_THREAD_IN_PROC(wproc, td) {
/*
 * A thread whose wait message is lockstr is sleeping in this function,
 * so its wait channel is its own pending struct lockf request.
 */
279 while (td->td_wchan &&
280 (td->td_wmesg == lockstr) &&
281 (i++ < maxlockdepth)) {
282 waitblock = (struct lockf *)td->td_wchan;
283 /* Get the owner of the blocking lock */
/* lf_next of a sleeping request is the lock it is blocked on (set before tsleep below). */
284 waitblock = waitblock->lf_next;
285 if ((waitblock->lf_flags & F_POSIX) == 0)
287 wproc = (struct proc *)waitblock->lf_id;
/*
 * The chain leads back to the owner of this request: a cycle.
 * NOTE(review): presumably the request is freed and a deadlock error
 * is returned here - confirm.
 */
288 if (wproc == (struct proc *)lock->lf_id) {
289 mtx_unlock_spin(&sched_lock);
295 mtx_unlock_spin(&sched_lock);
298 * For flock type locks, we must first remove
299 * any shared locks that we hold before we sleep
300 * waiting for an exclusive lock.
302 if ((lock->lf_flags & F_FLOCK) &&
303 lock->lf_type == F_WRLCK) {
/*
 * The type is switched to F_UNLCK only for the duration of the call,
 * because lf_clearlock() checks for F_UNLCK and panics otherwise.
 */
304 lock->lf_type = F_UNLCK;
305 (void) lf_clearlock(lock);
306 lock->lf_type = F_WRLCK;
309 * Add our lock to the blocked list and sleep until we're free.
310 * Remember who blocked us (for deadlock detection).
312 lock->lf_next = block;
313 TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
315 if (lockf_debug & 1) {
316 lf_print("lf_setlock: blocking on", block);
317 lf_printlist("lf_setlock", block);
319 #endif /* LOCKF_DEBUG */
320 error = tsleep(lock, priority, lockstr, 0);
322 * We may have been awakened by a signal and/or by a
323 * debugger continuing us (in which cases we must remove
324 * ourselves from the blocked list) and/or by another
325 * process releasing a lock (in which case we have
326 * already been removed from the blocked list and our
327 * lf_next field set to NOLOCKF).
330 TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
331 lock->lf_next = NOLOCKF;
339 * No blocks!! Add the lock. Note that we will
340 * downgrade or upgrade any overlapping locks this
341 * process already owns.
343 * Skip over locks owned by other processes.
344 * Handle any locks that overlap and are owned by ourselves.
350 ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
352 block = overlap->lf_next;
357 * 2) overlap contains lock
358 * 3) lock contains overlap
359 * 4) overlap starts before lock
360 * 5) overlap ends after lock
363 case 0: /* no overlap */
366 lock->lf_next = overlap;
370 case 1: /* overlap == lock */
372 * If downgrading lock, others may be
373 * able to acquire it.
375 if (lock->lf_type == F_RDLCK &&
376 overlap->lf_type == F_WRLCK)
377 lf_wakelock(overlap);
378 overlap->lf_type = lock->lf_type;
380 lock = overlap; /* for debug output below */
383 case 2: /* overlap contains lock */
385 * Check for common starting point and different types.
387 if (overlap->lf_type == lock->lf_type) {
389 lock = overlap; /* for debug output below */
392 if (overlap->lf_start == lock->lf_start) {
394 lock->lf_next = overlap;
395 overlap->lf_start = lock->lf_end + 1;
397 lf_split(overlap, lock);
398 lf_wakelock(overlap);
401 case 3: /* lock contains overlap */
403 * If downgrading lock, others may be able to
404 * acquire it, otherwise take the list.
406 if (lock->lf_type == F_RDLCK &&
407 overlap->lf_type == F_WRLCK) {
408 lf_wakelock(overlap);
410 while (!TAILQ_EMPTY(&overlap->lf_blkhd)) {
411 ltmp = TAILQ_FIRST(&overlap->lf_blkhd);
412 TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
414 TAILQ_INSERT_TAIL(&lock->lf_blkhd,
416 ltmp->lf_next = lock;
420 * Add the new lock if necessary and delete the overlap.
424 lock->lf_next = overlap->lf_next;
425 prev = &lock->lf_next;
428 *prev = overlap->lf_next;
429 free(overlap, M_LOCKF);
432 case 4: /* overlap starts before lock */
434 * Add lock after overlap on the list.
436 lock->lf_next = overlap->lf_next;
437 overlap->lf_next = lock;
438 overlap->lf_end = lock->lf_start - 1;
439 prev = &lock->lf_next;
440 lf_wakelock(overlap);
444 case 5: /* overlap ends after lock */
446 * Add the new lock before overlap.
450 lock->lf_next = overlap;
452 overlap->lf_start = lock->lf_end + 1;
453 lf_wakelock(overlap);
459 if (lockf_debug & 1) {
460 lf_print("lf_setlock: got the lock", lock);
461 lf_printlist("lf_setlock", lock);
463 #endif /* LOCKF_DEBUG */
468 * Remove a byte-range lock on an inode.
470 * Generally, find the lock (or an overlap to that lock)
471 * and remove it (or shrink it), then wakeup anyone we can.
/*
 * lf_clearlock: release the range described by unlock from the locks that
 * the same owner holds on the list at *unlock->lf_head.
 *
 * unlock must have lf_type F_UNLCK; the check below panics otherwise.
 * lf_findoverlap() is called with SELF until it reports no overlap. For
 * every overlap found, lf_wakelock() is called on it first, and then the
 * overlap is freed, split or trimmed according to the case number.
 *
 * NOTE(review): the return statement is not part of the visible lines;
 * the int result is presumably 0 - confirm.
 */
475 register struct lockf *unlock;
477 struct lockf **head = unlock->lf_head;
478 register struct lockf *lf = *head;
479 struct lockf *overlap, **prev;
485 if (unlock->lf_type != F_UNLCK)
486 panic("lf_clearlock: bad type");
488 lf_print("lf_clearlock", unlock);
489 #endif /* LOCKF_DEBUG */
491 while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap))) {
493 * Wakeup the list of locks to be retried.
495 lf_wakelock(overlap);
499 case 1: /* overlap == lock */
/* Exact match: unlink the overlap through prev and free it. */
500 *prev = overlap->lf_next;
501 FREE(overlap, M_LOCKF);
504 case 2: /* overlap contains lock: split it */
505 if (overlap->lf_start == unlock->lf_start) {
506 overlap->lf_start = unlock->lf_end + 1;
/*
 * lf_split() links unlock into the list right after overlap. The next
 * statement points overlap past unlock again, which removes it from the
 * list and leaves the hole.
 */
509 lf_split(overlap, unlock);
510 overlap->lf_next = unlock->lf_next;
513 case 3: /* lock contains overlap */
/* Overlap lies wholly inside the range: drop it and continue from its successor. */
514 *prev = overlap->lf_next;
515 lf = overlap->lf_next;
516 free(overlap, M_LOCKF);
519 case 4: /* overlap starts before lock */
/* Keep the leading part of the overlap and continue scanning after it. */
520 overlap->lf_end = unlock->lf_start - 1;
521 prev = &overlap->lf_next;
522 lf = overlap->lf_next;
525 case 5: /* overlap ends after lock */
/* Keep the trailing part of the overlap. */
526 overlap->lf_start = unlock->lf_end + 1;
533 lf_printlist("lf_clearlock", unlock);
534 #endif /* LOCKF_DEBUG */
539 * Check whether there is a blocking lock,
540 * and if so return its process identifier.
/*
 * lf_getlock: report the first lock that would block the request lock.
 *
 * When lf_getblock() returns a blocking lock, its type and start are copied
 * into fl with l_whence set to SEEK_SET, and l_len is computed from the
 * inclusive range as lf_end - lf_start + 1. For an F_POSIX blocker, l_pid
 * is the p_pid of the struct proc stored in lf_id. When nothing blocks,
 * only l_type is written, as F_UNLCK.
 *
 * NOTE(review): lf_end == -1 is special-cased before the l_len computation;
 * presumably it stands for a lock that extends to end of file and yields
 * l_len 0 - confirm. The l_pid value for a blocker without F_POSIX is
 * likewise not visible.
 */
544 register struct lockf *lock;
545 register struct flock *fl;
547 register struct lockf *block;
551 lf_print("lf_getlock", lock);
552 #endif /* LOCKF_DEBUG */
554 if ((block = lf_getblock(lock))) {
555 fl->l_type = block->lf_type;
556 fl->l_whence = SEEK_SET;
557 fl->l_start = block->lf_start;
558 if (block->lf_end == -1)
561 fl->l_len = block->lf_end - block->lf_start + 1;
562 if (block->lf_flags & F_POSIX)
563 fl->l_pid = ((struct proc *)(block->lf_id))->p_pid;
567 fl->l_type = F_UNLCK;
573 * Walk the list of locks for an inode and
574 * return the first blocking lock.
/*
 * lf_getblock: scan the list at *lock->lf_head for overlapping locks that
 * belong to other owners (lf_findoverlap() with OTHERS). An overlap blocks
 * the request when either the request or the overlap is F_WRLCK; two
 * overlapping non-write locks do not conflict, and the scan continues from
 * the successor of the overlap.
 *
 * NOTE(review): the return statements are not part of the visible lines;
 * presumably the blocking overlap is returned, or NOLOCKF when none is
 * found - confirm.
 */
576 static struct lockf *
578 register struct lockf *lock;
580 struct lockf **prev, *overlap, *lf = *(lock->lf_head);
583 prev = lock->lf_head;
584 while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap))) {
586 * We've found an overlap, see if it blocks us
588 if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
591 * Nope, point to the next one on the list and
592 * see if it blocks us
594 lf = overlap->lf_next;
600 * Walk the list of locks for an inode to
601 * find an overlapping lock (if any).
603 * NOTE: this returns only the FIRST overlapping lock. There
604 * may be more than one.
/*
 * lf_findoverlap: classify how the range of lock relates to entries on a
 * lock list.
 *
 * Parameters:
 *   lf      - list entry at which the scan starts.
 *   lock    - the request whose range (lf_start, lf_end) is compared.
 *   type    - SELF restricts the scan to entries with the same lf_id as
 *             lock; OTHERS restricts it to entries with a different lf_id.
 *   prev    - out: advanced to the address of the lf_next field of each
 *             entry that is stepped over.
 *   overlap - out: advanced to each entry as the scan moves on.
 *
 * An lf_end of -1 is treated as unbounded in every comparison below.
 * Callers switch on the result with case numbers 0 to 5, matching the
 * numbered list in the comment inside the loop.
 *
 * NOTE(review): the return statements are not part of the visible lines.
 */
607 lf_findoverlap(lf, lock, type, prev, overlap)
608 register struct lockf *lf;
611 struct lockf ***prev;
612 struct lockf **overlap;
621 lf_print("lf_findoverlap: looking for overlap in", lock);
622 #endif /* LOCKF_DEBUG */
623 start = lock->lf_start;
625 while (lf != NOLOCKF) {
/* Owner filter: step over entries that the type argument excludes. */
626 if (((type & SELF) && lf->lf_id != lock->lf_id) ||
627 ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
628 *prev = &lf->lf_next;
629 *overlap = lf = lf->lf_next;
634 lf_print("\tchecking", lf);
635 #endif /* LOCKF_DEBUG */
637 * OK, check for overlap
642 * 2) overlap contains lock
643 * 3) lock contains overlap
644 * 4) overlap starts before lock
645 * 5) overlap ends after lock
/* Disjoint ranges: the entry ends before start, or begins after end. */
647 if ((lf->lf_end != -1 && start > lf->lf_end) ||
648 (end != -1 && lf->lf_start > end)) {
652 printf("no overlap\n");
653 #endif /* LOCKF_DEBUG */
/*
 * NOTE(review): for SELF scans an entry that begins past end appears to
 * stop the scan early, which presumably relies on the list being ordered
 * by lf_start - confirm.
 */
654 if ((type & SELF) && end != -1 && lf->lf_start > end)
656 *prev = &lf->lf_next;
657 *overlap = lf = lf->lf_next;
660 if ((lf->lf_start == start) && (lf->lf_end == end)) {
664 printf("overlap == lock\n");
665 #endif /* LOCKF_DEBUG */
668 if ((lf->lf_start <= start) &&
670 ((lf->lf_end >= end) || (lf->lf_end == -1))) {
674 printf("overlap contains lock\n");
675 #endif /* LOCKF_DEBUG */
678 if (start <= lf->lf_start &&
680 (lf->lf_end != -1 && end >= lf->lf_end))) {
684 printf("lock contains overlap\n");
685 #endif /* LOCKF_DEBUG */
688 if ((lf->lf_start < start) &&
689 ((lf->lf_end >= start) || (lf->lf_end == -1))) {
693 printf("overlap starts before lock\n");
694 #endif /* LOCKF_DEBUG */
697 if ((lf->lf_start > start) &&
699 ((lf->lf_end > end) || (lf->lf_end == -1))) {
703 printf("overlap ends after lock\n");
704 #endif /* LOCKF_DEBUG */
/* None of the classifications matched: treated as an internal error. */
707 panic("lf_findoverlap: default");
713 * Split a lock and a contained region into
714 * two or three locks as necessary.
/*
 * lf_split: carve the range of lock2 out of the enclosing lock1 and link
 * lock2 into the list next to it.
 *
 *   - Same start: lock1 is moved to begin after lock2, and lock2 is linked
 *     in front of lock1.
 *   - Same end: lock1 is cut to end before lock2, and lock2 is linked
 *     directly after lock1.
 *   - Otherwise: a third struct lockf (splitlock) is allocated as a copy of
 *     lock1, adjusted to begin after lock2 and given an empty blocked
 *     queue; lock1 is cut to end before lock2. The resulting list order is
 *     lock1, lock2, splitlock.
 */
717 lf_split(lock1, lock2)
718 register struct lockf *lock1;
719 register struct lockf *lock2;
721 register struct lockf *splitlock;
724 if (lockf_debug & 2) {
725 lf_print("lf_split", lock1);
726 lf_print("splitting from", lock2);
728 #endif /* LOCKF_DEBUG */
730 * Check to see if spliting into only two pieces.
732 if (lock1->lf_start == lock2->lf_start) {
733 lock1->lf_start = lock2->lf_end + 1;
734 lock2->lf_next = lock1;
737 if (lock1->lf_end == lock2->lf_end) {
738 lock1->lf_end = lock2->lf_start - 1;
739 lock2->lf_next = lock1->lf_next;
740 lock1->lf_next = lock2;
744 * Make a new lock consisting of the last part of
745 * the encompassing lock
747 MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
/* Start from a full copy of lock1, then fix up the fields that differ. */
748 bcopy(lock1, splitlock, sizeof *splitlock);
749 splitlock->lf_start = lock2->lf_end + 1;
/* The copied queue head is reinitialized so the copy starts with no waiters. */
750 TAILQ_INIT(&splitlock->lf_blkhd);
751 lock1->lf_end = lock2->lf_start - 1;
755 splitlock->lf_next = lock1->lf_next;
756 lock2->lf_next = splitlock;
757 lock1->lf_next = lock2;
/*
 * lf_wakelock: empty the blocked queue of listhead. Each queued request is
 * removed from the queue and its lf_next is reset to NOLOCKF, which is the
 * state lf_setlock() describes for a request released by another process.
 *
 * NOTE(review): the call that actually wakes the sleeping thread is not
 * part of the visible lines - confirm that it follows the debug print.
 */
764 lf_wakelock(listhead)
765 struct lockf *listhead;
767 register struct lockf *wakelock;
769 while (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
770 wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
771 TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
772 wakelock->lf_next = NOLOCKF;
775 lf_print("lf_wakelock: awakening", wakelock);
776 #endif /* LOCKF_DEBUG */
/*
 * Debug printer for a single lock (the body uses the parameters tag and
 * lock). The output is: the tag and the lock address; the owner, as a
 * process id for F_POSIX locks and as the raw lf_id pointer otherwise; the
 * inode number and device name when lf_inode is non-null; the lock type
 * as shared, exclusive, unlock or unknown; the start and end offsets; and
 * the address of the first request on the blocked queue when there is one.
 */
788 register struct lockf *lock;
791 printf("%s: lock %p for ", tag, (void *)lock);
792 if (lock->lf_flags & F_POSIX)
793 printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid);
795 printf("id %p", (void *)lock->lf_id);
796 if (lock->lf_inode != (struct inode *)0)
797 printf(" in ino %ju on dev <%s>, %s, start %jd, end %jd",
798 (uintmax_t)lock->lf_inode->i_number,
799 devtoname(lock->lf_inode->i_dev),
800 lock->lf_type == F_RDLCK ? "shared" :
801 lock->lf_type == F_WRLCK ? "exclusive" :
802 lock->lf_type == F_UNLCK ? "unlock" : "unknown",
803 (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
805 printf(" %s, start %jd, end %jd",
806 lock->lf_type == F_RDLCK ? "shared" :
807 lock->lf_type == F_WRLCK ? "exclusive" :
808 lock->lf_type == F_UNLCK ? "unlock" : "unknown",
809 (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
810 if (!TAILQ_EMPTY(&lock->lf_blkhd))
811 printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd));
/*
 * lf_printlist: debug printer for the whole lock list of the inode that
 * lock refers to. The list is reached through lock->lf_inode->i_lockf, so
 * lf_inode is tested for null before anything is printed. For every lock
 * on the list the owner, type, start and end are printed, followed by one
 * line per request on its blocked queue. A queued request that itself has
 * a non-empty blocked queue is treated as list corruption and panics.
 */
817 lf_printlist(tag, lock)
821 register struct lockf *lf, *blk;
823 if (lock->lf_inode == (struct inode *)0)
826 printf("%s: Lock list for ino %ju on dev <%s>:\n",
827 tag, (uintmax_t)lock->lf_inode->i_number,
828 devtoname(lock->lf_inode->i_dev));
829 for (lf = lock->lf_inode->i_lockf; lf; lf = lf->lf_next) {
830 printf("\tlock %p for ",(void *)lf);
831 if (lf->lf_flags & F_POSIX)
833 (long)((struct proc *)lf->lf_id)->p_pid);
835 printf("id %p", (void *)lf->lf_id);
836 printf(", %s, start %jd, end %jd",
837 lf->lf_type == F_RDLCK ? "shared" :
838 lf->lf_type == F_WRLCK ? "exclusive" :
839 lf->lf_type == F_UNLCK ? "unlock" :
840 "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
/* Requests currently sleeping on this lock. */
841 TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
842 printf("\n\t\tlock request %p for ", (void *)blk);
843 if (blk->lf_flags & F_POSIX)
845 (long)((struct proc *)blk->lf_id)->p_pid);
847 printf("id %p", (void *)blk->lf_id);
848 printf(", %s, start %jd, end %jd",
849 blk->lf_type == F_RDLCK ? "shared" :
850 blk->lf_type == F_WRLCK ? "exclusive" :
851 blk->lf_type == F_UNLCK ? "unlock" :
852 "unknown", (intmax_t)blk->lf_start,
853 (intmax_t)blk->lf_end);
854 if (!TAILQ_EMPTY(&blk->lf_blkhd))
855 panic("lf_printlist: bad list");
860 #endif /* LOCKF_DEBUG */