/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/systm.h>

#include <vm/uma.h>
40 TAILQ_ENTRY(rl_q_entry) rl_q_link;
41 off_t rl_q_start, rl_q_end;
45 static uma_zone_t rl_entry_zone;
48 rangelock_sys_init(void)
51 rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
52 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
54 SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
56 static struct rl_q_entry *
60 return (uma_zalloc(rl_entry_zone, M_WAITOK));
64 rlqentry_free(struct rl_q_entry *rleq)
67 uma_zfree(rl_entry_zone, rleq);
71 rangelock_init(struct rangelock *lock)
74 TAILQ_INIT(&lock->rl_waiters);
75 lock->rl_currdep = NULL;
79 rangelock_destroy(struct rangelock *lock)
82 KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
86 * Two entries are compatible if their ranges do not overlap, or both
87 * entries are for read.
90 ranges_overlap(const struct rl_q_entry *e1,
91 const struct rl_q_entry *e2)
94 if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
100 * Recalculate the lock->rl_currdep after an unlock.
103 rangelock_calc_block(struct rangelock *lock)
105 struct rl_q_entry *entry, *nextentry, *entry1;
107 for (entry = lock->rl_currdep; entry != NULL; entry = nextentry) {
108 nextentry = TAILQ_NEXT(entry, rl_q_link);
109 if (entry->rl_q_flags & RL_LOCK_READ) {
110 /* Reads must not overlap with granted writes. */
111 for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
112 !(entry1->rl_q_flags & RL_LOCK_READ);
113 entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
114 if (ranges_overlap(entry, entry1))
118 /* Write must not overlap with any granted locks. */
119 for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
121 entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
122 if (ranges_overlap(entry, entry1))
126 /* Move grantable write locks to the front. */
127 TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
128 TAILQ_INSERT_HEAD(&lock->rl_waiters, entry, rl_q_link);
131 /* Grant this lock. */
132 entry->rl_q_flags |= RL_LOCK_GRANTED;
136 lock->rl_currdep = entry;
140 rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
141 struct mtx *ilk, bool do_calc_block)
144 MPASS(lock != NULL && entry != NULL && ilk != NULL);
145 mtx_assert(ilk, MA_OWNED);
147 if (!do_calc_block) {
149 * This is the case where rangelock_enqueue() has been called
150 * with trylock == true and just inserted this entry in the
152 * If rl_currdep is this entry, rl_currdep needs to
153 * be set to the next entry in the rl_waiters list.
154 * However, since this entry is the last entry in the
155 * list, the next entry is NULL.
157 if (lock->rl_currdep == entry) {
158 KASSERT(TAILQ_NEXT(lock->rl_currdep, rl_q_link) == NULL,
159 ("rangelock_enqueue: next entry not NULL"));
160 lock->rl_currdep = NULL;
163 KASSERT(entry != lock->rl_currdep, ("stuck currdep"));
165 TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
167 rangelock_calc_block(lock);
169 if (curthread->td_rlqe == NULL)
170 curthread->td_rlqe = entry;
172 rlqentry_free(entry);
176 rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
179 MPASS(lock != NULL && cookie != NULL && ilk != NULL);
182 rangelock_unlock_locked(lock, cookie, ilk, true);
186 * Unlock the sub-range of granted lock.
189 rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
190 off_t end, struct mtx *ilk)
192 struct rl_q_entry *entry;
194 MPASS(lock != NULL && cookie != NULL && ilk != NULL);
196 KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
197 ("Unlocking non-granted lock"));
198 KASSERT(entry->rl_q_start == start, ("wrong start"));
199 KASSERT(entry->rl_q_end >= end, ("wrong end"));
202 if (entry->rl_q_end == end) {
203 rangelock_unlock_locked(lock, cookie, ilk, true);
206 entry->rl_q_end = end;
207 rangelock_calc_block(lock);
213 * Add the lock request to the queue of the pending requests for
214 * rangelock. Sleep until the request can be granted unless trylock == true.
217 rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
218 struct mtx *ilk, bool trylock)
220 struct rl_q_entry *entry;
223 MPASS(lock != NULL && ilk != NULL);
226 if (td->td_rlqe != NULL) {
230 entry = rlqentry_alloc();
231 MPASS(entry != NULL);
232 entry->rl_q_flags = mode;
233 entry->rl_q_start = start;
234 entry->rl_q_end = end;
238 * XXXKIB TODO. Check that a thread does not try to enqueue a
239 * lock that is incompatible with another request from the same
243 TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
245 * If rl_currdep == NULL, there is no entry waiting for a conflicting
246 * range to be resolved, so set rl_currdep to this entry. If there is
247 * no conflicting entry for this entry, rl_currdep will be set back to
248 * NULL by rangelock_calc_block().
250 if (lock->rl_currdep == NULL)
251 lock->rl_currdep = entry;
252 rangelock_calc_block(lock);
253 while (!(entry->rl_q_flags & RL_LOCK_GRANTED)) {
256 * For this case, the range is not actually locked
257 * yet, but removal from the list requires the same
258 * steps, except for not doing a rangelock_calc_block()
259 * call, since rangelock_calc_block() was called above.
261 rangelock_unlock_locked(lock, entry, ilk, false);
264 msleep(entry, ilk, 0, "range", 0);
271 rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
274 return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, false));
278 rangelock_tryrlock(struct rangelock *lock, off_t start, off_t end,
282 return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, true));
286 rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
289 return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, false));
293 rangelock_trywlock(struct rangelock *lock, off_t start, off_t end,
297 return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, true));
300 #ifdef INVARIANT_SUPPORT
302 _rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
304 struct rl_q_entry *entry;
307 MPASS(cookie != NULL);
309 flags = entry->rl_q_flags;
312 if ((flags & RL_LOCK_GRANTED) == 0)
313 panic("rangelock not held @ %s:%d\n", file, line);
316 if ((flags & (RL_LOCK_GRANTED | RL_LOCK_READ)) !=
317 (RL_LOCK_GRANTED | RL_LOCK_READ))
318 panic("rangelock not rlocked @ %s:%d\n", file, line);
321 if ((flags & (RL_LOCK_GRANTED | RL_LOCK_WRITE)) !=
322 (RL_LOCK_GRANTED | RL_LOCK_WRITE))
323 panic("rangelock not wlocked @ %s:%d\n", file, line);
326 panic("Unknown rangelock assertion: %d @ %s:%d", what, file,
330 #endif /* INVARIANT_SUPPORT */