2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 2002 Poul-Henning Kamp
5 * Copyright (c) 2002 Networks Associates Technology, Inc.
6 * Copyright (c) 2013 The FreeBSD Foundation
9 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
10 * and NAI Labs, the Security Research Division of Network Associates, Inc.
11 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
12 * DARPA CHATS research program.
14 * Portions of this software were developed by Konstantin Belousov
15 * under sponsorship from the FreeBSD Foundation.
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution.
25 * 3. The names of the authors may not be used to endorse or promote
26 * products derived from this software without specific prior written
29 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD$");
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/kernel.h>
48 #include <sys/malloc.h>
53 #include <sys/stack.h>
54 #include <sys/sysctl.h>
56 #include <machine/stdarg.h>
58 #include <sys/errno.h>
59 #include <geom/geom.h>
60 #include <geom/geom_int.h>
61 #include <sys/devicestat.h>
65 #include <vm/vm_param.h>
66 #include <vm/vm_kern.h>
67 #include <vm/vm_page.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_extern.h>
70 #include <vm/vm_map.h>
/*
 * File-scope state for the GEOM I/O path: the "down" (request) and "up"
 * (completion) bio queues, the bio UMA allocation zone, and the list of
 * registered bio classifiers.
 */
72 static int g_io_transient_map_bio(struct bio *bp);
74 static struct g_bioq g_bio_run_down;
75 static struct g_bioq g_bio_run_up;
78 * Pace is a hint that we've had some trouble recently allocating
79 * bios, so we should back off trying to send I/O down the stack
80 * a bit to let the problem resolve. When pacing, we also turn
81 * off direct dispatch to also reduce memory pressure from I/Os
82 * there, at the expense of some added latency while the memory
83 * pressures exist. See g_io_schedule_down() for more details
86 static volatile u_int pace;
88 static uma_zone_t biozone;
91 * The head of the list of classifiers used in g_io_request.
92 * Use g_register_classifier() and g_unregister_classifier()
93 * to add/remove entries to the list.
94 * Classifiers are invoked in registration order.
96 static TAILQ_HEAD(g_classifier_tailq, g_classifier_hook)
97 g_classifier_tailq = TAILQ_HEAD_INITIALIZER(g_classifier_tailq);
99 #include <machine/atomic.h>
/* Acquire the mutex protecting a bio queue. */
102 g_bioq_lock(struct g_bioq *bq)
105 mtx_lock(&bq->bio_queue_lock);
/* Release the mutex protecting a bio queue. */
109 g_bioq_unlock(struct g_bioq *bq)
112 mtx_unlock(&bq->bio_queue_lock);
/* Tear down a bio queue's mutex (counterpart of g_bioq_init()). */
117 g_bioq_destroy(struct g_bioq *bq)
120 mtx_destroy(&bq->bio_queue_lock);
/* Initialize a bio queue: empty tail queue plus its protecting mutex. */
125 g_bioq_init(struct g_bioq *bq)
128 TAILQ_INIT(&bq->bio_queue);
129 mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF);
/*
 * Dequeue the first bio from a bio queue, clearing its BIO_ONQUEUE flag
 * and decrementing the queue length.  Caller is expected to hold the
 * queue lock (the lock itself is not visible in this excerpt — confirm).
 */
133 g_bioq_first(struct g_bioq *bq)
137 bp = TAILQ_FIRST(&bq->bio_queue);
139 KASSERT((bp->bio_flags & BIO_ONQUEUE),
140 ("Bio not on queue bp=%p target %p", bp, bq));
141 bp->bio_flags &= ~BIO_ONQUEUE;
142 TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue);
143 bq->bio_queue_length--;
/*
 * g_new_bio(): allocate a zeroed bio without sleeping (M_NOWAIT); the
 * allocation may therefore fail.  Optionally logs a KTR trace with a
 * 3-deep stack capture when KTR_GEOM tracing is compiled in and enabled.
 */
153 bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
155 if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
158 CTR1(KTR_GEOM, "g_new_bio(): %p", bp);
160 CTRSTACK(KTR_GEOM, &st, 3);
/*
 * g_alloc_bio(): allocate a zeroed bio, sleeping if necessary (M_WAITOK),
 * so the result is never NULL.  Traces the allocation under KTR_GEOM.
 */
171 bp = uma_zalloc(biozone, M_WAITOK | M_ZERO);
173 if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
176 CTR1(KTR_GEOM, "g_alloc_bio(): %p", bp);
178 CTRSTACK(KTR_GEOM, &st, 3);
/* Free a bio back to the biozone UMA zone, tracing under KTR_GEOM. */
185 g_destroy_bio(struct bio *bp)
188 if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
191 CTR1(KTR_GEOM, "g_destroy_bio(): %p", bp)
193 CTRSTACK(KTR_GEOM, &st, 3);
196 uma_zfree(biozone, bp);
/*
 * Clone a bio for passing down the stack: the clone records bp as its
 * parent and copies the request fields (cmd, offset, length, data/ma
 * buffer description, attribute) plus the zone args for BIO_ZONE.
 * Allocation is M_NOWAIT, so the clone may fail (NULL check is in the
 * elided lines of this excerpt — confirm against full source).
 */
200 g_clone_bio(struct bio *bp)
204 bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
206 bp2->bio_parent = bp;
207 bp2->bio_cmd = bp->bio_cmd;
209 * BIO_ORDERED flag may be used by disk drivers to enforce
210 * ordering restrictions, so this flag needs to be cloned.
211 * BIO_UNMAPPED and BIO_VLIST should be inherited, to properly
212 * indicate which way the buffer is passed.
213 * Other bio flags are not suitable for cloning.
215 bp2->bio_flags = bp->bio_flags &
216 (BIO_ORDERED | BIO_UNMAPPED | BIO_VLIST);
217 bp2->bio_length = bp->bio_length;
218 bp2->bio_offset = bp->bio_offset;
219 bp2->bio_data = bp->bio_data;
220 bp2->bio_ma = bp->bio_ma;
221 bp2->bio_ma_n = bp->bio_ma_n;
222 bp2->bio_ma_offset = bp->bio_ma_offset;
223 bp2->bio_attribute = bp->bio_attribute;
224 if (bp->bio_cmd == BIO_ZONE)
225 bcopy(&bp->bio_zone, &bp2->bio_zone,
226 sizeof(bp->bio_zone));
227 /* Inherit classification info from the parent */
228 bp2->bio_classifier1 = bp->bio_classifier1;
229 bp2->bio_classifier2 = bp->bio_classifier2;
230 #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
231 bp2->bio_track_bp = bp->bio_track_bp;
236 if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
239 CTR2(KTR_GEOM, "g_clone_bio(%p): %p", bp, bp2);
241 CTRSTACK(KTR_GEOM, &st, 3);
/*
 * Like g_clone_bio() but sleeps for the allocation (M_WAITOK, never
 * fails) and inherits fewer flags (only the buffer-representation flags
 * BIO_UNMAPPED and BIO_VLIST; notably not BIO_ORDERED, and no
 * classification or zone copy).
 */
248 g_duplicate_bio(struct bio *bp)
252 bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO);
253 bp2->bio_flags = bp->bio_flags & (BIO_UNMAPPED | BIO_VLIST);
254 bp2->bio_parent = bp;
255 bp2->bio_cmd = bp->bio_cmd;
256 bp2->bio_length = bp->bio_length;
257 bp2->bio_offset = bp->bio_offset;
258 bp2->bio_data = bp->bio_data;
259 bp2->bio_ma = bp->bio_ma;
260 bp2->bio_ma_n = bp->bio_ma_n;
261 bp2->bio_ma_offset = bp->bio_ma_offset;
262 bp2->bio_attribute = bp->bio_attribute;
265 if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
268 CTR2(KTR_GEOM, "g_duplicate_bio(%p): %p", bp, bp2);
270 CTRSTACK(KTR_GEOM, &st, 3);
/* Reset a bio to a pristine state by zeroing the whole structure. */
277 g_reset_bio(struct bio *bp)
280 bzero(bp, sizeof(*bp));
/*
 * One-time initialization of the I/O path: set up the down and up bio
 * queues and create the UMA zone that backs all bio allocations.
 */
287 g_bioq_init(&g_bio_run_down);
288 g_bioq_init(&g_bio_run_up);
289 biozone = uma_zcreate("g_bio", sizeof (struct bio),
/*
 * Synchronously issue a BIO_GETATTR request for attribute 'attr' on
 * consumer 'cp'.  On return *len is updated to the number of bytes
 * actually completed; the attribute value is written into 'ptr'
 * (assignment of bio_data to ptr is in an elided line — confirm).
 */
296 g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr)
301 g_trace(G_T_BIO, "bio_getattr(%s)", attr);
303 bp->bio_cmd = BIO_GETATTR;
305 bp->bio_attribute = attr;
306 bp->bio_length = *len;
308 g_io_request(bp, cp);
309 error = biowait(bp, "ggetattr");
310 *len = bp->bio_completed;
/*
 * Synchronously issue a BIO_ZONE command described by 'zone_args' on
 * consumer 'cp'.  The zone arguments are copied into the bio before
 * dispatch and copied back out after completion so the caller sees any
 * results (e.g. reported zones).
 */
316 g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp)
321 g_trace(G_T_BIO, "bio_zone(%d)", zone_args->zone_cmd);
323 bp->bio_cmd = BIO_ZONE;
326 * XXX KDM need to handle report zone data.
328 bcopy(zone_args, &bp->bio_zone, sizeof(*zone_args));
329 if (zone_args->zone_cmd == DISK_ZONE_REPORT_ZONES)
/* For REPORT ZONES the transfer length is sized from the number of
 * entries the caller allocated. */
331 zone_args->zone_params.report.entries_allocated *
332 sizeof(struct disk_zone_rep_entry);
336 g_io_request(bp, cp);
337 error = biowait(bp, "gzone");
338 bcopy(&bp->bio_zone, zone_args, sizeof(*zone_args));
/*
 * Synchronously issue an ordered BIO_FLUSH on consumer 'cp' and return
 * the completion status from biowait().
 */
344 g_io_flush(struct g_consumer *cp)
349 g_trace(G_T_BIO, "bio_flush(%s)", cp->provider->name);
351 bp->bio_cmd = BIO_FLUSH;
/* BIO_ORDERED makes drivers serialize this flush against other I/O. */
352 bp->bio_flags |= BIO_ORDERED;
354 bp->bio_attribute = NULL;
355 bp->bio_offset = cp->provider->mediasize;
358 g_io_request(bp, cp);
359 error = biowait(bp, "gflush");
/*
 * Validate a bio before it is dispatched to the provider: check access
 * counters, reject I/O that is misaligned or outside the media, truncate
 * requests that run past end-of-media, complete zero-length transfers on
 * the spot, and establish a transient KVA mapping for unmapped bios when
 * the provider cannot accept them.  Returns 0 to proceed, an errno to
 * fail the bio, or EJUSTRETURN when the bio was fully handled here.
 */
365 g_io_check(struct bio *bp)
367 struct g_consumer *cp;
368 struct g_provider *pp;
372 biotrack(bp, __func__);
377 /* Fail if access counters don't allow the operation */
378 switch(bp->bio_cmd) {
/* Zone report/params queries need only read access; other zone
 * commands require write access on the consumer. */
391 if ((bp->bio_zone.zone_cmd == DISK_ZONE_REPORT_ZONES) ||
392 (bp->bio_zone.zone_cmd == DISK_ZONE_GET_PARAMS)) {
395 } else if (cp->acw == 0)
401 /* if provider is marked for error, don't disturb. */
404 if (cp->flags & G_CF_ORPHAN)
407 switch(bp->bio_cmd) {
411 /* Zero sectorsize or mediasize is probably a lack of media. */
412 if (pp->sectorsize == 0 || pp->mediasize == 0)
414 /* Reject I/O not on sector boundary */
415 if (bp->bio_offset % pp->sectorsize)
417 /* Reject I/O not integral sector long */
418 if (bp->bio_length % pp->sectorsize)
420 /* Reject requests before or past the end of media. */
421 if (bp->bio_offset < 0)
423 if (bp->bio_offset > pp->mediasize)
426 /* Truncate requests to the end of providers media. */
427 excess = bp->bio_offset + bp->bio_length;
428 if (excess > bp->bio_to->mediasize) {
429 KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 ||
430 round_page(bp->bio_ma_offset +
431 bp->bio_length) / PAGE_SIZE == bp->bio_ma_n,
432 ("excess bio %p too short", bp));
433 excess -= bp->bio_to->mediasize;
434 bp->bio_length -= excess;
/* Keep the page count of an unmapped bio in sync with the
 * shortened length. */
435 if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
436 bp->bio_ma_n = round_page(bp->bio_ma_offset +
437 bp->bio_length) / PAGE_SIZE;
440 CTR3(KTR_GEOM, "g_down truncated bio "
441 "%p provider %s by %d", bp,
442 bp->bio_to->name, excess);
445 /* Deliver zero length transfers right here. */
446 if (bp->bio_length == 0) {
447 CTR2(KTR_GEOM, "g_down terminated 0-length "
448 "bp %p provider %s", bp, bp->bio_to->name);
/* Unmapped READ/WRITE to a provider that cannot accept unmapped
 * buffers must be remapped into KVA first. */
452 if ((bp->bio_flags & BIO_UNMAPPED) != 0 &&
453 (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 &&
454 (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
455 if ((error = g_io_transient_map_bio(bp)) >= 0)
462 return (EJUSTRETURN);
466 * bio classification support.
468 * g_register_classifier() and g_unregister_classifier()
469 * are used to add/remove a classifier from the list.
470 * The list is protected using the g_bio_run_down lock,
471 * because the classifiers are called in this path.
473 * g_io_request() passes bio's that are not already classified
474 * (i.e. those with bio_classifier1 == NULL) to g_run_classifiers().
475 * Classifiers can store their result in the two fields
476 * bio_classifier1 and bio_classifier2.
477 * A classifier that updates one of the fields should
478 * return a non-zero value.
479 * If no classifier updates the field, g_run_classifiers() sets
480 * bio_classifier1 = BIO_NOTCLASSIFIED to avoid further calls.
/*
 * Append a classifier hook to the classifier list under the
 * g_bio_run_down lock (see the classification comment block above the
 * list declaration for the protocol).
 */
484 g_register_classifier(struct g_classifier_hook *hook)
487 g_bioq_lock(&g_bio_run_down);
488 TAILQ_INSERT_TAIL(&g_classifier_tailq, hook, link);
489 g_bioq_unlock(&g_bio_run_down);
/*
 * Remove a classifier hook from the list, again serialized by the
 * g_bio_run_down lock.  The list is scanned to confirm the hook is
 * actually registered before removal (match test is in an elided line).
 */
495 g_unregister_classifier(struct g_classifier_hook *hook)
497 struct g_classifier_hook *entry;
499 g_bioq_lock(&g_bio_run_down);
500 TAILQ_FOREACH(entry, &g_classifier_tailq, link) {
502 TAILQ_REMOVE(&g_classifier_tailq, hook, link);
506 g_bioq_unlock(&g_bio_run_down);
/*
 * Invoke every registered classifier on 'bp' in registration order.
 * If none of them claims the bio, mark bio_classifier1 as
 * BIO_NOTCLASSIFIED so later passes skip the list walk.
 */
510 g_run_classifiers(struct bio *bp)
512 struct g_classifier_hook *hook;
515 biotrack(bp, __func__);
517 TAILQ_FOREACH(hook, &g_classifier_tailq, link)
518 classified |= hook->func(hook->arg, bp);
521 bp->bio_classifier1 = BIO_NOTCLASSIFIED;
/*
 * Entry point for sending a bio down the stack on behalf of consumer
 * 'cp'.  Validates the request with KASSERTs, snapshots the consumer's
 * private bio fields so g_io_deliver() can detect provider tampering,
 * starts devstat accounting, runs classifiers if registered, and then
 * either direct-dispatches the bio to the provider's start routine or
 * enqueues it on g_bio_run_down and wakes the g_down thread.
 */
525 g_io_request(struct bio *bp, struct g_consumer *cp)
527 struct g_provider *pp;
529 int direct, error, first;
532 biotrack(bp, __func__);
534 KASSERT(cp != NULL, ("NULL cp in g_io_request"));
535 KASSERT(bp != NULL, ("NULL bp in g_io_request"));
537 KASSERT(pp != NULL, ("consumer not attached in g_io_request"));
539 KASSERT(bp->bio_driver1 == NULL,
540 ("bio_driver1 used by the consumer (geom %s)", cp->geom->name));
541 KASSERT(bp->bio_driver2 == NULL,
542 ("bio_driver2 used by the consumer (geom %s)", cp->geom->name));
543 KASSERT(bp->bio_pflags == 0,
544 ("bio_pflags used by the consumer (geom %s)", cp->geom->name));
546 * Remember consumer's private fields, so we can detect if they were
547 * modified by the provider.
549 bp->_bio_caller1 = bp->bio_caller1;
550 bp->_bio_caller2 = bp->bio_caller2;
551 bp->_bio_cflags = bp->bio_cflags;
/* Sanity-check data pointer and sector alignment per command type. */
555 if (cmd == BIO_READ || cmd == BIO_WRITE || cmd == BIO_GETATTR) {
556 KASSERT(bp->bio_data != NULL,
557 ("NULL bp->data in g_io_request(cmd=%hu)", bp->bio_cmd));
559 if (cmd == BIO_DELETE || cmd == BIO_FLUSH) {
560 KASSERT(bp->bio_data == NULL,
561 ("non-NULL bp->data in g_io_request(cmd=%hu)",
564 if (cmd == BIO_READ || cmd == BIO_WRITE || cmd == BIO_DELETE) {
565 KASSERT(bp->bio_offset % cp->provider->sectorsize == 0,
566 ("wrong offset %jd for sectorsize %u",
567 bp->bio_offset, cp->provider->sectorsize));
568 KASSERT(bp->bio_length % cp->provider->sectorsize == 0,
569 ("wrong length %jd for sectorsize %u",
570 bp->bio_length, cp->provider->sectorsize));
573 g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
574 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);
579 bp->bio_completed = 0;
581 KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
582 ("Bio already on queue bp=%p", bp));
/* Use the precise timestamp only when statistics are collected. */
583 if ((g_collectstats & G_STATS_CONSUMERS) != 0 ||
584 ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL))
585 binuptime(&bp->bio_t0);
587 getbinuptime(&bp->bio_t0);
589 #ifdef GET_STACK_USAGE
/* Direct dispatch is only safe when both sides opted in, we are not
 * already on a geom thread, and sleeping constraints allow it. */
590 direct = (cp->flags & G_CF_DIRECT_SEND) != 0 &&
591 (pp->flags & G_PF_DIRECT_RECEIVE) != 0 &&
592 !g_is_geom_thread(curthread) &&
593 ((pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 ||
594 (bp->bio_flags & BIO_UNMAPPED) == 0 || THREAD_CAN_SLEEP()) &&
597 /* Block direct execution if less than half of stack left. */
599 GET_STACK_USAGE(st, su);
607 if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1) {
608 g_bioq_lock(&g_bio_run_down);
609 g_run_classifiers(bp);
610 g_bioq_unlock(&g_bio_run_down);
614 * The statistics collection is lockless, as such, but we
615 * can not update one instance of the statistics from more
616 * than one thread at a time, so grab the lock first.
618 mtxp = mtx_pool_find(mtxpool_sleep, pp);
620 if (g_collectstats & G_STATS_PROVIDERS)
621 devstat_start_transaction(pp->stat, &bp->bio_t0);
622 if (g_collectstats & G_STATS_CONSUMERS)
623 devstat_start_transaction(cp->stat, &bp->bio_t0);
/* Direct dispatch: run g_io_check() and the provider's start routine
 * right here instead of bouncing through the g_down thread. */
629 error = g_io_check(bp);
631 CTR3(KTR_GEOM, "g_io_request g_io_check on bp %p "
632 "provider %s returned %d", bp, bp->bio_to->name,
634 g_io_deliver(bp, error);
637 bp->bio_to->geom->start(bp);
/* Queued dispatch: put the bio on g_bio_run_down and wake g_down
 * if the queue was previously empty. */
639 g_bioq_lock(&g_bio_run_down);
640 first = TAILQ_EMPTY(&g_bio_run_down.bio_queue);
641 TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue);
642 bp->bio_flags |= BIO_ONQUEUE;
643 g_bio_run_down.bio_queue_length++;
644 g_bioq_unlock(&g_bio_run_down);
645 /* Pass it on down. */
647 wakeup(&g_wait_down);
/*
 * Complete a bio back to its consumer with status 'error'.  Verifies the
 * provider did not clobber the consumer's private fields (unless the
 * geom carries G_GEOM_VOLATILE_BIO), ends devstat accounting, and then
 * either direct-dispatches the completion or enqueues the bio on
 * g_bio_run_up for the g_up thread.  An ENOMEM completion is retried by
 * resubmitting the request via g_io_request() instead of delivering it.
 */
652 g_io_deliver(struct bio *bp, int error)
655 struct g_consumer *cp;
656 struct g_provider *pp;
660 biotrack(bp, __func__);
662 KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
664 KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));
667 bp->bio_error = error;
671 KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver"));
672 KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver"));
675 * Some classes - GJournal in particular - can modify bio's
676 * private fields while the bio is in transit; G_GEOM_VOLATILE_BIO
677 * flag means it's an expected behaviour for that particular geom.
679 if ((cp->geom->flags & G_GEOM_VOLATILE_BIO) == 0) {
680 KASSERT(bp->bio_caller1 == bp->_bio_caller1,
681 ("bio_caller1 used by the provider %s", pp->name));
682 KASSERT(bp->bio_caller2 == bp->_bio_caller2,
683 ("bio_caller2 used by the provider %s", pp->name));
684 KASSERT(bp->bio_cflags == bp->_bio_cflags,
685 ("bio_cflags used by the provider %s", pp->name));
688 KASSERT(bp->bio_completed >= 0, ("bio_completed can't be less than 0"));
689 KASSERT(bp->bio_completed <= bp->bio_length,
690 ("bio_completed can't be greater than bio_length"));
693 "g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd",
694 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
695 (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);
697 KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
698 ("Bio already on queue bp=%p", bp));
701 * XXX: next two doesn't belong here
703 bp->bio_bcount = bp->bio_length;
704 bp->bio_resid = bp->bio_bcount - bp->bio_completed;
706 #ifdef GET_STACK_USAGE
/* Direct completion requires both sides to opt in and that we are not
 * already running on a geom thread. */
707 direct = (pp->flags & G_PF_DIRECT_SEND) &&
708 (cp->flags & G_CF_DIRECT_RECEIVE) &&
709 !g_is_geom_thread(curthread);
711 /* Block direct execution if less than half of stack left. */
713 GET_STACK_USAGE(st, su);
722 * The statistics collection is lockless, as such, but we
723 * can not update one instance of the statistics from more
724 * than one thread at a time, so grab the lock first.
726 if ((g_collectstats & G_STATS_CONSUMERS) != 0 ||
727 ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL))
729 mtxp = mtx_pool_find(mtxpool_sleep, cp);
731 if (g_collectstats & G_STATS_PROVIDERS)
732 devstat_end_transaction_bio_bt(pp->stat, bp, &now);
733 if (g_collectstats & G_STATS_CONSUMERS)
734 devstat_end_transaction_bio_bt(cp->stat, bp, &now);
/* Non-ENOMEM completions are delivered (directly or via g_bio_run_up);
 * ENOMEM falls through to the retry path below. */
739 if (error != ENOMEM) {
740 bp->bio_error = error;
744 g_bioq_lock(&g_bio_run_up);
745 first = TAILQ_EMPTY(&g_bio_run_up.bio_queue);
746 TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue);
747 bp->bio_flags |= BIO_ONQUEUE;
748 g_bio_run_up.bio_queue_length++;
749 g_bioq_unlock(&g_bio_run_up);
/* ENOMEM: reset driver-private state and resubmit the request. */
757 printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
758 bp->bio_children = 0;
760 bp->bio_driver1 = NULL;
761 bp->bio_driver2 = NULL;
763 g_io_request(bp, cp);
/*
 * kern.geom sysctl counters for the transient-mapping machinery used by
 * g_io_transient_map_bio(): totals, retry limit, and failure/in-flight
 * counts.
 */
768 SYSCTL_DECL(_kern_geom);
770 static long transient_maps;
771 SYSCTL_LONG(_kern_geom, OID_AUTO, transient_maps, CTLFLAG_RD,
773 "Total count of the transient mapping requests");
774 u_int transient_map_retries = 10;
775 SYSCTL_UINT(_kern_geom, OID_AUTO, transient_map_retries, CTLFLAG_RW,
776 &transient_map_retries, 0,
777 "Max count of retries used before giving up on creating transient map");
778 int transient_map_hard_failures;
779 SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_hard_failures, CTLFLAG_RD,
780 &transient_map_hard_failures, 0,
781 "Failures to establish the transient mapping due to retry attempts "
783 int transient_map_soft_failures;
784 SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_soft_failures, CTLFLAG_RD,
785 &transient_map_soft_failures, 0,
786 "Count of retried failures to establish the transient mapping");
787 int inflight_transient_maps;
788 SYSCTL_INT(_kern_geom, OID_AUTO, inflight_transient_maps, CTLFLAG_RD,
789 &inflight_transient_maps, 0,
790 "Current count of the active transient maps");
/*
 * Map the pages of an unmapped bio into transient KVA so that a provider
 * that cannot handle unmapped buffers can process it.  Allocates KVA from
 * transient_arena without sleeping; on shortage it pauses and retries up
 * to transient_map_retries times before failing hard with EDEADLK.
 * On success, bio_data points into the new mapping, BIO_TRANSIENT_MAPPING
 * is set, BIO_UNMAPPED is cleared, and EJUSTRETURN is returned so the
 * caller re-checks the (now mapped) bio.
 */
793 g_io_transient_map_bio(struct bio *bp)
799 KASSERT(unmapped_buf_allowed, ("unmapped disabled"));
801 size = round_page(bp->bio_ma_offset + bp->bio_length);
802 KASSERT(size / PAGE_SIZE == bp->bio_ma_n, ("Bio too short %p", bp));
805 atomic_add_long(&transient_maps, 1);
807 if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) {
808 if (transient_map_retries != 0 &&
809 retried >= transient_map_retries) {
810 CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s",
811 bp, bp->bio_to->name);
812 atomic_add_int(&transient_map_hard_failures, 1);
813 return (EDEADLK/* XXXKIB */);
816 * Naive attempt to quiesce the I/O to get more
817 * in-flight requests completed and defragment
818 * the transient_arena.
820 CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d",
821 bp, bp->bio_to->name, retried);
822 pause("g_d_tra", hz / 10);
824 atomic_add_int(&transient_map_soft_failures, 1);
828 atomic_add_int(&inflight_transient_maps, 1);
829 pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size));
830 bp->bio_data = (caddr_t)addr + bp->bio_ma_offset;
831 bp->bio_flags |= BIO_TRANSIENT_MAPPING;
832 bp->bio_flags &= ~BIO_UNMAPPED;
833 return (EJUSTRETURN);
/*
 * Main loop of the g_down kernel thread: sleep until work appears on
 * g_bio_run_down, then dequeue each bio, optionally pace when recent
 * allocation failures were signalled, run g_io_check(), and hand the bio
 * to the provider's start routine with sleeping disallowed.
 */
837 g_io_schedule_down(struct thread *tp __unused)
843 g_bioq_lock(&g_bio_run_down);
844 bp = g_bioq_first(&g_bio_run_down);
846 CTR0(KTR_GEOM, "g_down going to sleep");
847 msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock,
848 PRIBIO | PDROP, "-", 0);
851 CTR0(KTR_GEOM, "g_down has work to do");
852 g_bioq_unlock(&g_bio_run_down);
853 biotrack(bp, __func__);
856 * There has been at least one memory allocation
857 * failure since the last I/O completed. Pause 1ms to
858 * give the system a chance to free up memory. We only
859 * do this once because a large number of allocations
860 * can fail in the direct dispatch case and there's no
861 * relationship between the number of these failures and
862 * the length of the outage. If there's still an outage,
863 * we'll pause again and again until it's
864 * resolved. Older versions paused longer and once per
865 * allocation failure. This was OK for a single threaded
866 * g_down, but with direct dispatch would lead to max of
867 * 10 IOPs for minutes at a time when transient memory
868 * issues prevented allocation for a batch of requests
869 * from the upper layers.
871 * XXX This pacing is really lame. It needs to be solved
872 * by other methods. This is OK only because the worst
873 * case scenario is so rare. In the worst case scenario
874 * all memory is tied up waiting for I/O to complete
875 * which can never happen since we can't allocate bios
878 CTR0(KTR_GEOM, "g_down pacing self");
879 pause("g_down", min(hz/1000, 1));
882 CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp,
884 error = g_io_check(bp);
886 CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider "
887 "%s returned %d", bp, bp->bio_to->name, error);
888 g_io_deliver(bp, error);
/* The start routine must not sleep in the g_down thread. */
891 THREAD_NO_SLEEPING();
892 CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld "
893 "len %ld", bp, bp->bio_to->name, bp->bio_offset,
895 bp->bio_to->geom->start(bp);
896 THREAD_SLEEPING_OK();
/*
 * Main loop of the g_up kernel thread: sleep until completions appear on
 * g_bio_run_up, then dequeue each bio and run its completion (biodone)
 * with sleeping disallowed.
 */
901 g_io_schedule_up(struct thread *tp __unused)
906 g_bioq_lock(&g_bio_run_up);
907 bp = g_bioq_first(&g_bio_run_up);
909 CTR0(KTR_GEOM, "g_up going to sleep");
910 msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock,
911 PRIBIO | PDROP, "-", 0);
914 g_bioq_unlock(&g_bio_run_up);
915 THREAD_NO_SLEEPING();
916 CTR4(KTR_GEOM, "g_up biodone bp %p provider %s off "
917 "%jd len %ld", bp, bp->bio_to->name,
918 bp->bio_offset, bp->bio_length);
920 THREAD_SLEEPING_OK();
/*
 * Synchronous convenience read: allocate a buffer of 'length' bytes,
 * issue a BIO_READ at 'offset' on consumer 'cp', and wait for it.
 * Length must be at least one sector and no more than MAXPHYS.
 * Returns the buffer (ownership passes to the caller) or NULL on error;
 * the error code is stored through 'error' when non-NULL (return/cleanup
 * lines are elided in this excerpt — confirm against full source).
 */
925 g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error)
931 KASSERT(length > 0 && length >= cp->provider->sectorsize &&
932 length <= MAXPHYS, ("g_read_data(): invalid length %jd",
936 bp->bio_cmd = BIO_READ;
938 bp->bio_offset = offset;
939 bp->bio_length = length;
940 ptr = g_malloc(length, M_WAITOK);
942 g_io_request(bp, cp);
943 errorc = biowait(bp, "gread");
955 * A read function for use by ffs_sbget when used by GEOM-layer routines.
958 g_use_g_read_data(void *devfd, off_t loc, void **bufp, int size)
960 struct g_consumer *cp;
962 KASSERT(*bufp == NULL,
963 ("g_use_g_read_data: non-NULL *bufp %p\n", *bufp));
/* devfd is an opaque handle; here it is really a g_consumer pointer. */
965 cp = (struct g_consumer *)devfd;
967 * Take care not to issue an invalid I/O request. The offset of
968 * the superblock candidate must be multiples of the provider's
969 * sector size, otherwise an FFS can't exist on the provider
972 if (loc % cp->provider->sectorsize != 0)
974 *bufp = g_read_data(cp, loc, size, NULL);
/*
 * Synchronous convenience write: issue a BIO_WRITE of 'length' bytes
 * from 'ptr' at 'offset' on consumer 'cp' and wait for completion.
 * Length must be at least one sector and no more than MAXPHYS.
 */
981 g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length)
986 KASSERT(length > 0 && length >= cp->provider->sectorsize &&
987 length <= MAXPHYS, ("g_write_data(): invalid length %jd",
991 bp->bio_cmd = BIO_WRITE;
993 bp->bio_offset = offset;
994 bp->bio_length = length;
996 g_io_request(bp, cp);
997 error = biowait(bp, "gwrite");
1003 * A write function for use by ffs_sbput when used by GEOM-layer routines.
1006 g_use_g_write_data(void *devfd, off_t loc, void *buf, int size)
1009 return (g_write_data((struct g_consumer *)devfd, loc, buf, size));
/*
 * Synchronous convenience delete (TRIM/UNMAP): issue a BIO_DELETE of
 * 'length' bytes at 'offset' on consumer 'cp' and wait for completion.
 * Length must be at least one sector; note there is no MAXPHYS cap here,
 * unlike g_read_data()/g_write_data(), since no data is transferred.
 */
1013 g_delete_data(struct g_consumer *cp, off_t offset, off_t length)
1018 KASSERT(length > 0 && length >= cp->provider->sectorsize,
1019 ("g_delete_data(): invalid length %jd", (intmax_t)length));
1022 bp->bio_cmd = BIO_DELETE;
1023 bp->bio_done = NULL;
1024 bp->bio_offset = offset;
1025 bp->bio_length = length;
1026 bp->bio_data = NULL;
1027 g_io_request(bp, cp);
1028 error = biowait(bp, "gdelete");
/*
 * Print a one-line description of a bio: 'prefix', then the formatted bio
 * (via g_format_bio), then a printf-style suffix, all assembled in a
 * fixed on-stack sbuf that drains straight to the console printf.
 */
1034 g_print_bio(const char *prefix, const struct bio *bp, const char *fmtsuffix,
1037 #ifndef PRINTF_BUFR_SIZE
1038 #define PRINTF_BUFR_SIZE 64
1040 char bufr[PRINTF_BUFR_SIZE];
1041 struct sbuf sb, *sbp __unused;
1044 sbp = sbuf_new(&sb, bufr, sizeof(bufr), SBUF_FIXEDLEN);
1045 KASSERT(sbp != NULL, ("sbuf_new misused?"));
1047 sbuf_set_drain(&sb, sbuf_printf_drain, NULL);
1049 sbuf_cat(&sb, prefix);
1050 g_format_bio(&sb, bp);
1052 va_start(ap, fmtsuffix);
1053 sbuf_vprintf(&sb, fmtsuffix, ap);
1056 sbuf_nl_terminate(&sb);
1063 g_format_bio(struct sbuf *sb, const struct bio *bp)
1065 const char *pname, *cmd = NULL;
1067 if (bp->bio_to != NULL)
1068 pname = bp->bio_to->name;
1070 pname = "[unknown]";
1072 switch (bp->bio_cmd) {
1075 sbuf_printf(sb, "%s[%s(attr=%s)]", pname, cmd,
1080 sbuf_printf(sb, "%s[%s]", pname, cmd);
1083 char *subcmd = NULL;
1085 switch (bp->bio_zone.zone_cmd) {
1086 case DISK_ZONE_OPEN:
1089 case DISK_ZONE_CLOSE:
1092 case DISK_ZONE_FINISH:
1098 case DISK_ZONE_REPORT_ZONES:
1099 subcmd = "REPORT ZONES";
1101 case DISK_ZONE_GET_PARAMS:
1102 subcmd = "GET PARAMS";
1108 sbuf_printf(sb, "%s[%s,%s]", pname, cmd, subcmd);
1122 sbuf_printf(sb, "%s[%s()]", pname, cmd);
1125 sbuf_printf(sb, "%s[%s(offset=%jd, length=%jd)]", pname, cmd,
1126 (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);