2 * Copyright (c) 2006, Cisco Systems, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
34 #include <sys/param.h>
35 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 #include <sys/kernel.h>
39 #include <sys/socket.h>
40 #include <sys/queue.h>
41 #include <sys/taskqueue.h>
42 #include <sys/namei.h>
44 #include <sys/filedesc.h>
45 #include <sys/vnode.h>
46 #include <sys/fcntl.h>
50 #include <sys/module.h>
52 #include <sys/sysctl.h>
54 #include <geom/geom.h>
56 #include <vm/vm_extern.h>
57 #include <vm/vm_kern.h>
59 #include <machine/xen-os.h>
60 #include <machine/hypervisor.h>
61 #include <machine/hypervisor-ifs.h>
62 #include <machine/xen_intr.h>
63 #include <machine/evtchn.h>
64 #include <machine/xenbus.h>
65 #include <machine/gnttab.h>
66 #include <machine/xen-public/memory.h>
67 #include <dev/xen/xenbus/xenbus_comms.h>
/*
 * Debug/warning printf macros.  DPRINTF is defined twice here because the
 * original file selects one definition with an #if DEBUG / #else pair that
 * is not visible in this excerpt — TODO confirm against the full source.
 */
71 #define DPRINTF(fmt, args...) \
72 printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
74 #define DPRINTF(fmt, args...) ((void)0)
77 #define WPRINTF(fmt, args...) \
78 printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
/* Sentinel stored in pending_grant_handles[] for an unmapped slot. */
80 #define BLKBACK_INVALID_HANDLE (~0)
/* Tail of a ring-mapping struct (struct ring_ref?) — most fields elided
 * in this excerpt; `handle` is the grant handle of the mapped ring page. */
84 grant_handle_t handle;
/*
 * Per-frontend backend instance state (aliased as blkif_t elsewhere in
 * this file).  Only a partial view of the fields is visible here; the
 * full struct in the original file carries many more members (domid,
 * ref_cnt, state, dev_name, vnode/cdev handles, statistics counters).
 */
88 typedef struct blkback_info {
/* Linkage on the global req_sched_list work queue. */
91 STAILQ_ENTRY(blkback_info) next_req;
/* Non-zero while this instance sits on req_sched_list. */
92 int on_req_sched_list;
/* Owning xenbus device and last state reported by the frontend. */
94 struct xenbus_device *xdev;
95 XenbusState frontend_state;
/* Shared request/response ring and its event channel. */
102 blkif_back_ring_t ring;
103 evtchn_port_t evtchn;
/* log2(sector size) — used to convert between 512-byte and disk sectors. */
118 int sector_size_shift;
/* Media capacity expressed in disk-native sectors. */
120 u_int media_num_sectors;
/* Serializes response-ring production in make_response(). */
125 struct mtx blk_ring_lock;
/*
 * Module-global pending-request pool and scheduling state.
 * (Original explanatory comment retained below.)
 */
137 * These are rather arbitrary. They are fairly large because adjacent requests
138 * pulled from a communication ring are quite likely to end up being part of
139 * the same scatter/gather request at the disc.
141 * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
143 * This will increase the chances of being able to write whole tracks.
144 * 64 should be enough to keep us competitive with Linux.
/* Pool depth; tunable via loader variable xen.vbd.blkif_reqs. */
146 static int blkif_reqs = 64;
147 TUNABLE_INT("xen.vbd.blkif_reqs", &blkif_reqs);
/* blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST, computed in blkback_init(). */
149 static int mmap_pages;
152 * Each outstanding request that we've passed to the lower device layers has a
153 * 'pending_req' allocated to it. Each buffer_head that completes decrements
154 * the pendcnt towards zero. When it hits zero, the specified domain has a
155 * response queued for it, with the saved 'id' passed back.
/* Per-request bookkeeping; several fields elided in this excerpt. */
157 typedef struct pending_req {
162 unsigned short operation;
164 STAILQ_ENTRY(pending_req) free_list;
/* Backing array for the pool plus the free list and its lock. */
167 static pending_req_t *pending_reqs;
168 static STAILQ_HEAD(pending_reqs_list, pending_req) pending_free =
169 STAILQ_HEAD_INITIALIZER(pending_free);
170 static struct mtx pending_free_lock;
/* FIFO of blkif instances with work queued; serviced by blk_req_task. */
172 static STAILQ_HEAD(blkback_req_sched_list, blkback_info) req_sched_list =
173 STAILQ_HEAD_INITIALIZER(req_sched_list);
174 static struct mtx req_sched_list_lock;
/* Pre-reserved VA range for grant mappings and its per-page bookkeeping. */
176 static unsigned long mmap_vstart;
177 static unsigned long *pending_vaddrs;
178 static grant_handle_t *pending_grant_handles;
/* taskqueue_swi task that drains req_sched_list (see blk_req_action). */
180 static struct task blk_req_task;
/* Forward declarations. */
183 static void disconnect_ring(blkif_t *blkif);
184 static int vbd_add_dev(struct xenbus_device *xdev);
/* Map (pending request, segment index) to a flat page index into the
 * pending_vaddrs/pending_grant_handles arrays. */
186 static inline int vaddr_pagenr(pending_req_t *req, int seg)
188 return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
/* Kernel virtual address reserved for a request's seg-th grant mapping. */
191 static inline unsigned long vaddr(pending_req_t *req, int seg)
193 return pending_vaddrs[vaddr_pagenr(req, seg)];
/* lvalue accessor for the grant handle of a request's seg-th page. */
196 #define pending_handle(_req, _seg) \
197 (pending_grant_handles[vaddr_pagenr(_req, _seg)])
/*
 * Allocate nr_pages of kernel VA whose backing machine frames are handed
 * back to Xen (XENMEM_decrease_reservation), leaving empty PTE slots that
 * grant mappings can later be installed into.  Works in batches of 16
 * pages per multicall.  Returns the base VA, or 0-ish on malloc failure
 * (the failure path is not visible in this excerpt — TODO confirm).
 */
200 alloc_empty_page_range(unsigned long nr_pages)
/* 16 update_va_mapping entries + 1 memory_op entry per batch. */
204 multicall_entry_t mcl[17];
205 unsigned long mfn_list[16];
206 struct xen_memory_reservation reservation = {
207 .extent_start = mfn_list,
214 pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
218 memset(mcl, 0, sizeof(mcl));
220 while (i < nr_pages) {
221 unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);
/* Clear the PTE and remember the MFN being given back. */
223 mcl[j].op = __HYPERVISOR_update_va_mapping;
226 mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;
/* Mark the pseudo-phys slot invalid: the frame no longer belongs to us. */
228 xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;
/* Flush the batch when full or when the last page has been queued. */
230 if (j == 16 || i == nr_pages) {
231 mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;
233 reservation.nr_extents = j;
/* Final multicall slot returns the batch of frames to Xen. */
235 mcl[j].op = __HYPERVISOR_memory_op;
236 mcl[j].args[0] = XENMEM_decrease_reservation;
237 mcl[j].args[1] = (unsigned long)&reservation;
239 (void)HYPERVISOR_multicall(mcl, j+1);
/* Reset flags for reuse of the entry in the next batch. */
241 mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
246 return (unsigned long)pages;
/* Pop a pending_req from the free pool; returns NULL when exhausted
 * (caller requeues the blkif and waits for free_req() to kick the task). */
249 static pending_req_t *
253 mtx_lock(&pending_free_lock);
254 if ((req = STAILQ_FIRST(&pending_free))) {
255 STAILQ_REMOVE(&pending_free, req, pending_req, free_list);
256 STAILQ_NEXT(req, free_list) = NULL;
258 mtx_unlock(&pending_free_lock);
/* Return a pending_req to the pool.  If the pool was empty, some blkif
 * may be parked waiting for a slot, so re-kick the request task. */
263 free_req(pending_req_t *req)
267 mtx_lock(&pending_free_lock);
268 was_empty = STAILQ_EMPTY(&pending_free);
269 STAILQ_INSERT_TAIL(&pending_free, req, free_list);
270 mtx_unlock(&pending_free_lock);
/* NOTE(review): excerpt hides the `if (was_empty)` guard presumably
 * wrapping this enqueue — confirm against full source. */
272 taskqueue_enqueue(taskqueue_swi, &blk_req_task);
/* Unmap every granted page of a completed request in a single
 * GNTTABOP_unmap_grant_ref batch, resetting the handle slots. */
276 fast_flush_area(pending_req_t *req)
278 struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
279 unsigned int i, invcount = 0;
280 grant_handle_t handle;
283 for (i = 0; i < req->nr_pages; i++) {
284 handle = pending_handle(req, i);
/* Skip slots that were never mapped (or already flushed). */
285 if (handle == BLKBACK_INVALID_HANDLE)
287 unmap[invcount].host_addr = vaddr(req, i);
288 unmap[invcount].dev_bus_addr = 0;
289 unmap[invcount].handle = handle;
290 pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
294 ret = HYPERVISOR_grant_table_op(
295 GNTTABOP_unmap_grant_ref, unmap, invcount);
/* Take a reference on a blkif instance. */
300 blkif_get(blkif_t *blkif)
302 atomic_add_int(&blkif->ref_cnt, 1);
/* Drop a reference; the final release tears down the ring mapping and
 * frees the instance and its owned strings. */
306 blkif_put(blkif_t *blkif)
308 if (atomic_fetchadd_int(&blkif->ref_cnt, -1) == 1) {
309 DPRINTF("Removing %x\n", (unsigned int)blkif);
310 disconnect_ring(blkif);
/* mode/type/dev_name ownership was transferred in blkif_create(). */
312 free(blkif->mode, M_DEVBUF);
314 free(blkif->type, M_DEVBUF);
316 free(blkif->dev_name, M_DEVBUF);
317 free(blkif, M_DEVBUF);
/*
 * Allocate and initialize a blkif instance from xenstore-supplied
 * parameters.  Takes ownership of the `params` string (stored as
 * dev_name); mode/type ownership is presumably taken too, matching the
 * frees in blkif_put() — TODO confirm in the elided lines.
 */
322 blkif_create(struct xenbus_device *xdev, long handle, char *mode, char *type, char *params)
326 blkif = (blkif_t *)malloc(sizeof(*blkif), M_DEVBUF, M_NOWAIT | M_ZERO);
330 DPRINTF("Created %x\n", (unsigned int)blkif);
333 blkif->domid = xdev->otherend_id;
334 blkif->handle = handle;
337 blkif->dev_name = params;
/* NOTE(review): the name string "blk_ring_ock" looks like a typo for
 * "blk_ring_lock"; it is a runtime string so it is only flagged here. */
341 mtx_init(&blkif->blk_ring_lock, "blk_ring_ock", "blkback ring lock", MTX_DEF);
/* Anything other than mode "w" is treated as read-only. */
343 if (strcmp(mode, "w"))
344 blkif->read_only = 1;
/* Queue a connected blkif for servicing and kick the request task.
 * The unlocked pre-check is an optimization; the decision is re-taken
 * under req_sched_list_lock.  blkif_get() presumably happens in the
 * elided line 355 — TODO confirm. */
350 add_to_req_schedule_list_tail(blkif_t *blkif)
352 if (!blkif->on_req_sched_list) {
353 mtx_lock(&req_sched_list_lock);
354 if (!blkif->on_req_sched_list && (blkif->state == XenbusStateConnected)) {
356 STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req);
357 blkif->on_req_sched_list = 1;
358 taskqueue_enqueue(taskqueue_swi, &blk_req_task);
360 mtx_unlock(&req_sched_list_lock);
364 /* This routine does not call blkif_get(), does not schedule the blk_req_task to run,
365 and assumes that the state is connected */
/* Used from blk_req_action() to re-park a blkif that ran out of
 * pending_req slots; free_req() will re-kick the task later. */
367 add_to_req_schedule_list_tail2(blkif_t *blkif)
369 mtx_lock(&req_sched_list_lock);
370 if (!blkif->on_req_sched_list) {
371 STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req);
372 blkif->on_req_sched_list = 1;
374 mtx_unlock(&req_sched_list_lock);
377 /* Removes blkif from front of list and does not call blkif_put() (caller must) */
379 remove_from_req_schedule_list(void)
383 mtx_lock(&req_sched_list_lock);
/* Pop the head, if any; returns NULL when the list is empty. */
385 if ((blkif = STAILQ_FIRST(&req_sched_list))) {
386 STAILQ_REMOVE(&req_sched_list, blkif, blkback_info, next_req);
387 STAILQ_NEXT(blkif, next_req) = NULL;
388 blkif->on_req_sched_list = 0;
391 mtx_unlock(&req_sched_list_lock);
/*
 * Queue a response on the shared ring for request `id` with status `st`,
 * notify the frontend via the event channel if required, and reschedule
 * this blkif if further requests are already waiting on the ring.
 */
397 make_response(blkif_t *blkif, uint64_t id,
398 unsigned short op, int st)
400 blkif_response_t *resp;
401 blkif_back_ring_t *blk_ring = &blkif->ring;
405 mtx_lock(&blkif->blk_ring_lock);
408 /* Place on the response ring for the relevant domain. */
409 resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
411 resp->operation = op;
413 blk_ring->rsp_prod_pvt++;
414 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify);
416 if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) {
418 * Tail check for pending requests. Allows frontend to avoid
419 * notifications if requests are already in flight (lower
420 * overheads and promotes batching).
422 RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do);
424 } else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring))
427 mtx_unlock(&blkif->blk_ring_lock);
/* Re-queue for servicing when more requests were found above. */
430 add_to_req_schedule_list_tail(blkif);
/* Ring the frontend's event channel (guarded by `notify` in the
 * elided line 432 — TODO confirm). */
433 notify_remote_via_irq(blkif->irq);
/*
 * bio completion callback: records per-bio errors, and when the last
 * outstanding bio of the request finishes (pendcnt hits zero) unmaps the
 * granted pages, sends the response, and releases request + blkif ref.
 */
437 end_block_io_op(struct bio *bio)
439 pending_req_t *pending_req = bio->bio_caller2;
441 if (bio->bio_error) {
442 DPRINTF("BIO returned error %d for operation on device %s\n",
443 bio->bio_error, pending_req->blkif->dev_name);
444 pending_req->status = BLKIF_RSP_ERROR;
445 pending_req->blkif->st_err_req++;
/* Debug trace; likely compiled out in production builds. */
449 printf("done: bio=%x error=%x completed=%llu resid=%lu flags=%x\n",
450 (unsigned int)bio, bio->bio_error, bio->bio_completed, bio->bio_resid, bio->bio_flags);
/* Last completer performs the teardown. */
453 if (atomic_fetchadd_int(&pending_req->pendcnt, -1) == 1) {
454 fast_flush_area(pending_req);
455 make_response(pending_req->blkif, pending_req->id,
456 pending_req->operation, pending_req->status);
457 blkif_put(pending_req->blkif);
458 free_req(pending_req);
/*
 * Translate one frontend read/write request into bios and hand them to
 * the backing device's strategy routine.  Validates segment count and
 * bounds, grant-maps the frontend's data pages, builds one bio per
 * segment, and dispatches them; errors fall through to the (elided)
 * failure labels which unmap pages and send BLKIF_RSP_ERROR.
 */
465 dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req, pending_req_t *pending_req)
467 struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
469 unsigned long buf; unsigned int nsec;
470 } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
471 unsigned int nseg = req->nr_segments, nr_sects = 0;
472 struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
473 int operation, ret, i, nbio = 0;
475 /* Check that number of segments is sane. */
476 if (unlikely(nseg == 0) ||
477 unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
478 DPRINTF("Bad number of segments in request (%d)\n", nseg);
/* Reject writes to devices opened read-only. */
482 if (req->operation == BLKIF_OP_WRITE) {
483 if (blkif->read_only) {
484 DPRINTF("Attempt to write to read only device %s\n", blkif->dev_name);
487 operation = BIO_WRITE;
489 operation = BIO_READ;
491 pending_req->blkif = blkif;
492 pending_req->id = req->id;
493 pending_req->operation = req->operation;
494 pending_req->status = BLKIF_RSP_OKAY;
495 pending_req->nr_pages = nseg;
/* First pass: compute per-segment sector counts and build the batch of
 * grant-map operations. */
497 for (i = 0; i < nseg; i++) {
498 seg[i].nsec = req->seg[i].last_sect -
499 req->seg[i].first_sect + 1;
/* Segment must stay within one page (512-byte sector units). */
501 if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
504 nr_sects += seg[i].nsec;
506 map[i].host_addr = vaddr(pending_req, i);
507 map[i].dom = blkif->domid;
508 map[i].ref = req->seg[i].gref;
509 map[i].flags = GNTMAP_host_map;
/* NOTE(review): readonly flag is set for BIO_WRITE (we only read the
 * guest's pages when writing to disk) — counter-intuitive but correct. */
510 if (operation == BIO_WRITE)
511 map[i].flags |= GNTMAP_readonly;
514 /* Convert to the disk's sector size */
515 nr_sects = (nr_sects << 9) >> blkif->sector_size_shift;
517 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
/* Second pass: verify each mapping succeeded and record handles. */
520 for (i = 0; i < nseg; i++) {
521 if (unlikely(map[i].status != 0)) {
522 DPRINTF("invalid buffer -- could not remap it\n");
526 pending_handle(pending_req, i) = map[i].handle;
528 /* Can't do this in FreeBSD since vtophys() returns the pfn */
529 /* of the remote domain who loaned us the machine page - DPT */
/* NOTE(review): `map[i]dev_bus_addr` below is missing a `.`; given the
 * preceding comment this statement is probably disabled (#if 0 or
 * commented out) in the full source — confirm before "fixing". */
530 xen_phys_machine[(vtophys(vaddr(pending_req, i)) >> PAGE_SHIFT)] =
531 map[i]dev_bus_addr >> PAGE_SHIFT;
533 seg[i].buf = map[i].dev_bus_addr |
534 (req->seg[i].first_sect << 9);
/* Bounds check against the media size (in disk-native sectors). */
537 if (req->sector_number + nr_sects > blkif->media_num_sectors) {
538 DPRINTF("%s of [%llu,%llu] extends past end of device %s\n",
539 operation == BIO_READ ? "read" : "write",
541 req->sector_number + nr_sects, blkif->dev_name);
/* Third pass: one bio per segment. */
545 for (i = 0; i < nseg; i++) {
/* Segment length must be a multiple of the disk sector size. */
548 if ((int)seg[i].nsec & ((blkif->sector_size >> 9) - 1)) {
549 DPRINTF("Misaligned I/O request from domain %d", blkif->domid);
553 bio = biolist[nbio++] = g_new_bio();
554 if (unlikely(bio == NULL))
557 bio->bio_cmd = operation;
558 bio->bio_offset = req->sector_number << blkif->sector_size_shift;
559 bio->bio_length = seg[i].nsec << 9;
560 bio->bio_bcount = bio->bio_length;
561 bio->bio_data = (caddr_t)(vaddr(pending_req, i) | (seg[i].buf & PAGE_MASK));
562 bio->bio_done = end_block_io_op;
563 bio->bio_caller2 = pending_req;
564 bio->bio_dev = blkif->cdev;
/* Advance the running sector position for the next segment. */
566 req->sector_number += (seg[i].nsec << 9) >> blkif->sector_size_shift;
568 printf("new: bio=%x cmd=%d sect=%llu nsect=%u iosize_max=%u @ %08lx\n",
569 (unsigned int)bio, req->operation, req->sector_number, seg[i].nsec,
570 blkif->cdev->si_iosize_max, seg[i].buf);
/* All bios built; completions will decrement pendcnt down to zero. */
574 pending_req->pendcnt = nbio;
577 for (i = 0; i < nbio; i++)
578 (*blkif->csw->d_strategy)(biolist[i]);
/* --- error path (labels elided in this excerpt) --- */
583 for (i = 0; i < (nbio-1); i++)
584 g_destroy_bio(biolist[i]);
586 fast_flush_area(pending_req);
588 make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
589 free_req(pending_req);
/*
 * Taskqueue handler: drain the scheduler list, consuming as many ring
 * requests per blkif as there are free pending_req slots.  When the
 * pool runs dry the blkif is re-parked (tail2 variant) and the loop
 * resumes when free_req() re-enqueues this task.
 */
593 blk_req_action(void *context, int pending)
599 while (!STAILQ_EMPTY(&req_sched_list)) {
600 blkif_back_ring_t *blk_ring;
603 blkif = remove_from_req_schedule_list();
605 blk_ring = &blkif->ring;
606 rc = blk_ring->req_cons;
607 rp = blk_ring->sring->req_prod;
608 rmb(); /* Ensure we see queued requests up to 'rp'. */
610 while ((rc != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) {
611 blkif_request_t *req;
612 pending_req_t *pending_req;
614 pending_req = alloc_req();
615 if (pending_req == NULL)
618 req = RING_GET_REQUEST(blk_ring, rc);
619 blk_ring->req_cons = ++rc; /* before make_response() */
/* Dispatch by operation; READ and WRITE share the same handler
 * (case labels elided in this excerpt). */
621 switch (req->operation) {
624 dispatch_rw_block_io(blkif, req, pending_req);
628 dispatch_rw_block_io(blkif, req, pending_req);
632 DPRINTF("error: unknown block io operation [%d]\n",
634 make_response(blkif, req->id, req->operation,
636 free_req(pending_req);
647 /* We ran out of pending req structs */
648 /* Just requeue interface and wait to be rescheduled to run when one is freed */
649 add_to_req_schedule_list_tail2(blkif);
653 /* Handle interrupt from a frontend */
/* Event-channel upcall: just schedule the blkif for servicing. */
655 blkback_intr(void *arg)
657 blkif_t *blkif = arg;
658 DPRINTF("%x\n", (unsigned int)blkif);
659 add_to_req_schedule_list_tail(blkif);
662 /* Map grant ref for ring */
/* Allocate one page of KVA and grant-map the frontend's shared ring
 * page into it; records va/handle/bus_addr in *ring for later unmap. */
664 map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
666 struct gnttab_map_grant_ref op;
668 ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
672 op.host_addr = ring->va;
673 op.flags = GNTMAP_host_map;
676 HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
/* On grant failure, release the KVA and bail (status check elided). */
678 WPRINTF("grant table op err=%d\n", op.status);
679 kmem_free(kernel_map, ring->va, PAGE_SIZE);
684 ring->handle = op.handle;
685 ring->bus_addr = op.dev_bus_addr;
690 /* Unmap grant ref for ring */
/* Reverse of map_ring(): unmap the grant and free the KVA page. */
692 unmap_ring(struct ring_ref *ring)
694 struct gnttab_unmap_grant_ref op;
696 op.host_addr = ring->va;
697 op.dev_bus_addr = ring->bus_addr;
698 op.handle = ring->handle;
699 HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
701 WPRINTF("grant table op err=%d\n", op.status);
703 kmem_free(kernel_map, ring->va, PAGE_SIZE);
/*
 * Read ring-ref and event-channel from the frontend's xenstore area,
 * map the shared ring, bind the interdomain event channel and hook it
 * to blkback_intr.  Idempotent: returns early if already connected.
 */
708 connect_ring(blkif_t *blkif)
710 struct xenbus_device *xdev = blkif->xdev;
712 unsigned long ring_ref;
713 evtchn_port_t evtchn;
714 evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
717 if (blkif->ring_connected)
720 // Grab FE data and map his memory
721 err = xenbus_gather(NULL, xdev->otherend,
722 "ring-ref", "%lu", &ring_ref,
723 "event-channel", "%u", &evtchn, NULL);
725 xenbus_dev_fatal(xdev, err,
726 "reading %s/ring-ref and event-channel",
731 err = map_ring(ring_ref, blkif->domid, &blkif->rr);
733 xenbus_dev_fatal(xdev, err, "mapping ring");
736 ring = (blkif_sring_t *)blkif->rr.va;
737 BACK_RING_INIT(&blkif->ring, ring, PAGE_SIZE);
739 op.u.bind_interdomain.remote_dom = blkif->domid;
740 op.u.bind_interdomain.remote_port = evtchn;
741 err = HYPERVISOR_event_channel_op(&op);
/* On bind failure, undo the ring mapping before reporting. */
743 unmap_ring(&blkif->rr);
744 xenbus_dev_fatal(xdev, err, "binding event channel");
747 blkif->evtchn = op.u.bind_interdomain.local_port;
749 /* bind evtchn to irq handler */
751 bind_evtchn_to_irqhandler(blkif->evtchn, "blkback",
752 blkback_intr, blkif, INTR_TYPE_NET|INTR_MPSAFE, &blkif->irq_cookie);
754 blkif->ring_connected = 1;
756 DPRINTF("%x rings connected! evtchn=%d irq=%d\n",
757 (unsigned int)blkif, blkif->evtchn, blkif->irq);
/* Tear down the event-channel binding and ring mapping, if connected. */
763 disconnect_ring(blkif_t *blkif)
767 if (blkif->ring_connected) {
768 unbind_from_irqhandler(blkif->irq, blkif->irq_cookie);
770 unmap_ring(&blkif->rr);
771 blkif->ring_connected = 0;
/*
 * Publish device geometry (sectors, info flags, sector-size) to our
 * xenstore area inside a transaction, then switch the backend to
 * XenbusStateConnected.  Requires the ring to be connected first.
 */
776 connect(blkif_t *blkif)
778 struct xenbus_transaction *xbt;
779 struct xenbus_device *xdev = blkif->xdev;
782 if (!blkif->ring_connected ||
784 blkif->state == XenbusStateConnected)
787 DPRINTF("%s\n", xdev->otherend);
789 /* Supply the information about the device the frontend needs */
791 xbt = xenbus_transaction_start();
793 xenbus_dev_fatal(xdev, PTR_ERR(xbt),
794 "Error writing configuration for backend "
795 "(start transaction)");
799 err = xenbus_printf(xbt, xdev->nodename, "sectors", "%u",
800 blkif->media_num_sectors);
802 xenbus_dev_fatal(xdev, err, "writing %s/sectors",
807 err = xenbus_printf(xbt, xdev->nodename, "info", "%u",
808 blkif->read_only ? VDISK_READONLY : 0);
810 xenbus_dev_fatal(xdev, err, "writing %s/info",
814 err = xenbus_printf(xbt, xdev->nodename, "sector-size", "%u",
817 xenbus_dev_fatal(xdev, err, "writing %s/sector-size",
822 err = xenbus_transaction_end(xbt, 0);
826 xenbus_dev_fatal(xdev, err, "ending transaction");
828 err = xenbus_switch_state(xdev, NULL, XenbusStateConnected);
830 xenbus_dev_fatal(xdev, err, "switching to Connected state",
833 blkif->state = XenbusStateConnected;
/* Abort path: transaction_end(xbt, 1) rolls back on earlier failure. */
838 xenbus_transaction_end(xbt, 1);
/*
 * xenbus probe: parse the vbd handle from the frontend path, read
 * mode/type/params from our xenstore node, create the blkif instance
 * and register the newbus vbd child device.  mode/type (and params on
 * the error paths) are freed at the end; blkif_create() takes ownership
 * of params on success.
 */
842 blkback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
845 char *p, *mode = NULL, *type = NULL, *params = NULL;
848 DPRINTF("node=%s\n", xdev->nodename);
/* The handle is the trailing path component of the otherend node. */
850 p = strrchr(xdev->otherend, '/') + 1;
851 handle = strtoul(p, NULL, 0);
853 mode = xenbus_read(NULL, xdev->nodename, "mode", NULL);
855 xenbus_dev_fatal(xdev, PTR_ERR(mode), "reading mode");
860 type = xenbus_read(NULL, xdev->nodename, "type", NULL);
862 xenbus_dev_fatal(xdev, PTR_ERR(type), "reading type");
867 params = xenbus_read(NULL, xdev->nodename, "params", NULL);
869 xenbus_dev_fatal(xdev, PTR_ERR(params), "reading params");
870 err = PTR_ERR(params);
874 err = blkif_create(xdev, handle, mode, type, params);
876 xenbus_dev_fatal(xdev, err, "creating blkif");
880 err = vbd_add_dev(xdev);
882 blkif_put((blkif_t *)xdev->data);
883 xenbus_dev_fatal(xdev, err, "adding vbd device");
/* Cleanup labels (elided) fall through here. */
890 free(mode, M_DEVBUF);
892 free(type, M_DEVBUF);
894 free(params, M_DEVBUF);
/* xenbus remove: mark the instance closing and tear down the newbus
 * child device (teardown body elided in this excerpt). */
899 blkback_remove(struct xenbus_device *xdev)
901 blkif_t *blkif = xdev->data;
904 DPRINTF("node=%s\n", xdev->nodename);
906 blkif->state = XenbusStateClosing;
908 if ((ndev = blkif->ndev)) {
/* xenbus resume: nothing to do beyond logging. */
923 blkback_resume(struct xenbus_device *xdev)
925 DPRINTF("node=%s\n", xdev->nodename);
/*
 * otherend_changed callback: track the frontend's state machine.
 * Connected presumably triggers connect_ring()/connect() in the elided
 * lines; Closing/Closed mirror the state and remove the device.
 */
930 frontend_changed(struct xenbus_device *xdev,
931 XenbusState frontend_state)
933 blkif_t *blkif = xdev->data;
935 DPRINTF("state=%d\n", frontend_state);
937 blkif->frontend_state = frontend_state;
939 switch (frontend_state) {
940 case XenbusStateInitialising:
942 case XenbusStateInitialised:
943 case XenbusStateConnected:
947 case XenbusStateClosing:
948 xenbus_switch_state(xdev, NULL, XenbusStateClosing);
950 case XenbusStateClosed:
951 xenbus_remove_device(xdev);
953 case XenbusStateUnknown:
954 case XenbusStateInitWait:
955 xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
961 /* ** Driver registration ** */
/* xenbus ID table and driver glue binding the callbacks above. */
963 static struct xenbus_device_id blkback_ids[] = {
968 static struct xenbus_driver blkback = {
971 .probe = blkback_probe,
972 .remove = blkback_remove,
973 .resume = blkback_resume,
974 .otherend_changed = frontend_changed,
/*
 * SYSINIT entry point: set up locks, allocate the pending-request pool
 * and its per-page VA/handle bookkeeping, reserve the empty page range
 * for grant mappings, then register the xenbus backend driver.
 */
978 blkback_init(void *unused)
982 TASK_INIT(&blk_req_task, 0, blk_req_action, NULL);
983 mtx_init(&req_sched_list_lock, "blk_req_sched_lock", "blkback req sched lock", MTX_DEF);
/* NOTE(review): "blk_pending_req_ock" looks like a typo for
 * "..._lock"; runtime string, flagged only. */
985 mtx_init(&pending_free_lock, "blk_pending_req_ock", "blkback pending request lock", MTX_DEF);
987 mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
988 pending_reqs = malloc(sizeof(pending_reqs[0]) *
989 blkif_reqs, M_DEVBUF, M_ZERO|M_NOWAIT);
990 pending_grant_handles = malloc(sizeof(pending_grant_handles[0]) *
991 mmap_pages, M_DEVBUF, M_NOWAIT);
992 pending_vaddrs = malloc(sizeof(pending_vaddrs[0]) *
993 mmap_pages, M_DEVBUF, M_NOWAIT);
994 mmap_vstart = alloc_empty_page_range(mmap_pages);
/* Partial-allocation failure: free whatever succeeded and bail. */
995 if (!pending_reqs || !pending_grant_handles || !pending_vaddrs || !mmap_vstart) {
997 free(pending_reqs, M_DEVBUF);
998 if (pending_grant_handles)
999 free(pending_grant_handles, M_DEVBUF);
1001 free(pending_vaddrs, M_DEVBUF);
1002 WPRINTF("out of memory\n");
/* Initialize per-page bookkeeping and populate the free list. */
1006 for (i = 0; i < mmap_pages; i++) {
1007 pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
1008 pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
1011 for (i = 0; i < blkif_reqs; i++) {
1012 STAILQ_INSERT_TAIL(&pending_free, &pending_reqs[i], free_list);
1015 DPRINTF("registering %s\n", blkback.name);
1016 xenbus_register_backend(&blkback);
1019 SYSINIT(xbbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, blkback_init, NULL)
/* Release the cdevsw thread reference and close the backing vnode
 * (FWRITE is presumably added to flags for writable devices in the
 * elided lines — TODO confirm). */
1022 close_device(blkif_t *blkif)
1024 DPRINTF("closing dev=%s\n", blkif->dev_name);
1028 if (!blkif->read_only)
1032 dev_relthread(blkif->cdev);
1036 (void)vn_close(blkif->vn, flags, NOCRED, curthread);
/*
 * Open the backing device named by blkif->dev_name: resolve and vn_open
 * the path (retrying with a "/dev/" prefix for bare names), verify it is
 * a disk, grab the cdevsw, and query sector size and media size via
 * ioctl.  Failure paths go through close_device().
 */
1042 open_device(blkif_t *blkif)
1044 struct nameidata nd;
1047 struct cdevsw *devsw;
1048 int flags = FREAD, err = 0;
1050 DPRINTF("opening dev=%s\n", blkif->dev_name);
1052 if (!blkif->read_only)
/* SYSINIT context: ensure curthread has cwd/root/jail dirs so namei
 * can resolve paths this early in boot. */
1055 if (!curthread->td_proc->p_fd->fd_cdir) {
1056 curthread->td_proc->p_fd->fd_cdir = rootvnode;
1059 if (!curthread->td_proc->p_fd->fd_rdir) {
1060 curthread->td_proc->p_fd->fd_rdir = rootvnode;
1063 if (!curthread->td_proc->p_fd->fd_jdir) {
1064 curthread->td_proc->p_fd->fd_jdir = rootvnode;
1069 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, blkif->dev_name, curthread);
1070 err = vn_open(&nd, &flags, 0, -1);
/* On failure with a relative name, retry once with "/dev/" prefixed
 * (retry jump target elided in this excerpt). */
1072 if (blkif->dev_name[0] != '/') {
1073 char *dev_path = "/dev/";
1076 /* Try adding device path at beginning of name */
1077 dev_name = malloc(strlen(blkif->dev_name) + strlen(dev_path) + 1, M_DEVBUF, M_NOWAIT);
1079 sprintf(dev_name, "%s%s", dev_path, blkif->dev_name);
1080 free(blkif->dev_name, M_DEVBUF);
1081 blkif->dev_name = dev_name;
1085 xenbus_dev_fatal(blkif->xdev, err, "error opening device %s", blkif->dev_name);
1088 NDFREE(&nd, NDF_ONLY_PNBUF);
1090 blkif->vn = nd.ni_vp;
1092 /* We only support disks for now */
1093 if (!vn_isdisk(blkif->vn, &err)) {
1094 xenbus_dev_fatal(blkif->xdev, err, "device %s is not a disk", blkif->dev_name);
1095 VOP_UNLOCK(blkif->vn, 0, curthread);
1099 blkif->cdev = blkif->vn->v_rdev;
1100 blkif->csw = dev_refthread(blkif->cdev);
1101 PANIC_IF(blkif->csw == NULL);
1103 err = VOP_GETATTR(blkif->vn, &vattr, NOCRED);
1105 xenbus_dev_fatal(blkif->xdev, err,
1106 "error getting vnode attributes for device %s", blkif->dev_name);
1107 VOP_UNLOCK(blkif->vn, 0, curthread);
1111 VOP_UNLOCK(blkif->vn, 0, curthread);
1113 dev = blkif->vn->v_rdev;
1114 devsw = dev->si_devsw;
1115 if (!devsw->d_ioctl) {
1117 xenbus_dev_fatal(blkif->xdev, err,
1118 "no d_ioctl for device %s!", blkif->dev_name);
/* Query geometry; sector_size_shift = log2(sector_size) via fls(). */
1122 err = (*devsw->d_ioctl)(dev, DIOCGSECTORSIZE, (caddr_t)&blkif->sector_size, FREAD, curthread);
1124 xenbus_dev_fatal(blkif->xdev, err,
1125 "error calling ioctl DIOCGSECTORSIZE for device %s", blkif->dev_name);
1128 blkif->sector_size_shift = fls(blkif->sector_size) - 1;
1130 err = (*devsw->d_ioctl)(dev, DIOCGMEDIASIZE, (caddr_t)&blkif->media_size, FREAD, curthread);
1132 xenbus_dev_fatal(blkif->xdev, err,
1133 "error calling ioctl DIOCGMEDIASIZE for device %s", blkif->dev_name);
1136 blkif->media_num_sectors = blkif->media_size >> blkif->sector_size_shift;
1138 blkif->major = major(vattr.va_rdev);
1139 blkif->minor = minor(vattr.va_rdev);
1141 DPRINTF("opened dev=%s major=%d minor=%d sector_size=%u media_size=%lld\n",
1142 blkif->dev_name, blkif->major, blkif->minor, blkif->sector_size, blkif->media_size);
/* Error label (elided): undo the open before returning. */
1147 close_device(blkif);
/*
 * Create a newbus "vbd" child under nexus0 representing this backend
 * instance; ivars carry the blkif so vbd_attach() can find it.
 */
1152 vbd_add_dev(struct xenbus_device *xdev)
1154 blkif_t *blkif = xdev->data;
1155 device_t nexus, ndev;
1161 /* We will add a vbd device as a child of nexus0 (for now) */
1162 if (!(dc = devclass_find("nexus")) ||
1163 !(nexus = devclass_get_device(dc, 0))) {
1164 WPRINTF("could not find nexus0!\n");
1170 /* Create a newbus device representing the vbd */
1171 ndev = BUS_ADD_CHILD(nexus, 0, "vbd", blkif->handle);
1173 WPRINTF("could not create newbus device vbd%d!\n", blkif->handle);
1179 device_set_ivars(ndev, blkif);
1182 device_probe_and_attach(ndev);
/* Sysctl selector IDs passed as arg2 to vbd_sysctl_handler()
 * (enum opener and the DOMID/RING members are elided in this excerpt). */
1193 VBD_SYSCTL_ST_RD_REQ,
1194 VBD_SYSCTL_ST_WR_REQ,
1195 VBD_SYSCTL_ST_OO_REQ,
1196 VBD_SYSCTL_ST_ERR_REQ,
/* Format a snapshot of the ring indices into a malloc'd string
 * (caller frees); reports "ring not connected" when unconnected. */
1201 vbd_sysctl_ring_info(blkif_t *blkif, int cmd)
1203 char *buf = malloc(256, M_DEVBUF, M_WAITOK);
1205 if (!blkif->ring_connected)
1206 sprintf(buf, "ring not connected\n");
1208 blkif_back_ring_t *ring = &blkif->ring;
1209 sprintf(buf, "nr_ents=%x req_cons=%x"
1210 " req_prod=%x req_event=%x"
1211 " rsp_prod=%x rsp_event=%x",
1212 ring->nr_ents, ring->req_cons,
1213 ring->sring->req_prod, ring->sring->req_event,
1214 ring->sring->rsp_prod, ring->sring->rsp_event);
/*
 * Shared sysctl handler: arg1 is the vbd device_t (ivars = blkif),
 * arg2 selects the statistic/string to report (VBD_SYSCTL_* enum).
 */
1221 vbd_sysctl_handler(SYSCTL_HANDLER_ARGS)
1223 device_t dev = (device_t)arg1;
1224 blkif_t *blkif = (blkif_t *)device_get_ivars(dev);
1230 case VBD_SYSCTL_DOMID:
1231 return sysctl_handle_int(oidp, NULL, blkif->domid, req);
1232 case VBD_SYSCTL_ST_RD_REQ:
1233 return sysctl_handle_int(oidp, NULL, blkif->st_rd_req, req);
1234 case VBD_SYSCTL_ST_WR_REQ:
1235 return sysctl_handle_int(oidp, NULL, blkif->st_wr_req, req);
1236 case VBD_SYSCTL_ST_OO_REQ:
1237 return sysctl_handle_int(oidp, NULL, blkif->st_oo_req, req);
1238 case VBD_SYSCTL_ST_ERR_REQ:
1239 return sysctl_handle_int(oidp, NULL, blkif->st_err_req, req);
1240 case VBD_SYSCTL_RING:
/* String result: emit then free the malloc'd buffer. */
1241 value = buf = vbd_sysctl_ring_info(blkif, arg2);
1247 err = SYSCTL_OUT(req, value, strlen(value));
1249 free(buf, M_DEVBUF);
1254 /* Newbus vbd device driver probe */
/* Always succeeds — the device was created explicitly by vbd_add_dev(). */
1256 vbd_probe(device_t dev)
1258 DPRINTF("vbd%d\n", device_get_unit(dev));
1262 /* Newbus vbd device driver attach */
/* Register per-device statistics sysctls, open the backing device, and
 * (in elided lines, presumably) drive the xenbus connect sequence. */
1264 vbd_attach(device_t dev)
1266 blkif_t *blkif = (blkif_t *)device_get_ivars(dev);
1268 DPRINTF("%s\n", blkif->dev_name);
1270 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1271 OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
1272 dev, VBD_SYSCTL_DOMID, vbd_sysctl_handler, "I",
1273 "domid of frontend");
1274 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1275 OID_AUTO, "rd_reqs", CTLTYPE_INT|CTLFLAG_RD,
1276 dev, VBD_SYSCTL_ST_RD_REQ, vbd_sysctl_handler, "I",
1277 "number of read reqs");
1278 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1279 OID_AUTO, "wr_reqs", CTLTYPE_INT|CTLFLAG_RD,
1280 dev, VBD_SYSCTL_ST_WR_REQ, vbd_sysctl_handler, "I",
1281 "number of write reqs");
1282 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1283 OID_AUTO, "oo_reqs", CTLTYPE_INT|CTLFLAG_RD,
1284 dev, VBD_SYSCTL_ST_OO_REQ, vbd_sysctl_handler, "I",
1285 "number of deferred reqs");
1286 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1287 OID_AUTO, "err_reqs", CTLTYPE_INT|CTLFLAG_RD,
1288 dev, VBD_SYSCTL_ST_ERR_REQ, vbd_sysctl_handler, "I",
1289 "number of reqs that returned error");
1290 #if XEN_BLKBACK_DEBUG
1291 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
1292 OID_AUTO, "ring", CTLFLAG_RD,
1293 dev, VBD_SYSCTL_RING, vbd_sysctl_handler, "A",
1297 if (!open_device(blkif))
1300 return bus_generic_attach(dev);
1303 /* Newbus vbd device driver detach */
/* Close the backing device and detach any newbus children. */
1305 vbd_detach(device_t dev)
1307 blkif_t *blkif = (blkif_t *)device_get_ivars(dev);
1309 DPRINTF("%s\n", blkif->dev_name);
1311 close_device(blkif);
1313 bus_generic_detach(dev);
/* Newbus glue: method table, devclass and driver registration under
 * nexus (DEVMETHOD_END and driver fields partially elided). */
1320 static device_method_t vbd_methods[] = {
1321 /* Device interface */
1322 DEVMETHOD(device_probe, vbd_probe),
1323 DEVMETHOD(device_attach, vbd_attach),
1324 DEVMETHOD(device_detach, vbd_detach),
1325 DEVMETHOD(device_shutdown, bus_generic_shutdown),
1326 DEVMETHOD(device_suspend, bus_generic_suspend),
1327 DEVMETHOD(device_resume, bus_generic_resume),
1331 static devclass_t vbd_devclass;
1333 static driver_t vbd_driver = {
1339 DRIVER_MODULE(vbd, nexus, vbd_driver, vbd_devclass, 0, 0);
1344 * c-set-style: "BSD"
1347 * indent-tabs-mode: t