usr.sbin/bhyve/block_if.c (FreeBSD releng/10.2)
/*-
 * Copyright (c) 2013  Peter Grehan <grehan@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/disk.h>

#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <pthread_np.h>
#include <signal.h>
#include <unistd.h>

#include <machine/atomic.h>

#include "bhyverun.h"
#include "mevent.h"
#include "block_if.h"

#define BLOCKIF_SIG     0xb109b109

#define BLOCKIF_NUMTHR  8
#define BLOCKIF_MAXREQ  (64 + BLOCKIF_NUMTHR)
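
/*
 * Each open backing store is serviced by a fixed pool of BLOCKIF_NUMTHR
 * worker threads; the request element array is sized at 64 entries plus
 * one per worker thread.
 */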

enum blockop {
        BOP_READ,
        BOP_WRITE,
        BOP_FLUSH,
        BOP_DELETE
};

enum blockstat {
        BST_FREE,
        BST_BLOCK,
        BST_PEND,
        BST_BUSY,
        BST_DONE
};
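
/*
 * A request element starts on the free queue (BST_FREE), moves to the
 * pending queue when enqueued (BST_PEND if runnable, BST_BLOCK if it must
 * wait for an earlier request that ends where this one begins), to the
 * busy queue while a worker thread processes it (BST_BUSY), and back to
 * the free queue once it completes.
 */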

struct blockif_elem {
        TAILQ_ENTRY(blockif_elem) be_link;
        struct blockif_req  *be_req;
        enum blockop         be_op;
        enum blockstat       be_status;
        pthread_t            be_tid;
        off_t                be_block;
};

struct blockif_ctxt {
        int                     bc_magic;
        int                     bc_fd;
        int                     bc_ischr;
        int                     bc_isgeom;
        int                     bc_candelete;
        int                     bc_rdonly;
        off_t                   bc_size;
        int                     bc_sectsz;
        int                     bc_psectsz;
        int                     bc_psectoff;
        int                     bc_closing;
        pthread_t               bc_btid[BLOCKIF_NUMTHR];
        pthread_mutex_t         bc_mtx;
        pthread_cond_t          bc_cond;

        /* Request elements and free/pending/busy queues */
        TAILQ_HEAD(, blockif_elem) bc_freeq;
        TAILQ_HEAD(, blockif_elem) bc_pendq;
        TAILQ_HEAD(, blockif_elem) bc_busyq;
        struct blockif_elem     bc_reqs[BLOCKIF_MAXREQ];
};

static pthread_once_t blockif_once = PTHREAD_ONCE_INIT;

struct blockif_sig_elem {
        pthread_mutex_t                 bse_mtx;
        pthread_cond_t                  bse_cond;
        int                             bse_pending;
        struct blockif_sig_elem         *bse_next;
};

static struct blockif_sig_elem *blockif_bse_head;

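/*
 * Take an element off the free queue, record the request's end offset
 * (OFF_MAX for flushes) and append it to the pending queue.  If another
 * pending or in-flight request ends exactly where this one begins, the new
 * element is marked BST_BLOCK so that the two are processed in order;
 * otherwise it is immediately runnable (BST_PEND).  Returns non-zero when
 * a worker thread should be woken.
 */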
static int
blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
                enum blockop op)
{
        struct blockif_elem *be, *tbe;
        off_t off;
        int i;

        be = TAILQ_FIRST(&bc->bc_freeq);
        assert(be != NULL);
        assert(be->be_status == BST_FREE);
        TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
        be->be_req = breq;
        be->be_op = op;
        switch (op) {
        case BOP_READ:
        case BOP_WRITE:
        case BOP_DELETE:
                off = breq->br_offset;
                for (i = 0; i < breq->br_iovcnt; i++)
                        off += breq->br_iov[i].iov_len;
                break;
        default:
                off = OFF_MAX;
        }
        be->be_block = off;
        TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
                if (tbe->be_block == breq->br_offset)
                        break;
        }
        if (tbe == NULL) {
                TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) {
                        if (tbe->be_block == breq->br_offset)
                                break;
                }
        }
        if (tbe == NULL)
                be->be_status = BST_PEND;
        else
                be->be_status = BST_BLOCK;
        TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link);
        return (be->be_status == BST_PEND);
}

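/*
 * Pull the first runnable (BST_PEND) element off the pending queue, mark it
 * busy and owned by the calling worker thread, and move it to the busy
 * queue.  Returns 0 when nothing is runnable.
 */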
static int
blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep)
{
        struct blockif_elem *be;

        TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
                if (be->be_status == BST_PEND)
                        break;
                assert(be->be_status == BST_BLOCK);
        }
        if (be == NULL)
                return (0);
        TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
        be->be_status = BST_BUSY;
        be->be_tid = t;
        TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link);
        *bep = be;
        return (1);
}

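/*
 * Return a completed or cancelled element to the free queue, and promote to
 * BST_PEND any pending request whose starting offset matches the end offset
 * recorded for this element.
 */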
static void
blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
{
        struct blockif_elem *tbe;

        if (be->be_status == BST_DONE || be->be_status == BST_BUSY)
                TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
        else
                TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
        TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
                if (tbe->be_req->br_offset == be->be_block)
                        tbe->be_status = BST_PEND;
        }
        be->be_tid = 0;
        be->be_status = BST_FREE;
        be->be_req = NULL;
        TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
}

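/*
 * Execute a single request against the backing store.  When a bounce buffer
 * is supplied (GEOM devices with multi-segment transfers), reads and writes
 * are staged through it in MAXPHYS-sized chunks; otherwise preadv/pwritev
 * operate directly on the request's iovec.  Flush maps to DIOCGFLUSH or
 * fsync(), and delete to DIOCGDELETE on character devices that advertise
 * it.  The completion callback is always invoked with an errno-style status.
 */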
static void
blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
{
        struct blockif_req *br;
        off_t arg[2];
        ssize_t clen, len, off, boff, voff;
        int i, err;

        br = be->be_req;
        if (br->br_iovcnt <= 1)
                buf = NULL;
        err = 0;
        switch (be->be_op) {
        case BOP_READ:
                if (buf == NULL) {
                        if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
                                   br->br_offset)) < 0)
                                err = errno;
                        else
                                br->br_resid -= len;
                        break;
                }
                i = 0;
                off = voff = 0;
                while (br->br_resid > 0) {
                        len = MIN(br->br_resid, MAXPHYS);
                        if (pread(bc->bc_fd, buf, len, br->br_offset +
                            off) < 0) {
                                err = errno;
                                break;
                        }
                        boff = 0;
                        do {
                                clen = MIN(len - boff, br->br_iov[i].iov_len -
                                    voff);
                                memcpy(br->br_iov[i].iov_base + voff,
                                    buf + boff, clen);
                                if (clen < br->br_iov[i].iov_len - voff)
                                        voff += clen;
                                else {
                                        i++;
                                        voff = 0;
                                }
                                boff += clen;
                        } while (boff < len);
                        off += len;
                        br->br_resid -= len;
                }
                break;
        case BOP_WRITE:
                if (bc->bc_rdonly) {
                        err = EROFS;
                        break;
                }
                if (buf == NULL) {
                        if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
                                    br->br_offset)) < 0)
                                err = errno;
                        else
                                br->br_resid -= len;
                        break;
                }
                i = 0;
                off = voff = 0;
                while (br->br_resid > 0) {
                        len = MIN(br->br_resid, MAXPHYS);
                        boff = 0;
                        do {
                                clen = MIN(len - boff, br->br_iov[i].iov_len -
                                    voff);
                                memcpy(buf + boff,
                                    br->br_iov[i].iov_base + voff, clen);
                                if (clen < br->br_iov[i].iov_len - voff)
                                        voff += clen;
                                else {
                                        i++;
                                        voff = 0;
                                }
                                boff += clen;
                        } while (boff < len);
                        if (pwrite(bc->bc_fd, buf, len, br->br_offset +
                            off) < 0) {
                                err = errno;
                                break;
                        }
                        off += len;
                        br->br_resid -= len;
                }
                break;
        case BOP_FLUSH:
                if (bc->bc_ischr) {
                        if (ioctl(bc->bc_fd, DIOCGFLUSH))
                                err = errno;
                } else if (fsync(bc->bc_fd))
                        err = errno;
                break;
        case BOP_DELETE:
                if (!bc->bc_candelete)
                        err = EOPNOTSUPP;
                else if (bc->bc_rdonly)
                        err = EROFS;
                else if (bc->bc_ischr) {
                        arg[0] = br->br_offset;
                        arg[1] = br->br_resid;
                        if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
                                err = errno;
                        else
                                br->br_resid = 0;
                } else
                        err = EOPNOTSUPP;
                break;
        default:
                err = EINVAL;
                break;
        }

        be->be_status = BST_DONE;

        (*br->br_callback)(br, err);
}

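/*
 * Worker thread body: repeatedly dequeue runnable requests and process
 * them, sleeping on the condition variable when the pending queue is
 * empty, until the context is flagged as closing.
 */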
static void *
blockif_thr(void *arg)
{
        struct blockif_ctxt *bc;
        struct blockif_elem *be;
        pthread_t t;
        uint8_t *buf;

        bc = arg;
        if (bc->bc_isgeom)
                buf = malloc(MAXPHYS);
        else
                buf = NULL;
        t = pthread_self();

        pthread_mutex_lock(&bc->bc_mtx);
        for (;;) {
                while (blockif_dequeue(bc, t, &be)) {
                        pthread_mutex_unlock(&bc->bc_mtx);
                        blockif_proc(bc, be, buf);
                        pthread_mutex_lock(&bc->bc_mtx);
                        blockif_complete(bc, be);
                }
                /* Check ctxt status here to see if exit requested */
                if (bc->bc_closing)
                        break;
                pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
        }
        pthread_mutex_unlock(&bc->bc_mtx);

        if (buf)
                free(buf);
        pthread_exit(NULL);
        return (NULL);
}

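/*
 * SIGCONT handler, run from the mevent loop on behalf of blockif_cancel():
 * walk the global list of waiting cancellation records, clear each one's
 * pending flag and signal its waiter.  The entire list is processed even if
 * the signal was directed at a different thread.
 */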
static void
blockif_sigcont_handler(int signal, enum ev_type type, void *arg)
{
        struct blockif_sig_elem *bse;

        for (;;) {
                /*
                 * Process the entire list even if not intended for
                 * this thread.
                 */
                do {
                        bse = blockif_bse_head;
                        if (bse == NULL)
                                return;
                } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
                                            (uintptr_t)bse,
                                            (uintptr_t)bse->bse_next));

                pthread_mutex_lock(&bse->bse_mtx);
                bse->bse_pending = 0;
                pthread_cond_signal(&bse->bse_cond);
                pthread_mutex_unlock(&bse->bse_mtx);
        }
}

static void
blockif_init(void)
{
        mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL);
        (void) signal(SIGCONT, SIG_IGN);
}

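/*
 * Open a backing store.  The option string is the pathname optionally
 * followed by "nocache", "direct", "sync", "ro" and/or
 * "sectorsize=logical[/physical]" entries.  The backing file or character
 * device is opened (falling back to read-only if a read/write open fails),
 * its size and sector geometry are determined, and the worker threads are
 * started.  Returns NULL on any error.
 */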
struct blockif_ctxt *
blockif_open(const char *optstr, const char *ident)
{
        char tname[MAXCOMLEN + 1];
        char name[MAXPATHLEN];
        char *nopt, *xopts, *cp;
        struct blockif_ctxt *bc;
        struct stat sbuf;
        struct diocgattr_arg arg;
        off_t size, psectsz, psectoff;
        int extra, fd, i, sectsz;
        int nocache, sync, ro, candelete, geom, ssopt, pssopt;

        pthread_once(&blockif_once, blockif_init);

        fd = -1;
        ssopt = 0;
        nocache = 0;
        sync = 0;
        ro = 0;

        /*
         * The first element in the optstring is always a pathname.
         * Optional elements follow.
         */
        nopt = xopts = strdup(optstr);
        while (xopts != NULL) {
                cp = strsep(&xopts, ",");
                if (cp == nopt)         /* file or device pathname */
                        continue;
                else if (!strcmp(cp, "nocache"))
                        nocache = 1;
                else if (!strcmp(cp, "sync") || !strcmp(cp, "direct"))
                        sync = 1;
                else if (!strcmp(cp, "ro"))
                        ro = 1;
                else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2)
                        ;
                else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1)
                        pssopt = ssopt;
                else {
                        fprintf(stderr, "Invalid device option \"%s\"\n", cp);
                        goto err;
                }
        }

        extra = 0;
        if (nocache)
                extra |= O_DIRECT;
        if (sync)
                extra |= O_SYNC;

        fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra);
        if (fd < 0 && !ro) {
                /* The r/w open failed; fall back to a read-only open */
                fd = open(nopt, O_RDONLY | extra);
                ro = 1;
        }

        if (fd < 0) {
                perror("Could not open backing file");
                goto err;
        }

        if (fstat(fd, &sbuf) < 0) {
                perror("Could not stat backing file");
                goto err;
        }

        /*
         * Deal with raw devices
         */
        size = sbuf.st_size;
        sectsz = DEV_BSIZE;
        psectsz = psectoff = 0;
        candelete = geom = 0;
        if (S_ISCHR(sbuf.st_mode)) {
                if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
                    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
                        perror("Could not fetch dev blk/sector size");
                        goto err;
                }
                assert(size != 0);
                assert(sectsz != 0);
                if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0)
                        ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff);
                strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
                arg.len = sizeof(arg.value.i);
                if (ioctl(fd, DIOCGATTR, &arg) == 0)
                        candelete = arg.value.i;
                if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0)
                        geom = 1;
        } else
                psectsz = sbuf.st_blksize;

        if (ssopt != 0) {
                if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 ||
                    ssopt > pssopt) {
                        fprintf(stderr, "Invalid sector size %d/%d\n",
                            ssopt, pssopt);
                        goto err;
                }

                /*
                 * Some backend drivers (e.g. cd0, ada0) require that the I/O
                 * size be a multiple of the device's sector size.
                 *
                 * Validate that the emulated sector size complies with this
                 * requirement.
                 */
                if (S_ISCHR(sbuf.st_mode)) {
                        if (ssopt < sectsz || (ssopt % sectsz) != 0) {
                                fprintf(stderr, "Sector size %d incompatible "
                                    "with underlying device sector size %d\n",
                                    ssopt, sectsz);
                                goto err;
                        }
                }

                sectsz = ssopt;
                psectsz = pssopt;
                psectoff = 0;
        }

        bc = calloc(1, sizeof(struct blockif_ctxt));
        if (bc == NULL) {
                perror("calloc");
                goto err;
        }

        bc->bc_magic = BLOCKIF_SIG;
        bc->bc_fd = fd;
        bc->bc_ischr = S_ISCHR(sbuf.st_mode);
        bc->bc_isgeom = geom;
        bc->bc_candelete = candelete;
        bc->bc_rdonly = ro;
        bc->bc_size = size;
        bc->bc_sectsz = sectsz;
        bc->bc_psectsz = psectsz;
        bc->bc_psectoff = psectoff;
        pthread_mutex_init(&bc->bc_mtx, NULL);
        pthread_cond_init(&bc->bc_cond, NULL);
        TAILQ_INIT(&bc->bc_freeq);
        TAILQ_INIT(&bc->bc_pendq);
        TAILQ_INIT(&bc->bc_busyq);
        for (i = 0; i < BLOCKIF_MAXREQ; i++) {
                bc->bc_reqs[i].be_status = BST_FREE;
                TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
        }

        for (i = 0; i < BLOCKIF_NUMTHR; i++) {
                pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc);
                snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i);
                pthread_set_name_np(bc->bc_btid[i], tname);
        }

        return (bc);
err:
        if (fd >= 0)
                close(fd);
        return (NULL);
}

static int
blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
                enum blockop op)
{
        int err;

        err = 0;

        pthread_mutex_lock(&bc->bc_mtx);
        if (!TAILQ_EMPTY(&bc->bc_freeq)) {
                /*
                 * Enqueue and inform the block i/o thread
                 * that there is work available
                 */
                if (blockif_enqueue(bc, breq, op))
                        pthread_cond_signal(&bc->bc_cond);
        } else {
                /*
                 * Callers are not allowed to enqueue more than
                 * the specified blockif queue limit. Return an
                 * error to indicate that the queue length has been
                 * exceeded.
                 */
                err = E2BIG;
        }
        pthread_mutex_unlock(&bc->bc_mtx);

        return (err);
}

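/*
 * The four entry points below submit asynchronous requests.  As a purely
 * illustrative sketch (not part of this file; the callback and variable
 * names are hypothetical), a device model might issue a one-segment read
 * roughly as follows:
 *
 *      static void
 *      my_done(struct blockif_req *br, int err)
 *      {
 *              // err is 0 on success or an errno value
 *      }
 *
 *      br->br_iov[0].iov_base = databuf;
 *      br->br_iov[0].iov_len = bufsize;
 *      br->br_iovcnt = 1;
 *      br->br_offset = lba * blockif_sectsz(bc);
 *      br->br_resid = bufsize;
 *      br->br_callback = my_done;
 *      if (blockif_read(bc, br) != 0)
 *              handle_queue_full();    (E2BIG: retry later or report busy)
 *
 * my_done() is then invoked from a worker thread once the I/O completes.
 */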
int
blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
{

        assert(bc->bc_magic == BLOCKIF_SIG);
        return (blockif_request(bc, breq, BOP_READ));
}

int
blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
{

        assert(bc->bc_magic == BLOCKIF_SIG);
        return (blockif_request(bc, breq, BOP_WRITE));
}

int
blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
{

        assert(bc->bc_magic == BLOCKIF_SIG);
        return (blockif_request(bc, breq, BOP_FLUSH));
}

int
blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
{

        assert(bc->bc_magic == BLOCKIF_SIG);
        return (blockif_request(bc, breq, BOP_DELETE));
}

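/*
 * Attempt to cancel a previously submitted request.  A request still on the
 * pending queue is completed immediately and 0 is returned.  A request that
 * is being processed is interrupted by signalling its worker thread with
 * SIGCONT until it leaves the BST_BUSY state, and EBUSY is returned since
 * the completion callback may already have run.  EINVAL is returned if the
 * request cannot be found.
 */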
int
blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
{
        struct blockif_elem *be;

        assert(bc->bc_magic == BLOCKIF_SIG);

        pthread_mutex_lock(&bc->bc_mtx);
        /*
         * Check pending requests.
         */
        TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
                if (be->be_req == breq)
                        break;
        }
        if (be != NULL) {
                /*
                 * Found it.
                 */
                blockif_complete(bc, be);
                pthread_mutex_unlock(&bc->bc_mtx);

                return (0);
        }

        /*
         * Check in-flight requests.
         */
        TAILQ_FOREACH(be, &bc->bc_busyq, be_link) {
                if (be->be_req == breq)
                        break;
        }
        if (be == NULL) {
                /*
                 * Didn't find it.
                 */
                pthread_mutex_unlock(&bc->bc_mtx);
                return (EINVAL);
        }

        /*
         * Interrupt the processing thread to force it to return
         * prematurely via its normal callback path.
         */
        while (be->be_status == BST_BUSY) {
                struct blockif_sig_elem bse, *old_head;

                pthread_mutex_init(&bse.bse_mtx, NULL);
                pthread_cond_init(&bse.bse_cond, NULL);

                bse.bse_pending = 1;

                do {
                        old_head = blockif_bse_head;
                        bse.bse_next = old_head;
                } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
                                            (uintptr_t)old_head,
                                            (uintptr_t)&bse));

                pthread_kill(be->be_tid, SIGCONT);

                pthread_mutex_lock(&bse.bse_mtx);
                while (bse.bse_pending)
                        pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx);
                pthread_mutex_unlock(&bse.bse_mtx);
        }

        pthread_mutex_unlock(&bc->bc_mtx);

        /*
         * The processing thread has been interrupted.  Since it's not
         * clear if the callback has been invoked yet, return EBUSY.
         */
        return (EBUSY);
}

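/*
 * Tear down a block interface: flag the context as closing, wake and join
 * all worker threads, then release the file descriptor and the context
 * itself.  Requests still on the queues are not cancelled (see the XXX
 * note below).
 */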
int
blockif_close(struct blockif_ctxt *bc)
{
        void *jval;
        int err, i;

        err = 0;

        assert(bc->bc_magic == BLOCKIF_SIG);

        /*
         * Stop the block i/o threads
         */
        pthread_mutex_lock(&bc->bc_mtx);
        bc->bc_closing = 1;
        pthread_mutex_unlock(&bc->bc_mtx);
        pthread_cond_broadcast(&bc->bc_cond);
        for (i = 0; i < BLOCKIF_NUMTHR; i++)
                pthread_join(bc->bc_btid[i], &jval);

        /* XXX Cancel queued i/o's ??? */

        /*
         * Release resources
         */
        bc->bc_magic = 0;
        close(bc->bc_fd);
        free(bc);

        return (0);
}

/*
 * Return virtual C/H/S values for a given block. Use the algorithm
 * outlined in the VHD specification to calculate values.
 */
void
blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
{
        off_t sectors;          /* total sectors of the block dev */
        off_t hcyl;             /* cylinders times heads */
        uint16_t secpt;         /* sectors per track */
        uint8_t heads;

        assert(bc->bc_magic == BLOCKIF_SIG);

        sectors = bc->bc_size / bc->bc_sectsz;

        /* Clamp the size to the largest possible with CHS */
        if (sectors > 65535UL*16*255)
                sectors = 65535UL*16*255;

        if (sectors >= 65536UL*16*63) {
                secpt = 255;
                heads = 16;
                hcyl = sectors / secpt;
        } else {
                secpt = 17;
                hcyl = sectors / secpt;
                heads = (hcyl + 1023) / 1024;

                if (heads < 4)
                        heads = 4;

                if (hcyl >= (heads * 1024) || heads > 16) {
                        secpt = 31;
                        heads = 16;
                        hcyl = sectors / secpt;
                }
                if (hcyl >= (heads * 1024)) {
                        secpt = 63;
                        heads = 16;
                        hcyl = sectors / secpt;
                }
        }

        *c = hcyl / heads;
        *h = heads;
        *s = secpt;
}

/*
 * Accessors
 */
off_t
blockif_size(struct blockif_ctxt *bc)
{

        assert(bc->bc_magic == BLOCKIF_SIG);
        return (bc->bc_size);
}

int
blockif_sectsz(struct blockif_ctxt *bc)
{

        assert(bc->bc_magic == BLOCKIF_SIG);
        return (bc->bc_sectsz);
}

void
blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
{

        assert(bc->bc_magic == BLOCKIF_SIG);
        *size = bc->bc_psectsz;
        *off = bc->bc_psectoff;
}

int
blockif_queuesz(struct blockif_ctxt *bc)
{

        assert(bc->bc_magic == BLOCKIF_SIG);
        return (BLOCKIF_MAXREQ - 1);
}

int
blockif_is_ro(struct blockif_ctxt *bc)
{

        assert(bc->bc_magic == BLOCKIF_SIG);
        return (bc->bc_rdonly);
}

int
blockif_candelete(struct blockif_ctxt *bc)
{

        assert(bc->bc_magic == BLOCKIF_SIG);
        return (bc->bc_candelete);
}