/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This source file contains the state-engine which makes things happen in the
 * right order.
 *
 * Outline:
 *   1) g_bde_start1()
 *      Break the struct bio into multiple work packets, one per zone.
 *   2) g_bde_start2()
 *      Set up the necessary sector buffers, start those read operations
 *      which we can start at this time, and put the item on the work-list.
 *   3) g_bde_worker()
 *      Scan the work-list for items which are ready for crypto processing,
 *      call the matching crypto function in g_bde_crypt.c and schedule
 *      any writes needed.  Read operations finish here by releasing the
 *      sector buffers and delivering the original bio request.
 *   4) g_bde_write_done()
 *      Release sector buffers and deliver the original bio request.
 *
 * Because of the C scope rules, the functions appear almost perfectly in
 * the opposite order in this source file.
 *
 * XXX: A switch to the hardware-assisted crypto in src/sys/opencrypto will
 * XXX: add additional states to this state-engine.  Since no hardware
 * XXX: available at this time has AES support, implementing this has been
 * XXX: postponed until such time as it would result in a benefit.
 */
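/*
 * For orientation, a sketch of the state transitions as they can be read
 * out of the code below (not a formal specification):
 *
 *   work packet:   SETUP -> WAIT -> FINISH
 *   sector buffer: JUNK -> IO -> VALID   (back to JUNK on I/O error)
 */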

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/kthread.h>

#include <crypto/rijndael/rijndael-api-fst.h>
#include <crypto/sha2/sha512.h>
#include <geom/geom.h>
#include <geom/bde/g_bde.h>

static void g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp);
static struct g_bde_sector *g_bde_new_sector(struct g_bde_work *wp, u_int len);
static void g_bde_release_keysector(struct g_bde_work *wp);
static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp);
static int g_bde_start_read(struct g_bde_sector *sp);
static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);

/*
 * Work item allocation.
 *
 * C++ would call these constructors and destructors.
 */
static u_int g_bde_nwork;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0,
    "Number of active gbde work packets");

static MALLOC_DEFINE(M_GBDE, "gbde", "GBDE data structures");

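/*
 * Note on allocation flags: all allocations in this file use M_NOWAIT,
 * presumably because the callers run with the softc's worklist mutex held
 * (or in GEOM's non-sleepable down path), so a failed allocation has to be
 * handled rather than slept on.
 */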
static struct g_bde_work *
g_bde_new_work(struct g_bde_softc *sc)
{
        struct g_bde_work *wp;

        wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO);
        if (wp == NULL)
                return (wp);
        wp->state = SETUP;
        wp->softc = sc;
        g_bde_nwork++;
        sc->nwork++;
        TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
        return (wp);
}

static void
g_bde_delete_work(struct g_bde_work *wp)
{
        struct g_bde_softc *sc;

        sc = wp->softc;
        g_bde_nwork--;
        sc->nwork--;
        TAILQ_REMOVE(&sc->worklist, wp, list);
        free(wp, M_GBDE);
}

/*
 * Sector buffer allocation.
 *
 * These two functions allocate and free variable-sized sector buffers.
 */

static u_int g_bde_nsect;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0,
    "Number of allocated gbde sector buffers");

static void
g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

        g_bde_nsect--;
        sc->nsect--;
        if (sp->malloc)
                free(sp->data, M_GBDE);
        free(sp, M_GBDE);
}

static struct g_bde_sector *
g_bde_new_sector(struct g_bde_work *wp, u_int len)
{
        struct g_bde_sector *sp;

        sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO);
        if (sp == NULL)
                return (sp);
        if (len > 0) {
                sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO);
                if (sp->data == NULL) {
                        free(sp, M_GBDE);
                        return (NULL);
                }
                sp->malloc = 1;
        }
        g_bde_nsect++;
        wp->softc->nsect++;
        sp->size = len;
        sp->softc = wp->softc;
        sp->ref = 1;
        sp->owner = wp;
        sp->offset = wp->so;
        sp->state = JUNK;
        return (sp);
}

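/*
 * As used in this file, the sp->malloc field encodes how the sector came
 * to be: 0 means sp->data points at somebody else's buffer, 1 means the
 * data buffer was malloc'ed together with the sector, and 2 marks a
 * key-sector cache entry owned by the freelist.
 */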
/*
 * Skey sector cache.
 *
 * Nothing prevents two separate I/O requests from addressing the same zone
 * and thereby needing the same skey sector.  We therefore need to sequence
 * I/O operations to the skey sectors.  A certain amount of caching is also
 * desirable, although how much benefit it brings has not been determined
 * at this point.
 *
 * XXX: GEOM may be able to grow a generic caching facility at some point
 * XXX: to support such needs.
 */

static u_int g_bde_ncache;
SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0,
    "Number of cached gbde key sectors");

static void
g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

        g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
        if (sp->ref != 0)
                return;
        TAILQ_REMOVE(&sc->freelist, sp, list);
        g_bde_ncache--;
        sc->ncache--;
        bzero(sp->data, sp->size);
        g_bde_delete_sector(sc, sp);
}

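/*
 * Sketch of the lookup strategy in g_bde_get_keysector(), as implemented
 * below: purge unreferenced entries when memory is tight, then search the
 * freelist for a sector already caching the wanted offset; failing that,
 * recycle an unreferenced freelist entry or allocate a fresh one.  The
 * chosen entry is moved to the tail of the freelist, which thus stays in
 * LRU order.
 */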
static struct g_bde_sector *
g_bde_get_keysector(struct g_bde_work *wp)
{
        struct g_bde_sector *sp;
        struct g_bde_softc *sc;
        off_t offset;

        offset = wp->kso;
        g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp,
            (intmax_t)offset);
        sc = wp->softc;

        if (malloc_last_fail() < g_bde_ncache)
                g_bde_purge_sector(sc, -1);

        sp = TAILQ_FIRST(&sc->freelist);
        if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
                g_bde_purge_one_sector(sc, sp);

        TAILQ_FOREACH(sp, &sc->freelist, list) {
                if (sp->offset == offset)
                        break;
        }
        if (sp != NULL) {
                sp->ref++;
                KASSERT(sp->offset == offset, ("wrong offset"));
                KASSERT(sp->softc == wp->softc, ("wrong softc"));
                if (sp->ref == 1)
                        sp->owner = wp;
        } else {
                if (malloc_last_fail() < g_bde_ncache) {
                        TAILQ_FOREACH(sp, &sc->freelist, list)
                                if (sp->ref == 0)
                                        break;
                }
                if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
                        sp = TAILQ_FIRST(&sc->freelist);
                if (sp != NULL && sp->ref > 0)
                        sp = NULL;
                if (sp == NULL) {
                        sp = g_bde_new_sector(wp, sc->sectorsize);
                        if (sp != NULL) {
                                g_bde_ncache++;
                                sc->ncache++;
                                TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
                                sp->malloc = 2;
                        }
                }
                if (sp != NULL) {
                        sp->offset = offset;
                        sp->softc = wp->softc;
                        sp->ref = 1;
                        sp->owner = wp;
                        sp->state = JUNK;
                        sp->error = 0;
                }
        }
        if (sp != NULL) {
                TAILQ_REMOVE(&sc->freelist, sp, list);
                TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
                sp->used = time_uptime;
        }
        wp->ksp = sp;
        return (sp);
}

static void
g_bde_release_keysector(struct g_bde_work *wp)
{
        struct g_bde_softc *sc;
        struct g_bde_work *wp2;
        struct g_bde_sector *sp;

        sp = wp->ksp;
        g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp);
        KASSERT(sp->malloc == 2, ("Wrong sector released"));
        sc = sp->softc;
        KASSERT(sc != NULL, ("NULL sp->softc"));
        KASSERT(wp == sp->owner, ("Releasing, not owner"));
        sp->owner = NULL;
        wp->ksp = NULL;
        sp->ref--;
        if (sp->ref > 0) {
                TAILQ_REMOVE(&sc->freelist, sp, list);
                TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
                TAILQ_FOREACH(wp2, &sc->worklist, list) {
                        if (wp2->ksp == sp) {
                                KASSERT(wp2 != wp, ("Self-reowning"));
                                sp->owner = wp2;
                                wakeup(sp->softc);
                                break;
                        }
                }
                KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
        } else if (sp->error != 0) {
                /*
                 * Invalidate an errored sector and move it to the head of
                 * the freelist so it gets recycled first.
                 */
                sp->offset = ~0;
                sp->error = 0;
                sp->state = JUNK;
                TAILQ_REMOVE(&sc->freelist, sp, list);
                TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
        }
}

static void
g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
{
        struct g_bde_sector *sp;
        int n;

        g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
        if (fraction > 0)
                n = sc->ncache / fraction + 1;
        else
                n = g_bde_ncache - malloc_last_fail();
        if (n < 0)
                return;
        if (n > sc->ncache)
                n = sc->ncache;
        while (n--) {
                TAILQ_FOREACH(sp, &sc->freelist, list) {
                        if (sp->ref != 0)
                                continue;
                        TAILQ_REMOVE(&sc->freelist, sp, list);
                        g_bde_ncache--;
                        sc->ncache--;
                        bzero(sp->data, sp->size);
                        g_bde_delete_sector(sc, sp);
                        break;
                }
        }
}

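/*
 * Note on the return contract below: NULL means allocation failed; a
 * non-NULL sector may still be owned by another work packet or be under
 * I/O, in which case the caller's work packet has to wait its turn in
 * the worker thread.
 */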
static struct g_bde_sector *
g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp)
{
        struct g_bde_sector *sp;

        g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp);
        sp = g_bde_get_keysector(wp);
        if (sp == NULL) {
                g_bde_purge_sector(sc, -1);
                sp = g_bde_get_keysector(wp);
        }
        if (sp == NULL)
                return (sp);
        if (sp->owner != wp)
                return (sp);
        if (sp->state == VALID)
                return (sp);
        if (g_bde_start_read(sp) == 0)
                return (sp);
        g_bde_release_keysector(wp);
        return (NULL);
}

/*
 * Contribute to the completion of the original bio request.
 *
 * We have no simple way to tell how many pieces the original bio request
 * has been segmented into, so the easiest way to determine when we can
 * deliver it is to keep track of the number of bytes we have completed.
 * We keep track of any errors underway and latch onto the first one.
 *
 * We always report "nothing done" in case of error, because random pieces
 * here and there may have completed and returning a number of completed
 * bytes does not convey any useful information about which bytes they
 * were.  If some piece of broken code somewhere interprets this to mean
 * that nothing has changed on the underlying media, they deserve the
 * lossage headed for them.
 *
 * A single mutex per g_bde instance is used to prevent contention.
 */
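/*
 * Worked example (illustrative numbers): a 64 kB bio chopped into four
 * 16 kB work packets is delivered once the four contributions sum to
 * bio_length; if any packet reported an error, bio_completed is zeroed
 * before delivery.
 */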

static void
g_bde_contribute(struct bio *bp, off_t bytes, int error)
{

        g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
            bp, (intmax_t)bytes, error);
        if (bp->bio_error == 0)
                bp->bio_error = error;
        bp->bio_completed += bytes;
        KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
        if (bp->bio_completed == bp->bio_length) {
                if (bp->bio_error != 0)
                        bp->bio_completed = 0;
                g_io_deliver(bp, bp->bio_error);
        }
}

/*
 * This is the common-case "we're done with this work packet" function.
 */

static void
g_bde_work_done(struct g_bde_work *wp, int error)
{

        g_bde_contribute(wp->bp, wp->length, error);
        if (wp->sp != NULL)
                g_bde_delete_sector(wp->softc, wp->sp);
        if (wp->ksp != NULL)
                g_bde_release_keysector(wp);
        g_bde_delete_work(wp);
}

/*
 * A write operation has finished.  When we have all the expected cows in
 * the barn, we close the door and call it a day.
 */

static void
g_bde_write_done(struct bio *bp)
{
        struct g_bde_sector *sp;
        struct g_bde_work *wp;
        struct g_bde_softc *sc;

        sp = bp->bio_caller1;
        sc = bp->bio_caller2;
        mtx_lock(&sc->worklist_mutex);
        KASSERT(sp != NULL, ("NULL sp"));
        KASSERT(sc != NULL, ("NULL sc"));
        KASSERT(sp->owner != NULL, ("NULL sp->owner"));
        g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
        if (bp->bio_error == 0 && bp->bio_completed != sp->size)
                bp->bio_error = EIO;
        sp->error = bp->bio_error;
        g_destroy_bio(bp);
        wp = sp->owner;
        if (wp->error == 0)
                wp->error = sp->error;

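        /*
         * A BIO_WRITE work packet issues two writes, one for the data
         * sector and one for the key sector; the packet is finished only
         * when the data sector has been freed and the key sector has gone
         * VALID, as checked below.
         */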
        if (wp->bp->bio_cmd == BIO_DELETE) {
                KASSERT(sp == wp->sp, ("trashed delete op"));
                g_bde_work_done(wp, wp->error);
                mtx_unlock(&sc->worklist_mutex);
                return;
        }

        KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
        KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
        if (wp->sp == sp) {
                g_bde_delete_sector(sc, wp->sp);
                wp->sp = NULL;
        } else {
                sp->state = VALID;
        }
        if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID)
                g_bde_work_done(wp, wp->error);
        mtx_unlock(&sc->worklist_mutex);
        return;
}

/*
 * Send a write request for the given sector down the pipeline.
 */

static int
g_bde_start_write(struct g_bde_sector *sp)
{
        struct bio *bp;
        struct g_bde_softc *sc;

        g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
        sc = sp->softc;
        KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
        KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
        bp = g_new_bio();
        if (bp == NULL)
                return (ENOMEM);
        bp->bio_cmd = BIO_WRITE;
        bp->bio_offset = sp->offset;
        bp->bio_data = sp->data;
        bp->bio_length = sp->size;
        bp->bio_done = g_bde_write_done;
        bp->bio_caller1 = sp;
        bp->bio_caller2 = sc;
        sp->state = IO;
        g_io_request(bp, sc->consumer);
        return (0);
}

/*
 * A read operation has finished.  Mark the sector as no longer busy with
 * I/O, and wake up the worker thread to let it do its thing.
 */

static void
g_bde_read_done(struct bio *bp)
{
        struct g_bde_sector *sp;
        struct g_bde_softc *sc;

        sp = bp->bio_caller1;
        g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
        sc = bp->bio_caller2;
        mtx_lock(&sc->worklist_mutex);
        if (bp->bio_error == 0 && bp->bio_completed != sp->size)
                bp->bio_error = EIO;
        sp->error = bp->bio_error;
        if (sp->error == 0)
                sp->state = VALID;
        else
                sp->state = JUNK;
        wakeup(sc);
        g_destroy_bio(bp);
        mtx_unlock(&sc->worklist_mutex);
}

/*
 * Send a read request for the given sector down the pipeline.
 */

static int
g_bde_start_read(struct g_bde_sector *sp)
{
        struct bio *bp;
        struct g_bde_softc *sc;

        g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
        sc = sp->softc;
        KASSERT(sc != NULL, ("Null softc in sp %p", sp));
        bp = g_new_bio();
        if (bp == NULL)
                return (ENOMEM);
        bp->bio_cmd = BIO_READ;
        bp->bio_offset = sp->offset;
        bp->bio_data = sp->data;
        bp->bio_length = sp->size;
        bp->bio_done = g_bde_read_done;
        bp->bio_caller1 = sp;
        bp->bio_caller2 = sc;
        sp->state = IO;
        g_io_request(bp, sc->consumer);
        return (0);
}

/*
 * The worker thread.
 *
 * The up/down path of GEOM is not allowed to sleep or do any major work,
 * so we use this thread to do the actual crypto operations and to push
 * the state engine onwards.
 *
 * XXX: if we switch to the src/sys/opencrypto hardware-assisted encryption,
 * XXX: using a thread here is probably not needed.
 */

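/*
 * Locking discipline (as practiced below): the worklist mutex is held
 * for the whole scan, and is dropped only around the calls into the
 * g_bde_crypt_*() functions, which do the CPU-heavy work.
 */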
void
g_bde_worker(void *arg)
{
        struct g_bde_softc *sc;
        struct g_bde_work *wp, *twp;
        struct g_geom *gp;
        int restart, error;

        gp = arg;
        sc = gp->softc;

        mtx_lock(&sc->worklist_mutex);
        for (;;) {
                restart = 0;
                g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
                TAILQ_FOREACH_SAFE(wp, &sc->worklist, list, twp) {
                        KASSERT(wp != NULL, ("NULL wp"));
                        KASSERT(wp->softc != NULL, ("NULL wp->softc"));
                        if (wp->state != WAIT)
                                continue;       /* Not interesting here */

                        KASSERT(wp->bp != NULL, ("NULL wp->bp"));
                        KASSERT(wp->sp != NULL, ("NULL wp->sp"));

                        if (wp->ksp != NULL) {
                                if (wp->ksp->owner != wp)
                                        continue;
                                if (wp->ksp->state == IO)
                                        continue;
                                KASSERT(wp->ksp->state == VALID,
                                    ("Illegal sector state (%d)",
                                    wp->ksp->state));
                        }

                        if (wp->bp->bio_cmd == BIO_READ && wp->sp->state == IO)
                                continue;

                        if (wp->ksp != NULL && wp->ksp->error != 0) {
                                g_bde_work_done(wp, wp->ksp->error);
                                continue;
                        }
                        switch (wp->bp->bio_cmd) {
                        case BIO_READ:
                                if (wp->ksp == NULL) {
                                        KASSERT(wp->error != 0,
                                            ("BIO_READ, no ksp and no error"));
                                        g_bde_work_done(wp, wp->error);
                                        break;
                                }
                                if (wp->sp->error != 0) {
                                        g_bde_work_done(wp, wp->sp->error);
                                        break;
                                }
                                mtx_unlock(&sc->worklist_mutex);
                                g_bde_crypt_read(wp);
                                mtx_lock(&sc->worklist_mutex);
                                restart++;
                                g_bde_work_done(wp, wp->sp->error);
                                break;
                        case BIO_WRITE:
                                wp->state = FINISH;
                                KASSERT(wp->sp->owner == wp,
                                    ("Write not owner sp"));
                                KASSERT(wp->ksp->owner == wp,
                                    ("Write not owner ksp"));
                                mtx_unlock(&sc->worklist_mutex);
                                g_bde_crypt_write(wp);
                                mtx_lock(&sc->worklist_mutex);
                                restart++;
                                error = g_bde_start_write(wp->sp);
                                if (error) {
                                        g_bde_work_done(wp, error);
                                        break;
                                }
                                error = g_bde_start_write(wp->ksp);
                                /* Latch the first error, as elsewhere. */
                                if (wp->error == 0)
                                        wp->error = error;
                                break;
                        case BIO_DELETE:
                                wp->state = FINISH;
                                mtx_unlock(&sc->worklist_mutex);
                                g_bde_crypt_delete(wp);
                                mtx_lock(&sc->worklist_mutex);
                                restart++;
                                error = g_bde_start_write(wp->sp);
                                if (error)
                                        g_bde_work_done(wp, error);
                                break;
                        }
                        if (restart)
                                break;
                }
                if (!restart) {
                        /*
                         * We don't look for our death-warrant until we are
                         * idle.  Shouldn't make a difference in practice.
                         */
                        if (sc->dead)
                                break;
                        g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
                        error = msleep(sc, &sc->worklist_mutex,
                            PRIBIO, "-", hz);
                        if (error == EWOULDBLOCK) {
                                /*
                                 * Lose our skey cache in an orderly fashion.
                                 * The exact rate can be tuned to be less
                                 * aggressive if this is desirable.  10% per
                                 * second means that the cache is gone in a
                                 * few minutes.
                                 */
                                g_bde_purge_sector(sc, 10);
                        }
                }
        }
        g_trace(G_T_TOPOLOGY, "g_bde_worker die");
        g_bde_purge_sector(sc, 1);
        KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
        KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
        KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
        mtx_unlock(&sc->worklist_mutex);
        sc->dead = 2;
        wakeup(sc);
        kproc_exit(0);
}

/*
 * g_bde_start1() has chopped the incoming request up so all the requests
 * we see here are inside a single zone.  Map the data and key locations,
 * grab the buffers we need, and fire off the first volley of read requests.
 */

static void
g_bde_start2(struct g_bde_work *wp)
{
        struct g_bde_softc *sc;

        KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
        KASSERT(wp->softc != NULL, ("NULL wp->softc"));
        g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
        sc = wp->softc;
        switch (wp->bp->bio_cmd) {
        case BIO_READ:
                wp->sp = g_bde_new_sector(wp, 0);
                if (wp->sp == NULL) {
                        g_bde_work_done(wp, ENOMEM);
                        return;
                }
                wp->sp->size = wp->length;
                wp->sp->data = wp->data;
                if (g_bde_start_read(wp->sp) != 0) {
                        g_bde_work_done(wp, ENOMEM);
                        return;
                }
                g_bde_read_keysector(sc, wp);
                if (wp->ksp == NULL)
                        wp->error = ENOMEM;
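                /*
                 * The data-sector read is already in flight, so the work
                 * packet cannot be completed here; the worker thread will
                 * pick up wp->error once that read finishes.
                 */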
                break;
        case BIO_DELETE:
                wp->sp = g_bde_new_sector(wp, wp->length);
                if (wp->sp == NULL) {
                        g_bde_work_done(wp, ENOMEM);
                        return;
                }
                break;
        case BIO_WRITE:
                wp->sp = g_bde_new_sector(wp, wp->length);
                if (wp->sp == NULL) {
                        g_bde_work_done(wp, ENOMEM);
                        return;
                }
                g_bde_read_keysector(sc, wp);
                if (wp->ksp == NULL) {
                        g_bde_work_done(wp, ENOMEM);
                        return;
                }
                break;
        default:
                KASSERT(0 == 1,
                    ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
        }

        wp->state = WAIT;
        wakeup(sc);
}

/*
 * Create a sequence of work structures, and have g_bde_map_sector()
 * determine how long each of them can be.  Feed them to g_bde_start2().
 */

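/*
 * Illustration (hypothetical geometry): a bio whose range straddles a
 * zone boundary becomes two work packets, since g_bde_map_sector() trims
 * each wp->length so a packet never crosses into the next zone.
 */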
void
g_bde_start1(struct bio *bp)
{
        struct g_bde_softc *sc;
        struct g_bde_work *wp;
        off_t done;

        sc = bp->bio_to->geom->softc;
        bp->bio_driver1 = sc;

        mtx_lock(&sc->worklist_mutex);
        for (done = 0; done < bp->bio_length; ) {
                wp = g_bde_new_work(sc);
                if (wp != NULL) {
                        wp->bp = bp;
                        wp->offset = bp->bio_offset + done;
                        wp->data = bp->bio_data + done;
                        wp->length = bp->bio_length - done;
                        g_bde_map_sector(wp);
                        done += wp->length;
                        g_bde_start2(wp);
                }
                if (wp == NULL || bp->bio_error != 0) {
                        g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
                        break;
                }
        }
        mtx_unlock(&sc->worklist_mutex);
        return;
}