/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This source file contains the state-engine which makes things happen in the
 * right order.
 *
 * Outline:
 *   1) g_bde_start1()
 *      Break the struct bio into multiple work packets, one per zone.
 *   2) g_bde_start2()
 *      Set up the necessary sector buffers, start those read operations
 *      which we can start at this time, and put the item on the work-list.
 *   3) g_bde_worker()
 *      Scan the work-list for items which are ready for crypto processing,
 *      call the matching crypto function in g_bde_crypt.c, and schedule
 *      any writes needed.  Read operations finish here by releasing the
 *      sector buffers and delivering the original bio request.
 *   4) g_bde_write_done()
 *      Release sector buffers and deliver the original bio request.
 *
 * Because of the C scope rules, the functions are in almost perfectly the
 * opposite order in this source file.
 *
 * XXX: A switch to the hardware-assisted crypto in src/sys/opencrypto would
 * XXX: add additional states to this state-engine.  Since no hardware
 * XXX: available at this time has AES support, implementing this has been
 * XXX: postponed until such time as it would result in a benefit.
 */
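
/*
 * For orientation (an editorial summary derived from the code below): work
 * packets move through the states SETUP -> WAIT -> FINISH, where FINISH is
 * reached only for writes and deletes; reads are completed directly from
 * WAIT.  Sector buffers move through JUNK -> IO -> VALID.  The state
 * constants themselves are defined in g_bde.h.
 */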

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/kthread.h>

#include <crypto/rijndael/rijndael-api-fst.h>
#include <crypto/sha2/sha512.h>
#include <geom/geom.h>
#include <geom/bde/g_bde.h>

/*
 * FIXME: This used to call malloc_last_fail which in practice was almost
 * guaranteed to return time_uptime even in the face of severe memory
 * shortage.  As GBDE is the only consumer, the kludge below was added to
 * facilitate the removal with minimal changes.  The code should be fixed
 * to respond to memory pressure (e.g., by using the lowmem eventhandler)
 * instead.
 */
static int
g_bde_malloc_last_fail(void)
{

	return (time_uptime);
}
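
/*
 * A minimal sketch of the fix suggested above, assuming the vm_lowmem
 * eventhandler: purge the skey cache when the VM system signals memory
 * pressure.  The handler below is hypothetical and not wired up; it would
 * need <sys/eventhandler.h>, an EVENTHANDLER_REGISTER() at attach time and
 * a matching EVENTHANDLER_DEREGISTER() at detach time.
 */
#if 0
static void
g_bde_lowmem(void *arg, int flags __unused)
{
	struct g_bde_softc *sc = arg;

	/* Drop roughly half of the cached skey sectors. */
	mtx_lock(&sc->worklist_mutex);
	g_bde_purge_sector(sc, 2);
	mtx_unlock(&sc->worklist_mutex);
}

/*
 * At attach time, for example (tag field name illustrative):
 *
 *	sc->lowmem_tag = EVENTHANDLER_REGISTER(vm_lowmem, g_bde_lowmem, sc,
 *	    EVENTHANDLER_PRI_ANY);
 */
#endif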

static void g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp);
static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
static void g_bde_release_keysector(struct g_bde_work *wp);
static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp);
static int g_bde_start_read(struct g_bde_sector *sp);
static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);

/*
 * Work item allocation.
 *
 * C++ would call these constructors and destructors.
 */
static u_int g_bde_nwork;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");

static MALLOC_DEFINE(M_GBDE, "gbde", "GBDE data structures");

static struct g_bde_work *
g_bde_new_work(struct g_bde_softc *sc)
{
	struct g_bde_work *wp;

	wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO);
	if (wp == NULL)
		return (wp);
	wp->state = SETUP;
	wp->softc = sc;
	g_bde_nwork++;
	sc->nwork++;
	TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
	return (wp);
}

static void
g_bde_delete_work(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;

	sc = wp->softc;
	g_bde_nwork--;
	sc->nwork--;
	TAILQ_REMOVE(&sc->worklist, wp, list);
	free(wp, M_GBDE);
}

/*
 * Sector buffer allocation.
 *
 * These two functions allocate and free variable-sized sector buffers.
 */

static u_int g_bde_nsect;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");

static void
g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

	g_bde_nsect--;
	sc->nsect--;
	if (sp->malloc)
		free(sp->data, M_GBDE);
	free(sp, M_GBDE);
}

static struct g_bde_sector *
g_bde_new_sector(struct g_bde_work *wp, u_int len)
{
	struct g_bde_sector *sp;

	sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO);
	if (sp == NULL)
		return (sp);
	if (len > 0) {
		sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO);
		if (sp->data == NULL) {
			free(sp, M_GBDE);
			return (NULL);
		}
		sp->malloc = 1;
	}
	g_bde_nsect++;
	wp->softc->nsect++;
	sp->size = len;
	sp->softc = wp->softc;
	sp->ref = 1;
	sp->owner = wp;
	sp->offset = wp->so;
	sp->state = JUNK;
	return (sp);
}

/*
 * Skey sector cache.
 *
 * Nothing prevents two separate I/O requests from addressing the same zone
 * and thereby needing the same skey sector.  For instance, two simultaneous
 * writes to different data sectors of one zone both need that zone's skey
 * sector.  We therefore need to sequence I/O operations to the skey
 * sectors.  A certain amount of caching is also desirable, although the
 * extent of the benefit from this has not yet been determined.
 *
 * XXX: GEOM may be able to grow a generic caching facility at some point
 * XXX: to support such needs.
 */

static u_int g_bde_ncache;
SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");

static void
g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

	g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
	if (sp->ref != 0)
		return;
	TAILQ_REMOVE(&sc->freelist, sp, list);
	g_bde_ncache--;
	sc->ncache--;
	bzero(sp->data, sp->size);
	g_bde_delete_sector(sc, sp);
}

static struct g_bde_sector *
g_bde_get_keysector(struct g_bde_work *wp)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;
	off_t offset;

	offset = wp->kso;
	g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset);
	sc = wp->softc;

	if (g_bde_malloc_last_fail() < g_bde_ncache)
		g_bde_purge_sector(sc, -1);

	/*
	 * The freelist is kept in LRU order; retire the oldest entry if it
	 * is unreferenced and has been idle for five minutes.
	 */
	sp = TAILQ_FIRST(&sc->freelist);
	if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
		g_bde_purge_one_sector(sc, sp);

	TAILQ_FOREACH(sp, &sc->freelist, list) {
		if (sp->offset == offset)
			break;
	}
	if (sp != NULL) {
		sp->ref++;
		KASSERT(sp->offset == offset, ("wrong offset"));
		KASSERT(sp->softc == wp->softc, ("wrong softc"));
		if (sp->ref == 1)
			sp->owner = wp;
	} else {
		if (g_bde_malloc_last_fail() < g_bde_ncache) {
			TAILQ_FOREACH(sp, &sc->freelist, list)
				if (sp->ref == 0)
					break;
		}
		if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
			sp = TAILQ_FIRST(&sc->freelist);
		if (sp != NULL && sp->ref > 0)
			sp = NULL;
		if (sp == NULL) {
			sp = g_bde_new_sector(wp, sc->sectorsize);
			if (sp != NULL) {
				g_bde_ncache++;
				sc->ncache++;
				TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
				sp->malloc = 2;
			}
		}
		if (sp != NULL) {
			sp->offset = offset;
			sp->softc = wp->softc;
			sp->ref = 1;
			sp->owner = wp;
			sp->state = JUNK;
			sp->error = 0;
		}
	}
	if (sp != NULL) {
		/* Move to the tail to keep the freelist in LRU order. */
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
		sp->used = time_uptime;
	}
	wp->ksp = sp;
	return (sp);
}

static void
g_bde_release_keysector(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp2;
	struct g_bde_sector *sp;

	sp = wp->ksp;
	g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp);
	KASSERT(sp->malloc == 2, ("Wrong sector released"));
	sc = sp->softc;
	KASSERT(sc != NULL, ("NULL sp->softc"));
	KASSERT(wp == sp->owner, ("Releasing, not owner"));
	sp->owner = NULL;
	wp->ksp = NULL;
	sp->ref--;
	if (sp->ref > 0) {
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
		TAILQ_FOREACH(wp2, &sc->worklist, list) {
			if (wp2->ksp == sp) {
				KASSERT(wp2 != wp, ("Self-reowning"));
				sp->owner = wp2;
				wakeup(sp->softc);
				break;
			}
		}
		KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
	} else if (sp->error != 0) {
		/*
		 * The last reference is gone and the sector is broken;
		 * invalidate it and move it to the head of the freelist
		 * so it is reused or purged first.
		 */
		sp->offset = ~0;
		sp->error = 0;
		sp->state = JUNK;
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
	}
}

static void
g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
{
	struct g_bde_sector *sp;
	int n;

	g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
	if (fraction > 0)
		n = sc->ncache / fraction + 1;
	else
		n = g_bde_ncache - g_bde_malloc_last_fail();
	if (n < 0)
		return;
	if (n > sc->ncache)
		n = sc->ncache;
	while (n--) {
		TAILQ_FOREACH(sp, &sc->freelist, list) {
			if (sp->ref != 0)
				continue;
			TAILQ_REMOVE(&sc->freelist, sp, list);
			g_bde_ncache--;
			sc->ncache--;
			bzero(sp->data, sp->size);
			g_bde_delete_sector(sc, sp);
			break;
		}
	}
}
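
/*
 * A worked example of the purge arithmetic (illustrative numbers): with
 * fraction = 10 and sc->ncache = 100, the loop above frees up to
 * 100/10 + 1 = 11 unreferenced cache sectors.  The worker thread calls
 * g_bde_purge_sector(sc, 10) roughly once per second when idle, so an
 * idle skey cache shrinks by about 10% per second.
 */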

static struct g_bde_sector *
g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp)
{
	struct g_bde_sector *sp;

	g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp);
	sp = g_bde_get_keysector(wp);
	if (sp == NULL) {
		g_bde_purge_sector(sc, -1);
		sp = g_bde_get_keysector(wp);
	}
	if (sp == NULL)
		return (sp);
	if (sp->owner != wp)
		return (sp);
	if (sp->state == VALID)
		return (sp);
	if (g_bde_start_read(sp) == 0)
		return (sp);
	g_bde_release_keysector(wp);
	return (NULL);
}

/*
 * Contribute to the completion of the original bio request.
 *
 * We have no simple way to tell how many pieces the original bio request
 * has been segmented into, so the easiest way to determine when we can
 * deliver it is to keep track of the number of bytes we have completed.
 * We keep track of any errors underway and latch onto the first one.
 *
 * We always report "nothing done" in case of error, because scattered
 * pieces here and there may have completed, and returning a number of
 * completed bytes does not convey any useful information about which bytes
 * they were.  If some piece of broken code somewhere interprets this to
 * mean that nothing has changed on the underlying media, they deserve the
 * lossage headed for them.
 *
 * A single mutex per g_bde instance is used to prevent contention.
 */
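
/*
 * For example (illustrative numbers): a 64 KiB bio chopped into four
 * 16 KiB work packets is delivered once the four contributions sum to
 * 65536 bytes; if any contribution carried an error, the first error is
 * latched and bio_completed is reported as 0.
 */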

static void
g_bde_contribute(struct bio *bp, off_t bytes, int error)
{

	g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
	    bp, (intmax_t)bytes, error);
	if (bp->bio_error == 0)
		bp->bio_error = error;
	bp->bio_completed += bytes;
	KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
	if (bp->bio_completed == bp->bio_length) {
		if (bp->bio_error != 0)
			bp->bio_completed = 0;
		g_io_deliver(bp, bp->bio_error);
	}
}

/*
 * This is the common case "we're done with this work packet" function.
 */

static void
g_bde_work_done(struct g_bde_work *wp, int error)
{

	g_bde_contribute(wp->bp, wp->length, error);
	if (wp->sp != NULL)
		g_bde_delete_sector(wp->softc, wp->sp);
	if (wp->ksp != NULL)
		g_bde_release_keysector(wp);
	g_bde_delete_work(wp);
}

/*
 * A write operation has finished.  When we have all the expected cows in
 * the barn, we close the door and call it a day.
 */

static void
g_bde_write_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_work *wp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	KASSERT(sp != NULL, ("NULL sp"));
	KASSERT(sc != NULL, ("NULL sc"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner"));
	g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
		bp->bio_error = EIO;
	sp->error = bp->bio_error;
	g_destroy_bio(bp);
	wp = sp->owner;
	if (wp->error == 0)
		wp->error = sp->error;

	if (wp->bp->bio_cmd == BIO_DELETE) {
		KASSERT(sp == wp->sp, ("trashed delete op"));
		g_bde_work_done(wp, wp->error);
		mtx_unlock(&sc->worklist_mutex);
		return;
	}

	KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
	KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
	if (wp->sp == sp) {
		g_bde_delete_sector(sc, wp->sp);
		wp->sp = NULL;
	} else {
		sp->state = VALID;
	}
	/* The work packet is done once both data and key writes are in. */
	if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID)
		g_bde_work_done(wp, wp->error);
	mtx_unlock(&sc->worklist_mutex);
	return;
}

/*
 * Send a write request for the given sector down the pipeline.
 */

static int
g_bde_start_write(struct g_bde_sector *sp)
{
	struct bio *bp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
	sc = sp->softc;
	KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_WRITE;
	bp->bio_offset = sp->offset;
	bp->bio_data = sp->data;
	bp->bio_length = sp->size;
	bp->bio_done = g_bde_write_done;
	bp->bio_caller1 = sp;
	bp->bio_caller2 = sc;
	sp->state = IO;
	g_io_request(bp, sc->consumer);
	return (0);
}

/*
 * A read operation has finished.  Mark the sector as no longer busy with
 * I/O, wake up the worker thread, and let it do its thing.
 */

static void
g_bde_read_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
		bp->bio_error = EIO;
	sp->error = bp->bio_error;
	if (sp->error == 0)
		sp->state = VALID;
	else
		sp->state = JUNK;
	wakeup(sc);
	g_destroy_bio(bp);
	mtx_unlock(&sc->worklist_mutex);
}

/*
 * Send a read request for the given sector down the pipeline.
 */

static int
g_bde_start_read(struct g_bde_sector *sp)
{
	struct bio *bp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
	sc = sp->softc;
	KASSERT(sc != NULL, ("Null softc in sp %p", sp));
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_READ;
	bp->bio_offset = sp->offset;
	bp->bio_data = sp->data;
	bp->bio_length = sp->size;
	bp->bio_done = g_bde_read_done;
	bp->bio_caller1 = sp;
	bp->bio_caller2 = sc;
	sp->state = IO;
	g_io_request(bp, sc->consumer);
	return (0);
}

/*
 * The worker thread.
 *
 * The up/down path of GEOM is not allowed to sleep or do any major work,
 * so we use this thread to do the actual crypto operations and to push
 * the state engine onwards.
 *
 * XXX: if we switch to the src/sys/opencrypto hardware-assisted encryption,
 * XXX: using a thread here is probably no longer needed.
 */
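
/*
 * A sketch of how this thread comes to life; the actual invocation lives
 * in g_bde.c at attach time, and the field names here are illustrative:
 *
 *	error = kproc_create(g_bde_worker, gp, &sc->thread, 0, 0,
 *	    "g_bde %s", gp->name);
 */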

void
g_bde_worker(void *arg)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp, *twp;
	struct g_geom *gp;
	int restart, error;

	gp = arg;
	sc = gp->softc;

	mtx_lock(&sc->worklist_mutex);
	for (;;) {
		restart = 0;
		g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
		TAILQ_FOREACH_SAFE(wp, &sc->worklist, list, twp) {
			KASSERT(wp != NULL, ("NULL wp"));
			KASSERT(wp->softc != NULL, ("NULL wp->softc"));
			if (wp->state != WAIT)
				continue;	/* Not interesting here */

			KASSERT(wp->bp != NULL, ("NULL wp->bp"));
			KASSERT(wp->sp != NULL, ("NULL wp->sp"));

			if (wp->ksp != NULL) {
				if (wp->ksp->owner != wp)
					continue;
				if (wp->ksp->state == IO)
					continue;
				KASSERT(wp->ksp->state == VALID,
				    ("Illegal sector state (%d)",
				    wp->ksp->state));
			}

			if (wp->bp->bio_cmd == BIO_READ && wp->sp->state == IO)
				continue;

			if (wp->ksp != NULL && wp->ksp->error != 0) {
				g_bde_work_done(wp, wp->ksp->error);
				continue;
			}
			switch (wp->bp->bio_cmd) {
			case BIO_READ:
				if (wp->ksp == NULL) {
					KASSERT(wp->error != 0,
					    ("BIO_READ, no ksp and no error"));
					g_bde_work_done(wp, wp->error);
					break;
				}
				if (wp->sp->error != 0) {
					g_bde_work_done(wp, wp->sp->error);
					break;
				}
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_read(wp);
				mtx_lock(&sc->worklist_mutex);
				restart++;
				g_bde_work_done(wp, wp->sp->error);
				break;
			case BIO_WRITE:
				wp->state = FINISH;
				KASSERT(wp->sp->owner == wp,
				    ("Write not owner sp"));
				KASSERT(wp->ksp->owner == wp,
				    ("Write not owner ksp"));
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_write(wp);
				mtx_lock(&sc->worklist_mutex);
				restart++;
				error = g_bde_start_write(wp->sp);
				if (error) {
					g_bde_work_done(wp, error);
					break;
				}
				error = g_bde_start_write(wp->ksp);
				/* Latch the first error only. */
				if (wp->error == 0)
					wp->error = error;
				break;
			case BIO_DELETE:
				wp->state = FINISH;
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_delete(wp);
				mtx_lock(&sc->worklist_mutex);
				restart++;
				g_bde_start_write(wp->sp);
				break;
			}
			if (restart)
				break;
		}
		if (!restart) {
			/*
			 * We don't look for our death-warrant until we are
			 * idle.  Shouldn't make a difference in practice.
			 */
			if (sc->dead)
				break;
			g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
			error = msleep(sc, &sc->worklist_mutex,
			    PRIBIO, "-", hz);
			if (error == EWOULDBLOCK) {
				/*
				 * Lose our skey cache in an orderly fashion.
				 * The exact rate can be tuned to be less
				 * aggressive if this is desirable.  10% per
				 * second means that the cache is gone in a
				 * few minutes.
				 */
				g_bde_purge_sector(sc, 10);
			}
		}
	}
	g_trace(G_T_TOPOLOGY, "g_bde_worker die");
	g_bde_purge_sector(sc, 1);
	KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
	KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
	KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
	mtx_unlock(&sc->worklist_mutex);
	sc->dead = 2;
	wakeup(sc);
	kproc_exit(0);
}

/*
 * g_bde_start1 has chopped the incoming request up, so all the requests
 * we see here are inside a single zone.  Map the data and key locations,
 * grab the buffers we need, and fire off the first volley of read requests.
 */

static void
g_bde_start2(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;

	KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
	KASSERT(wp->softc != NULL, ("NULL wp->softc"));
	g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
	sc = wp->softc;
	switch (wp->bp->bio_cmd) {
	case BIO_READ:
		wp->sp = g_bde_new_sector(wp, 0);
		if (wp->sp == NULL) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		wp->sp->size = wp->length;
		wp->sp->data = wp->data;
		if (g_bde_start_read(wp->sp) != 0) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		g_bde_read_keysector(sc, wp);
		if (wp->ksp == NULL)
			wp->error = ENOMEM;
		break;
	case BIO_DELETE:
		wp->sp = g_bde_new_sector(wp, wp->length);
		if (wp->sp == NULL) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		break;
	case BIO_WRITE:
		wp->sp = g_bde_new_sector(wp, wp->length);
		if (wp->sp == NULL) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		g_bde_read_keysector(sc, wp);
		if (wp->ksp == NULL) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		break;
	default:
		KASSERT(0 == 1,
		    ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
	}

	wp->state = WAIT;
	wakeup(sc);
}

/*
 * Create a sequence of work structures, and have g_bde_map_sector() determine
 * how long they each can be.  Feed them to g_bde_start2().
 */

void
g_bde_start1(struct bio *bp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp;
	off_t done;

	sc = bp->bio_to->geom->softc;
	bp->bio_driver1 = sc;

	mtx_lock(&sc->worklist_mutex);
	for (done = 0; done < bp->bio_length; ) {
		wp = g_bde_new_work(sc);
		if (wp != NULL) {
			wp->bp = bp;
			wp->offset = bp->bio_offset + done;
			wp->data = bp->bio_data + done;
			wp->length = bp->bio_length - done;
			g_bde_map_sector(wp);
			done += wp->length;
			g_bde_start2(wp);
		}
		if (wp == NULL || bp->bio_error != 0) {
			g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
			break;
		}
	}
	mtx_unlock(&sc->worklist_mutex);
	return;
}
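
/*
 * A minimal sketch (editorial, not part of this file) of how the GEOM
 * start method in g_bde.c hands requests to g_bde_start1(); the names and
 * the exact set of cases handled are illustrative:
 */
#if 0
static void
g_bde_start(struct bio *bp)
{

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		g_bde_start1(bp);	/* chopped per zone, see above */
		break;
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		break;
	}
}
#endif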