/*-
 * Copyright (c) 2004 Lukas Ertl
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/systm.h>

#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>

static void gv_vol_completed_request(struct gv_volume *, struct bio *);
static void gv_vol_normal_request(struct gv_volume *, struct bio *);

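/*
 * Orphan method: called when a plex provider underneath us goes away.
 * Drop our access counts and this consumer; once the last consumer is
 * gone, stop the volume's worker thread and wither the geom.
 */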
static void
gv_volume_orphan(struct g_consumer *cp)
{
        struct g_geom *gp;
        struct gv_volume *v;
        int error;

        g_topology_assert();
        gp = cp->geom;
        g_trace(G_T_TOPOLOGY, "gv_volume_orphan(%s)", gp->name);
        if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
                g_access(cp, -cp->acr, -cp->acw, -cp->ace);
        error = cp->provider->error;
        if (error == 0)
                error = ENXIO;
        g_detach(cp);
        g_destroy_consumer(cp);
        if (!LIST_EMPTY(&gp->consumer))
                return;
        v = gp->softc;
        if (v != NULL) {
                gv_kill_vol_thread(v);
                v->geom = NULL;
        }
        gp->softc = NULL;
        g_wither_geom(gp, error);
}

/* We end up here after the requests to our plexes are done. */
static void
gv_volume_done(struct bio *bp)
{
        struct gv_volume *v;

        v = bp->bio_from->geom->softc;
        bp->bio_cflags |= GV_BIO_DONE;
        mtx_lock(&v->bqueue_mtx);
        bioq_insert_tail(v->bqueue, bp);
        wakeup(v);
        mtx_unlock(&v->bqueue_mtx);
}

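/*
 * Start method: reject unsupported commands and requests to a volume
 * that isn't up, and queue everything else for the worker thread.
 */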
static void
gv_volume_start(struct bio *bp)
{
        struct gv_volume *v;

        switch (bp->bio_cmd) {
        case BIO_READ:
        case BIO_WRITE:
        case BIO_DELETE:
                break;
        case BIO_GETATTR:
        default:
                g_io_deliver(bp, EOPNOTSUPP);
                return;
        }

        v = bp->bio_to->geom->softc;
        if (v->state != GV_VOL_UP) {
                g_io_deliver(bp, ENXIO);
                return;
        }

        mtx_lock(&v->bqueue_mtx);
        bioq_disksort(v->bqueue, bp);
        wakeup(v);
        mtx_unlock(&v->bqueue_mtx);
}

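/*
 * Worker thread for this volume.  It dequeues BIOs and dispatches them
 * either as fresh requests down to a plex, or as completed requests
 * back up to the original caller.
 */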
static void
gv_vol_worker(void *arg)
{
        struct bio *bp;
        struct gv_volume *v;

        v = arg;
        KASSERT(v != NULL, ("NULL v"));
        mtx_lock(&v->bqueue_mtx);
        for (;;) {
                /* We were signaled to exit. */
                if (v->flags & GV_VOL_THREAD_DIE)
                        break;

                /* Take the first BIO from our queue. */
                bp = bioq_takefirst(v->bqueue);
                if (bp == NULL) {
                        msleep(v, &v->bqueue_mtx, PRIBIO, "-", hz/10);
                        continue;
                }
                mtx_unlock(&v->bqueue_mtx);

                if (bp->bio_cflags & GV_BIO_DONE)
                        gv_vol_completed_request(v, bp);
                else
                        gv_vol_normal_request(v, bp);

                mtx_lock(&v->bqueue_mtx);
        }
        mtx_unlock(&v->bqueue_mtx);
        v->flags |= GV_VOL_THREAD_DEAD;
        wakeup(v);

        kthread_exit(ENXIO);
}

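/*
 * Handle a BIO that has come back from a plex.  A failed read is
 * retried on another plex if one is left; for writes and deletes we
 * remember whether at least one plex took the data, and clear the
 * error if so once all sub-requests are in.
 */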
static void
gv_vol_completed_request(struct gv_volume *v, struct bio *bp)
{
        struct bio *pbp;
        struct g_geom *gp;
        struct g_consumer *cp, *cp2;

        pbp = bp->bio_parent;

        if (pbp->bio_error == 0)
                pbp->bio_error = bp->bio_error;

        switch (pbp->bio_cmd) {
        case BIO_READ:
                if (bp->bio_error == 0)
                        break;

                if (pbp->bio_cflags & GV_BIO_RETRY)
                        break;

                /* Check if we have another plex left. */
                cp = bp->bio_from;
                gp = cp->geom;
                cp2 = LIST_NEXT(cp, consumer);
                if (cp2 == NULL)
                        break;

                /* If that plex is the last one, this retry is the final one. */
                if (LIST_NEXT(cp2, consumer) == NULL)
                        pbp->bio_cflags |= GV_BIO_RETRY;

                /* Undo the cloning and requeue the original request. */
                g_destroy_bio(bp);
                pbp->bio_children--;
                mtx_lock(&v->bqueue_mtx);
                bioq_disksort(v->bqueue, pbp);
                mtx_unlock(&v->bqueue_mtx);
                return;

        case BIO_WRITE:
        case BIO_DELETE:
                /* Remember if this write request succeeded. */
                if (bp->bio_error == 0)
                        pbp->bio_cflags |= GV_BIO_SUCCEED;
                break;
        }

        /* When the original request is finished, we deliver it. */
        pbp->bio_inbed++;
        if (pbp->bio_inbed == pbp->bio_children) {
                if (pbp->bio_cflags & GV_BIO_SUCCEED)
                        pbp->bio_error = 0;
                pbp->bio_completed = bp->bio_length;
                g_io_deliver(pbp, pbp->bio_error);
        }

        g_destroy_bio(bp);
}

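/*
 * Dispatch a fresh request: a read goes to one suitable plex, while
 * writes and deletes are cloned and sent to every plex that can take
 * them.
 */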
static void
gv_vol_normal_request(struct gv_volume *v, struct bio *bp)
{
        struct bio_queue_head queue;
        struct g_geom *gp;
        struct gv_plex *p, *lp;
        struct bio *cbp;

        gp = v->geom;

        switch (bp->bio_cmd) {
        case BIO_READ:
                cbp = g_clone_bio(bp);
                if (cbp == NULL) {
                        g_io_deliver(bp, ENOMEM);
                        return;
                }
                cbp->bio_done = gv_volume_done;
                /*
                 * Try to find a good plex to send the request to.  The
                 * plex either has to be up, or be a degraded RAID5 plex,
                 * which can still serve reads.  We cycle through the
                 * plexes round-robin, starting after the one that served
                 * the last read.
                 */
                lp = v->last_read_plex;
                if (lp == NULL)
                        lp = LIST_FIRST(&v->plexes);
                p = LIST_NEXT(lp, in_volume);
                do {
                        if (p == NULL)
                                p = LIST_FIRST(&v->plexes);
                        if ((p->state > GV_PLEX_DEGRADED) ||
                            (p->state >= GV_PLEX_DEGRADED &&
                            p->org == GV_PLEX_RAID5))
                                break;
                        p = LIST_NEXT(p, in_volume);
                } while (p != lp);

                /* If we didn't find a usable plex, give up. */
                if (p == NULL ||
                    (p->org == GV_PLEX_RAID5 && p->state < GV_PLEX_DEGRADED) ||
                    (p->org != GV_PLEX_RAID5 && p->state <= GV_PLEX_DEGRADED)) {
                        g_destroy_bio(cbp);
                        bp->bio_children--;
                        g_io_deliver(bp, ENXIO);
                        return;
                }
                g_io_request(cbp, p->consumer);
                v->last_read_plex = p;

                break;

        case BIO_WRITE:
        case BIO_DELETE:
                bioq_init(&queue);
                LIST_FOREACH(p, &v->plexes, in_volume) {
                        if (p->state < GV_PLEX_DEGRADED)
                                continue;
                        cbp = g_clone_bio(bp);
                        if (cbp == NULL) {
                                /* Out of memory; throw away our clones. */
                                for (cbp = bioq_first(&queue); cbp != NULL;
                                    cbp = bioq_first(&queue)) {
                                        bioq_remove(&queue, cbp);
                                        g_destroy_bio(cbp);
                                }
                                if (bp->bio_error == 0)
                                        bp->bio_error = ENOMEM;
                                g_io_deliver(bp, bp->bio_error);
                                return;
                        }
                        bioq_insert_tail(&queue, cbp);
                        cbp->bio_done = gv_volume_done;
                        cbp->bio_caller1 = p->consumer;
                }
                /* Fire off all sub-requests. */
                for (cbp = bioq_first(&queue); cbp != NULL;
                     cbp = bioq_first(&queue)) {
                        bioq_remove(&queue, cbp);
                        g_io_request(cbp, cbp->bio_caller1);
                }
                break;
        }
}

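/*
 * Access method: pass the access request on to all attached plex
 * consumers, rolling back the ones already granted if any of them
 * fails.
 */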
static int
gv_volume_access(struct g_provider *pp, int dr, int dw, int de)
{
        struct g_geom *gp;
        struct g_consumer *cp, *cp2;
        int error;

        gp = pp->geom;

        error = ENXIO;
        LIST_FOREACH(cp, &gp->consumer, consumer) {
                error = g_access(cp, dr, dw, de);
                if (error) {
                        LIST_FOREACH(cp2, &gp->consumer, consumer) {
                                if (cp == cp2)
                                        break;
                                g_access(cp2, -dr, -dw, -de);
                        }
                        return (error);
                }
        }
        return (error);
}

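/*
 * Taste method: if the offered provider is a vinum plex that belongs
 * to one of our volumes, attach to it, creating the volume geom, its
 * bio queue, and its worker thread on first contact.
 */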
static struct g_geom *
gv_volume_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
        struct g_geom *gp;
        struct g_provider *pp2;
        struct g_consumer *cp, *ocp;
        struct gv_softc *sc;
        struct gv_volume *v;
        struct gv_plex *p;
        int error, first;

        g_trace(G_T_TOPOLOGY, "gv_volume_taste(%s, %s)", mp->name, pp->name);
        g_topology_assert();

        /* First, find the VINUM class and its associated geom. */
        gp = find_vinum_geom();
        if (gp == NULL)
                return (NULL);

        sc = gp->softc;
        KASSERT(sc != NULL, ("gv_volume_taste: NULL sc"));

        gp = pp->geom;

        /* We only want to attach to plexes. */
        if (strcmp(gp->class->name, "VINUMPLEX"))
                return (NULL);

        first = 0;
        p = gp->softc;

        /* Let's see if the volume this plex wants is already configured. */
        v = gv_find_vol(sc, p->volume);
        if (v == NULL)
                return (NULL);
        if (v->geom == NULL) {
                gp = g_new_geomf(mp, "%s", p->volume);
                gp->start = gv_volume_start;
                gp->orphan = gv_volume_orphan;
                gp->access = gv_volume_access;
                gp->softc = v;
                first++;
        } else
                gp = v->geom;

        /* Create bio queue, queue mutex, and worker thread, if necessary. */
        if (v->bqueue == NULL) {
                v->bqueue = g_malloc(sizeof(struct bio_queue_head),
                    M_WAITOK | M_ZERO);
                bioq_init(v->bqueue);
        }
        if (mtx_initialized(&v->bqueue_mtx) == 0)
                mtx_init(&v->bqueue_mtx, "gv_plex", NULL, MTX_DEF);

        if (!(v->flags & GV_VOL_THREAD_ACTIVE)) {
                kthread_create(gv_vol_worker, v, NULL, 0, 0, "gv_v %s",
                    v->name);
                v->flags |= GV_VOL_THREAD_ACTIVE;
        }

        /*
         * Create a new consumer and attach it to the plex geom.  Since this
         * volume might already have a plex attached, we need to adjust the
         * access counts of the new consumer.
         */
        ocp = LIST_FIRST(&gp->consumer);
        cp = g_new_consumer(gp);
        g_attach(cp, pp);
        if ((ocp != NULL) && (ocp->acr > 0 || ocp->acw > 0 || ocp->ace > 0)) {
                error = g_access(cp, ocp->acr, ocp->acw, ocp->ace);
                if (error) {
                        printf("GEOM_VINUM: failed g_access %s -> %s; "
                            "errno %d\n", v->name, p->name, error);
                        g_detach(cp);
                        g_destroy_consumer(cp);
                        if (first)
                                g_destroy_geom(gp);
                        return (NULL);
                }
        }

        p->consumer = cp;

        if (p->vol_sc != v) {
                p->vol_sc = v;
                v->plexcount++;
                LIST_INSERT_HEAD(&v->plexes, p, in_volume);
        }

        /* We need to set up a new VINUMVOLUME geom. */
        if (first) {
                pp2 = g_new_providerf(gp, "gvinum/%s", v->name);
                pp2->mediasize = pp->mediasize;
                pp2->sectorsize = pp->sectorsize;
                g_error_provider(pp2, 0);
                v->size = pp2->mediasize;
                v->geom = gp;
                return (gp);
        }

        return (NULL);
}

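/* Tear down a volume geom: stop the worker thread and wither the geom. */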
static int
gv_volume_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp)
{
        struct gv_volume *v;

        g_trace(G_T_TOPOLOGY, "gv_volume_destroy_geom: %s", gp->name);
        g_topology_assert();

        v = gp->softc;
        gv_kill_vol_thread(v);
        g_wither_geom(gp, ENXIO);
        return (0);
}

#define VINUMVOLUME_CLASS_NAME "VINUMVOLUME"

static struct g_class g_vinum_volume_class = {
        .name = VINUMVOLUME_CLASS_NAME,
        .version = G_VERSION,
        .taste = gv_volume_taste,
        .destroy_geom = gv_volume_destroy_geom,
};

DECLARE_GEOM_CLASS(g_vinum_volume_class, g_vinum_volume);