]> CyberLeo.Net >> Repos - FreeBSD/releng/9.1.git/blob - sys/geom/raid/md_intel.c
MFC r240465:
[FreeBSD/releng/9.1.git] / sys / geom / raid / md_intel.c
1 /*-
2  * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
3  * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include <sys/param.h>
32 #include <sys/bio.h>
33 #include <sys/endian.h>
34 #include <sys/kernel.h>
35 #include <sys/kobj.h>
36 #include <sys/limits.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mutex.h>
40 #include <sys/systm.h>
41 #include <sys/taskqueue.h>
42 #include <geom/geom.h>
43 #include "geom/raid/g_raid.h"
44 #include "g_raid_md_if.h"
45
46 static MALLOC_DEFINE(M_MD_INTEL, "md_intel_data", "GEOM_RAID Intel metadata");
47
/* Constants listed with intel_raid_map.status. */
#define INTEL_S_READY		0x00
#define INTEL_S_UNINITIALIZED	0x01
#define INTEL_S_DEGRADED	0x02
#define INTEL_S_FAILURE		0x03

/* Constants listed with intel_raid_map.type. */
#define INTEL_T_RAID0		0x00
#define INTEL_T_RAID1		0x01
#define INTEL_T_RAID5		0x05

/*
 * Masks for intel_raid_map.disk_idx[] entries.  INTEL_DI_IDX extracts the
 * disk index; INTEL_DI_RBLD is presumably a "rebuilding" flag (its use is
 * not visible in this part of the file).
 */
#define INTEL_DI_IDX	0x00ffffff
#define INTEL_DI_RBLD	0x01000000

/*
 * Packed map record embedded in a volume record.  The offset,
 * disk_sectors and stripe_count values are each split into a low 32-bit
 * field and a matching *_hi field holding the upper 32 bits.  The record
 * is variable-length: disk_idx[] really holds total_disks entries.
 */
struct intel_raid_map {
	uint32_t	offset;			/* Low 32 bits. */
	uint32_t	disk_sectors;		/* Low 32 bits. */
	uint32_t	stripe_count;		/* Low 32 bits. */
	uint16_t	strip_sectors;
	uint8_t		status;			/* INTEL_S_* */
	uint8_t		type;			/* INTEL_T_* */
	uint8_t		total_disks;		/* Entries in disk_idx[]. */
	uint8_t		total_domains;
	uint8_t		failed_disk_num;
	uint8_t		ddf;
	uint32_t	offset_hi;		/* Upper 32 bits. */
	uint32_t	disk_sectors_hi;	/* Upper 32 bits. */
	uint32_t	stripe_count_hi;	/* Upper 32 bits. */
	uint32_t	filler_2[4];
	uint32_t	disk_idx[1];		/* total_disks entries. */
} __packed;
76
77 struct intel_raid_vol {
78         uint8_t         name[16];
79         u_int64_t       total_sectors __packed;
80         uint32_t        state;
81 #define INTEL_ST_BOOTABLE               0x00000001
82 #define INTEL_ST_BOOT_DEVICE            0x00000002
83 #define INTEL_ST_READ_COALESCING        0x00000004
84 #define INTEL_ST_WRITE_COALESCING       0x00000008
85 #define INTEL_ST_LAST_SHUTDOWN_DIRTY    0x00000010
86 #define INTEL_ST_HIDDEN_AT_BOOT         0x00000020
87 #define INTEL_ST_CURRENTLY_HIDDEN       0x00000040
88 #define INTEL_ST_VERIFY_AND_FIX         0x00000080
89 #define INTEL_ST_MAP_STATE_UNINIT       0x00000100
90 #define INTEL_ST_NO_AUTO_RECOVERY       0x00000200
91 #define INTEL_ST_CLONE_N_GO             0x00000400
92 #define INTEL_ST_CLONE_MAN_SYNC         0x00000800
93 #define INTEL_ST_CNG_MASTER_DISK_NUM    0x00001000
94         uint32_t        reserved;
95         uint8_t         migr_priority;
96         uint8_t         num_sub_vols;
97         uint8_t         tid;
98         uint8_t         cng_master_disk;
99         uint16_t        cache_policy;
100         uint8_t         cng_state;
101         uint8_t         cng_sub_state;
102         uint32_t        filler_0[10];
103
104         uint32_t        curr_migr_unit;
105         uint32_t        checkpoint_id;
106         uint8_t         migr_state;
107         uint8_t         migr_type;
108 #define INTEL_MT_INIT           0
109 #define INTEL_MT_REBUILD        1
110 #define INTEL_MT_VERIFY         2
111 #define INTEL_MT_GEN_MIGR       3
112 #define INTEL_MT_STATE_CHANGE   4
113 #define INTEL_MT_REPAIR         5
114         uint8_t         dirty;
115         uint8_t         fs_state;
116         uint16_t        verify_errors;
117         uint16_t        bad_blocks;
118         uint32_t        curr_migr_unit_hi;
119         uint32_t        filler_1[3];
120         struct intel_raid_map map[1];   /* 2 entries if migr_state != 0. */
121 } __packed;
122
/* Length in bytes of the serial field of a disk record. */
#define INTEL_SERIAL_LEN	16

/* Bit flags listed with intel_raid_disk.flags. */
#define INTEL_F_SPARE		0x01
#define INTEL_F_ASSIGNED	0x02
#define INTEL_F_FAILED		0x04
#define INTEL_F_ONLINE		0x08

/*
 * Packed per-disk record.  The sector count is split into a low 32-bit
 * field (sectors) and an upper 32-bit field (sectors_hi).
 */
struct intel_raid_disk {
	uint8_t		serial[INTEL_SERIAL_LEN];
	uint32_t	sectors;		/* Low 32 bits. */
	uint32_t	id;
	uint32_t	flags;			/* INTEL_F_* */
	uint32_t	owner_cfg_num;
	uint32_t	sectors_hi;		/* Upper 32 bits. */
	uint32_t	filler[3];
} __packed;
137
138 struct intel_raid_conf {
139         uint8_t         intel_id[24];
140 #define INTEL_MAGIC             "Intel Raid ISM Cfg Sig. "
141
142         uint8_t         version[6];
143 #define INTEL_VERSION_1000      "1.0.00"        /* RAID0 */
144 #define INTEL_VERSION_1100      "1.1.00"        /* RAID1 */
145 #define INTEL_VERSION_1200      "1.2.00"        /* Many volumes */
146 #define INTEL_VERSION_1201      "1.2.01"        /* 3 or 4 disks */
147 #define INTEL_VERSION_1202      "1.2.02"        /* RAID5 */
148 #define INTEL_VERSION_1204      "1.2.04"        /* 5 or 6 disks */
149 #define INTEL_VERSION_1206      "1.2.06"        /* CNG */
150 #define INTEL_VERSION_1300      "1.3.00"        /* Attributes */
151
152         uint8_t         dummy_0[2];
153         uint32_t        checksum;
154         uint32_t        config_size;
155         uint32_t        config_id;
156         uint32_t        generation;
157         uint32_t        error_log_size;
158         uint32_t        attributes;
159 #define INTEL_ATTR_RAID0        0x00000001
160 #define INTEL_ATTR_RAID1        0x00000002
161 #define INTEL_ATTR_RAID10       0x00000004
162 #define INTEL_ATTR_RAID1E       0x00000008
163 #define INTEL_ATTR_RAID5        0x00000010
164 #define INTEL_ATTR_RAIDCNG      0x00000020
165 #define INTEL_ATTR_2TB          0x20000000
166 #define INTEL_ATTR_PM           0x40000000
167 #define INTEL_ATTR_CHECKSUM     0x80000000
168
169         uint8_t         total_disks;
170         uint8_t         total_volumes;
171         uint8_t         dummy_2[2];
172         uint32_t        filler_0[39];
173         struct intel_raid_disk  disk[1];        /* total_disks entries. */
174         /* Here goes total_volumes of struct intel_raid_vol. */
175 } __packed;
176
/*
 * Size in bytes of a metadata buffer for `ndisks' disks: the header
 * (which already embeds one disk record), the remaining disk records,
 * two volume records, two extra map records, and four sets of additional
 * disk_idx[] entries.  The argument is parenthesized so that expressions
 * such as `n + 1' expand correctly.
 */
#define INTEL_MAX_MD_SIZE(ndisks)				\
    (sizeof(struct intel_raid_conf) +				\
     sizeof(struct intel_raid_disk) * ((ndisks) - 1) +		\
     sizeof(struct intel_raid_vol) * 2 +			\
     sizeof(struct intel_raid_map) * 2 +			\
     sizeof(uint32_t) * ((ndisks) - 1) * 4)
183
184 struct g_raid_md_intel_perdisk {
185         struct intel_raid_conf  *pd_meta;
186         int                      pd_disk_pos;
187         struct intel_raid_disk   pd_disk_meta;
188 };
189
190 struct g_raid_md_intel_object {
191         struct g_raid_md_object  mdio_base;
192         uint32_t                 mdio_config_id;
193         uint32_t                 mdio_generation;
194         struct intel_raid_conf  *mdio_meta;
195         struct callout           mdio_start_co; /* STARTING state timer. */
196         int                      mdio_disks_present;
197         int                      mdio_started;
198         int                      mdio_incomplete;
199         struct root_hold_token  *mdio_rootmount; /* Root mount delay token. */
200 };
201
202 static g_raid_md_create_t g_raid_md_create_intel;
203 static g_raid_md_taste_t g_raid_md_taste_intel;
204 static g_raid_md_event_t g_raid_md_event_intel;
205 static g_raid_md_ctl_t g_raid_md_ctl_intel;
206 static g_raid_md_write_t g_raid_md_write_intel;
207 static g_raid_md_fail_disk_t g_raid_md_fail_disk_intel;
208 static g_raid_md_free_disk_t g_raid_md_free_disk_intel;
209 static g_raid_md_free_t g_raid_md_free_intel;
210
211 static kobj_method_t g_raid_md_intel_methods[] = {
212         KOBJMETHOD(g_raid_md_create,    g_raid_md_create_intel),
213         KOBJMETHOD(g_raid_md_taste,     g_raid_md_taste_intel),
214         KOBJMETHOD(g_raid_md_event,     g_raid_md_event_intel),
215         KOBJMETHOD(g_raid_md_ctl,       g_raid_md_ctl_intel),
216         KOBJMETHOD(g_raid_md_write,     g_raid_md_write_intel),
217         KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_intel),
218         KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_intel),
219         KOBJMETHOD(g_raid_md_free,      g_raid_md_free_intel),
220         { 0, 0 }
221 };
222
223 static struct g_raid_md_class g_raid_md_intel_class = {
224         "Intel",
225         g_raid_md_intel_methods,
226         sizeof(struct g_raid_md_intel_object),
227         .mdc_enable = 1,
228         .mdc_priority = 100
229 };
230
231
232 static struct intel_raid_map *
233 intel_get_map(struct intel_raid_vol *mvol, int i)
234 {
235         struct intel_raid_map *mmap;
236
237         if (i > (mvol->migr_state ? 1 : 0))
238                 return (NULL);
239         mmap = &mvol->map[0];
240         for (; i > 0; i--) {
241                 mmap = (struct intel_raid_map *)
242                     &mmap->disk_idx[mmap->total_disks];
243         }
244         return ((struct intel_raid_map *)mmap);
245 }
246
247 static struct intel_raid_vol *
248 intel_get_volume(struct intel_raid_conf *meta, int i)
249 {
250         struct intel_raid_vol *mvol;
251         struct intel_raid_map *mmap;
252
253         if (i > 1)
254                 return (NULL);
255         mvol = (struct intel_raid_vol *)&meta->disk[meta->total_disks];
256         for (; i > 0; i--) {
257                 mmap = intel_get_map(mvol, mvol->migr_state ? 1 : 0);
258                 mvol = (struct intel_raid_vol *)
259                     &mmap->disk_idx[mmap->total_disks];
260         }
261         return (mvol);
262 }
263
264 static off_t
265 intel_get_map_offset(struct intel_raid_map *mmap)
266 {
267         off_t offset = (off_t)mmap->offset_hi << 32;
268
269         offset += mmap->offset;
270         return (offset);
271 }
272
273 static void
274 intel_set_map_offset(struct intel_raid_map *mmap, off_t offset)
275 {
276
277         mmap->offset = offset & 0xffffffff;
278         mmap->offset_hi = offset >> 32;
279 }
280
281 static off_t
282 intel_get_map_disk_sectors(struct intel_raid_map *mmap)
283 {
284         off_t disk_sectors = (off_t)mmap->disk_sectors_hi << 32;
285
286         disk_sectors += mmap->disk_sectors;
287         return (disk_sectors);
288 }
289
290 static void
291 intel_set_map_disk_sectors(struct intel_raid_map *mmap, off_t disk_sectors)
292 {
293
294         mmap->disk_sectors = disk_sectors & 0xffffffff;
295         mmap->disk_sectors_hi = disk_sectors >> 32;
296 }
297
298 static void
299 intel_set_map_stripe_count(struct intel_raid_map *mmap, off_t stripe_count)
300 {
301
302         mmap->stripe_count = stripe_count & 0xffffffff;
303         mmap->stripe_count_hi = stripe_count >> 32;
304 }
305
306 static off_t
307 intel_get_disk_sectors(struct intel_raid_disk *disk)
308 {
309         off_t sectors = (off_t)disk->sectors_hi << 32;
310
311         sectors += disk->sectors;
312         return (sectors);
313 }
314
315 static void
316 intel_set_disk_sectors(struct intel_raid_disk *disk, off_t sectors)
317 {
318
319         disk->sectors = sectors & 0xffffffff;
320         disk->sectors_hi = sectors >> 32;
321 }
322
323 static off_t
324 intel_get_vol_curr_migr_unit(struct intel_raid_vol *vol)
325 {
326         off_t curr_migr_unit = (off_t)vol->curr_migr_unit_hi << 32;
327
328         curr_migr_unit += vol->curr_migr_unit;
329         return (curr_migr_unit);
330 }
331
332 static void
333 intel_set_vol_curr_migr_unit(struct intel_raid_vol *vol, off_t curr_migr_unit)
334 {
335
336         vol->curr_migr_unit = curr_migr_unit & 0xffffffff;
337         vol->curr_migr_unit_hi = curr_migr_unit >> 32;
338 }
339
340 static void
341 g_raid_md_intel_print(struct intel_raid_conf *meta)
342 {
343         struct intel_raid_vol *mvol;
344         struct intel_raid_map *mmap;
345         int i, j, k;
346
347         if (g_raid_debug < 1)
348                 return;
349
350         printf("********* ATA Intel MatrixRAID Metadata *********\n");
351         printf("intel_id            <%.24s>\n", meta->intel_id);
352         printf("version             <%.6s>\n", meta->version);
353         printf("checksum            0x%08x\n", meta->checksum);
354         printf("config_size         0x%08x\n", meta->config_size);
355         printf("config_id           0x%08x\n", meta->config_id);
356         printf("generation          0x%08x\n", meta->generation);
357         printf("attributes          0x%08x\n", meta->attributes);
358         printf("total_disks         %u\n", meta->total_disks);
359         printf("total_volumes       %u\n", meta->total_volumes);
360         printf("DISK#   serial disk_sectors disk_sectors_hi disk_id flags\n");
361         for (i = 0; i < meta->total_disks; i++ ) {
362                 printf("    %d   <%.16s> %u %u 0x%08x 0x%08x\n", i,
363                     meta->disk[i].serial, meta->disk[i].sectors,
364                     meta->disk[i].sectors_hi,
365                     meta->disk[i].id, meta->disk[i].flags);
366         }
367         for (i = 0; i < meta->total_volumes; i++) {
368                 mvol = intel_get_volume(meta, i);
369                 printf(" ****** Volume %d ******\n", i);
370                 printf(" name               %.16s\n", mvol->name);
371                 printf(" total_sectors      %ju\n", mvol->total_sectors);
372                 printf(" state              %u\n", mvol->state);
373                 printf(" reserved           %u\n", mvol->reserved);
374                 printf(" curr_migr_unit     %u\n", mvol->curr_migr_unit);
375                 printf(" curr_migr_unit_hi  %u\n", mvol->curr_migr_unit_hi);
376                 printf(" checkpoint_id      %u\n", mvol->checkpoint_id);
377                 printf(" migr_state         %u\n", mvol->migr_state);
378                 printf(" migr_type          %u\n", mvol->migr_type);
379                 printf(" dirty              %u\n", mvol->dirty);
380
381                 for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
382                         printf("  *** Map %d ***\n", j);
383                         mmap = intel_get_map(mvol, j);
384                         printf("  offset            %u\n", mmap->offset);
385                         printf("  offset_hi         %u\n", mmap->offset_hi);
386                         printf("  disk_sectors      %u\n", mmap->disk_sectors);
387                         printf("  disk_sectors_hi   %u\n", mmap->disk_sectors_hi);
388                         printf("  stripe_count      %u\n", mmap->stripe_count);
389                         printf("  stripe_count_hi   %u\n", mmap->stripe_count_hi);
390                         printf("  strip_sectors     %u\n", mmap->strip_sectors);
391                         printf("  status            %u\n", mmap->status);
392                         printf("  type              %u\n", mmap->type);
393                         printf("  total_disks       %u\n", mmap->total_disks);
394                         printf("  total_domains     %u\n", mmap->total_domains);
395                         printf("  failed_disk_num   %u\n", mmap->failed_disk_num);
396                         printf("  ddf               %u\n", mmap->ddf);
397                         printf("  disk_idx         ");
398                         for (k = 0; k < mmap->total_disks; k++)
399                                 printf(" 0x%08x", mmap->disk_idx[k]);
400                         printf("\n");
401                 }
402         }
403         printf("=================================================\n");
404 }
405
406 static struct intel_raid_conf *
407 intel_meta_copy(struct intel_raid_conf *meta)
408 {
409         struct intel_raid_conf *nmeta;
410
411         nmeta = malloc(meta->config_size, M_MD_INTEL, M_WAITOK);
412         memcpy(nmeta, meta, meta->config_size);
413         return (nmeta);
414 }
415
416 static int
417 intel_meta_find_disk(struct intel_raid_conf *meta, char *serial)
418 {
419         int pos;
420
421         for (pos = 0; pos < meta->total_disks; pos++) {
422                 if (strncmp(meta->disk[pos].serial,
423                     serial, INTEL_SERIAL_LEN) == 0)
424                         return (pos);
425         }
426         return (-1);
427 }
428
429 static struct intel_raid_conf *
430 intel_meta_read(struct g_consumer *cp)
431 {
432         struct g_provider *pp;
433         struct intel_raid_conf *meta;
434         struct intel_raid_vol *mvol;
435         struct intel_raid_map *mmap;
436         char *buf;
437         int error, i, j, k, left, size;
438         uint32_t checksum, *ptr;
439
440         pp = cp->provider;
441
442         /* Read the anchor sector. */
443         buf = g_read_data(cp,
444             pp->mediasize - pp->sectorsize * 2, pp->sectorsize, &error);
445         if (buf == NULL) {
446                 G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).",
447                     pp->name, error);
448                 return (NULL);
449         }
450         meta = (struct intel_raid_conf *)buf;
451
452         /* Check if this is an Intel RAID struct */
453         if (strncmp(meta->intel_id, INTEL_MAGIC, strlen(INTEL_MAGIC))) {
454                 G_RAID_DEBUG(1, "Intel signature check failed on %s", pp->name);
455                 g_free(buf);
456                 return (NULL);
457         }
458         if (meta->config_size > 65536 ||
459             meta->config_size < sizeof(struct intel_raid_conf)) {
460                 G_RAID_DEBUG(1, "Intel metadata size looks wrong: %d",
461                     meta->config_size);
462                 g_free(buf);
463                 return (NULL);
464         }
465         size = meta->config_size;
466         meta = malloc(size, M_MD_INTEL, M_WAITOK);
467         memcpy(meta, buf, min(size, pp->sectorsize));
468         g_free(buf);
469
470         /* Read all the rest, if needed. */
471         if (meta->config_size > pp->sectorsize) {
472                 left = (meta->config_size - 1) / pp->sectorsize;
473                 buf = g_read_data(cp,
474                     pp->mediasize - pp->sectorsize * (2 + left),
475                     pp->sectorsize * left, &error);
476                 if (buf == NULL) {
477                         G_RAID_DEBUG(1, "Cannot read remaining metadata"
478                             " part from %s (error=%d).",
479                             pp->name, error);
480                         free(meta, M_MD_INTEL);
481                         return (NULL);
482                 }
483                 memcpy(((char *)meta) + pp->sectorsize, buf,
484                     pp->sectorsize * left);
485                 g_free(buf);
486         }
487
488         /* Check metadata checksum. */
489         for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
490             i < (meta->config_size / sizeof(uint32_t)); i++) {
491                 checksum += *ptr++;
492         }
493         checksum -= meta->checksum;
494         if (checksum != meta->checksum) {
495                 G_RAID_DEBUG(1, "Intel checksum check failed on %s", pp->name);
496                 free(meta, M_MD_INTEL);
497                 return (NULL);
498         }
499
500         /* Validate metadata size. */
501         size = sizeof(struct intel_raid_conf) +
502             sizeof(struct intel_raid_disk) * (meta->total_disks - 1) +
503             sizeof(struct intel_raid_vol) * meta->total_volumes;
504         if (size > meta->config_size) {
505 badsize:
506                 G_RAID_DEBUG(1, "Intel metadata size incorrect %d < %d",
507                     meta->config_size, size);
508                 free(meta, M_MD_INTEL);
509                 return (NULL);
510         }
511         for (i = 0; i < meta->total_volumes; i++) {
512                 mvol = intel_get_volume(meta, i);
513                 mmap = intel_get_map(mvol, 0);
514                 size += 4 * (mmap->total_disks - 1);
515                 if (size > meta->config_size)
516                         goto badsize;
517                 if (mvol->migr_state) {
518                         size += sizeof(struct intel_raid_map);
519                         if (size > meta->config_size)
520                                 goto badsize;
521                         mmap = intel_get_map(mvol, 1);
522                         size += 4 * (mmap->total_disks - 1);
523                         if (size > meta->config_size)
524                                 goto badsize;
525                 }
526         }
527
528         /* Validate disk indexes. */
529         for (i = 0; i < meta->total_volumes; i++) {
530                 mvol = intel_get_volume(meta, i);
531                 for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
532                         mmap = intel_get_map(mvol, j);
533                         for (k = 0; k < mmap->total_disks; k++) {
534                                 if ((mmap->disk_idx[k] & INTEL_DI_IDX) >
535                                     meta->total_disks) {
536                                         G_RAID_DEBUG(1, "Intel metadata disk"
537                                             " index %d too big (>%d)",
538                                             mmap->disk_idx[k] & INTEL_DI_IDX,
539                                             meta->total_disks);
540                                         free(meta, M_MD_INTEL);
541                                         return (NULL);
542                                 }
543                         }
544                 }
545         }
546
547         /* Validate migration types. */
548         for (i = 0; i < meta->total_volumes; i++) {
549                 mvol = intel_get_volume(meta, i);
550                 if (mvol->migr_state &&
551                     mvol->migr_type != INTEL_MT_INIT &&
552                     mvol->migr_type != INTEL_MT_REBUILD &&
553                     mvol->migr_type != INTEL_MT_VERIFY &&
554                     mvol->migr_type != INTEL_MT_REPAIR) {
555                         G_RAID_DEBUG(1, "Intel metadata has unsupported"
556                             " migration type %d", mvol->migr_type);
557                         free(meta, M_MD_INTEL);
558                         return (NULL);
559                 }
560         }
561
562         return (meta);
563 }
564
565 static int
566 intel_meta_write(struct g_consumer *cp, struct intel_raid_conf *meta)
567 {
568         struct g_provider *pp;
569         char *buf;
570         int error, i, sectors;
571         uint32_t checksum, *ptr;
572
573         pp = cp->provider;
574
575         /* Recalculate checksum for case if metadata were changed. */
576         meta->checksum = 0;
577         for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
578             i < (meta->config_size / sizeof(uint32_t)); i++) {
579                 checksum += *ptr++;
580         }
581         meta->checksum = checksum;
582
583         /* Create and fill buffer. */
584         sectors = (meta->config_size + pp->sectorsize - 1) / pp->sectorsize;
585         buf = malloc(sectors * pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
586         if (sectors > 1) {
587                 memcpy(buf, ((char *)meta) + pp->sectorsize,
588                     (sectors - 1) * pp->sectorsize);
589         }
590         memcpy(buf + (sectors - 1) * pp->sectorsize, meta, pp->sectorsize);
591
592         error = g_write_data(cp,
593             pp->mediasize - pp->sectorsize * (1 + sectors),
594             buf, pp->sectorsize * sectors);
595         if (error != 0) {
596                 G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).",
597                     pp->name, error);
598         }
599
600         free(buf, M_MD_INTEL);
601         return (error);
602 }
603
604 static int
605 intel_meta_erase(struct g_consumer *cp)
606 {
607         struct g_provider *pp;
608         char *buf;
609         int error;
610
611         pp = cp->provider;
612         buf = malloc(pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
613         error = g_write_data(cp,
614             pp->mediasize - 2 * pp->sectorsize,
615             buf, pp->sectorsize);
616         if (error != 0) {
617                 G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).",
618                     pp->name, error);
619         }
620         free(buf, M_MD_INTEL);
621         return (error);
622 }
623
624 static int
625 intel_meta_write_spare(struct g_consumer *cp, struct intel_raid_disk *d)
626 {
627         struct intel_raid_conf *meta;
628         int error;
629
630         /* Fill anchor and single disk. */
631         meta = malloc(INTEL_MAX_MD_SIZE(1), M_MD_INTEL, M_WAITOK | M_ZERO);
632         memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
633         memcpy(&meta->version[0], INTEL_VERSION_1000,
634             sizeof(INTEL_VERSION_1000) - 1);
635         meta->config_size = INTEL_MAX_MD_SIZE(1);
636         meta->config_id = arc4random();
637         meta->generation = 1;
638         meta->total_disks = 1;
639         meta->disk[0] = *d;
640         error = intel_meta_write(cp, meta);
641         free(meta, M_MD_INTEL);
642         return (error);
643 }
644
645 static struct g_raid_disk *
646 g_raid_md_intel_get_disk(struct g_raid_softc *sc, int id)
647 {
648         struct g_raid_disk      *disk;
649         struct g_raid_md_intel_perdisk *pd;
650
651         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
652                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
653                 if (pd->pd_disk_pos == id)
654                         break;
655         }
656         return (disk);
657 }
658
659 static int
660 g_raid_md_intel_supported(int level, int qual, int disks, int force)
661 {
662
663         switch (level) {
664         case G_RAID_VOLUME_RL_RAID0:
665                 if (disks < 1)
666                         return (0);
667                 if (!force && (disks < 2 || disks > 6))
668                         return (0);
669                 break;
670         case G_RAID_VOLUME_RL_RAID1:
671                 if (disks < 1)
672                         return (0);
673                 if (!force && (disks != 2))
674                         return (0);
675                 break;
676         case G_RAID_VOLUME_RL_RAID1E:
677                 if (disks < 2)
678                         return (0);
679                 if (!force && (disks != 4))
680                         return (0);
681                 break;
682         case G_RAID_VOLUME_RL_RAID5:
683                 if (disks < 3)
684                         return (0);
685                 if (!force && disks > 6)
686                         return (0);
687                 if (qual != G_RAID_VOLUME_RLQ_R5LA)
688                         return (0);
689                 break;
690         default:
691                 return (0);
692         }
693         if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE)
694                 return (0);
695         return (1);
696 }
697
698 static struct g_raid_volume *
699 g_raid_md_intel_get_volume(struct g_raid_softc *sc, int id)
700 {
701         struct g_raid_volume    *mvol;
702
703         TAILQ_FOREACH(mvol, &sc->sc_volumes, v_next) {
704                 if ((intptr_t)(mvol->v_md_data) == id)
705                         break;
706         }
707         return (mvol);
708 }
709
710 static int
711 g_raid_md_intel_start_disk(struct g_raid_disk *disk)
712 {
713         struct g_raid_softc *sc;
714         struct g_raid_subdisk *sd, *tmpsd;
715         struct g_raid_disk *olddisk, *tmpdisk;
716         struct g_raid_md_object *md;
717         struct g_raid_md_intel_object *mdi;
718         struct g_raid_md_intel_perdisk *pd, *oldpd;
719         struct intel_raid_conf *meta;
720         struct intel_raid_vol *mvol;
721         struct intel_raid_map *mmap0, *mmap1;
722         int disk_pos, resurrection = 0;
723
724         sc = disk->d_softc;
725         md = sc->sc_md;
726         mdi = (struct g_raid_md_intel_object *)md;
727         meta = mdi->mdio_meta;
728         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
729         olddisk = NULL;
730
731         /* Find disk position in metadata by it's serial. */
732         disk_pos = intel_meta_find_disk(meta, pd->pd_disk_meta.serial);
733         if (disk_pos < 0) {
734                 G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk");
735                 /* Failed stale disk is useless for us. */
736                 if (pd->pd_disk_meta.flags & INTEL_F_FAILED) {
737                         g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
738                         return (0);
739                 }
740                 /* If we are in the start process, that's all for now. */
741                 if (!mdi->mdio_started)
742                         goto nofit;
743                 /*
744                  * If we have already started - try to get use of the disk.
745                  * Try to replace OFFLINE disks first, then FAILED.
746                  */
747                 TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) {
748                         if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE &&
749                             tmpdisk->d_state != G_RAID_DISK_S_FAILED)
750                                 continue;
751                         /* Make sure this disk is big enough. */
752                         TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) {
753                                 off_t disk_sectors = 
754                                     intel_get_disk_sectors(&pd->pd_disk_meta);
755
756                                 if (sd->sd_offset + sd->sd_size + 4096 >
757                                     disk_sectors * 512) {
758                                         G_RAID_DEBUG1(1, sc,
759                                             "Disk too small (%llu < %llu)",
760                                             (unsigned long long)
761                                             disk_sectors * 512,
762                                             (unsigned long long)
763                                             sd->sd_offset + sd->sd_size + 4096);
764                                         break;
765                                 }
766                         }
767                         if (sd != NULL)
768                                 continue;
769                         if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) {
770                                 olddisk = tmpdisk;
771                                 break;
772                         } else if (olddisk == NULL)
773                                 olddisk = tmpdisk;
774                 }
775                 if (olddisk == NULL) {
776 nofit:
777                         if (pd->pd_disk_meta.flags & INTEL_F_SPARE) {
778                                 g_raid_change_disk_state(disk,
779                                     G_RAID_DISK_S_SPARE);
780                                 return (1);
781                         } else {
782                                 g_raid_change_disk_state(disk,
783                                     G_RAID_DISK_S_STALE);
784                                 return (0);
785                         }
786                 }
787                 oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
788                 disk_pos = oldpd->pd_disk_pos;
789                 resurrection = 1;
790         }
791
792         if (olddisk == NULL) {
793                 /* Find placeholder by position. */
794                 olddisk = g_raid_md_intel_get_disk(sc, disk_pos);
795                 if (olddisk == NULL)
796                         panic("No disk at position %d!", disk_pos);
797                 if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) {
798                         G_RAID_DEBUG1(1, sc, "More then one disk for pos %d",
799                             disk_pos);
800                         g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE);
801                         return (0);
802                 }
803                 oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
804         }
805
806         /* Replace failed disk or placeholder with new disk. */
807         TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) {
808                 TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next);
809                 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
810                 sd->sd_disk = disk;
811         }
812         oldpd->pd_disk_pos = -2;
813         pd->pd_disk_pos = disk_pos;
814
815         /* If it was placeholder -- destroy it. */
816         if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) {
817                 g_raid_destroy_disk(olddisk);
818         } else {
819                 /* Otherwise, make it STALE_FAILED. */
820                 g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED);
821                 /* Update global metadata just in case. */
822                 memcpy(&meta->disk[disk_pos], &pd->pd_disk_meta,
823                     sizeof(struct intel_raid_disk));
824         }
825
826         /* Welcome the new disk. */
827         if (resurrection)
828                 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
829         else if (meta->disk[disk_pos].flags & INTEL_F_FAILED)
830                 g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
831         else if (meta->disk[disk_pos].flags & INTEL_F_SPARE)
832                 g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
833         else
834                 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
835         TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
836                 mvol = intel_get_volume(meta,
837                     (uintptr_t)(sd->sd_volume->v_md_data));
838                 mmap0 = intel_get_map(mvol, 0);
839                 if (mvol->migr_state)
840                         mmap1 = intel_get_map(mvol, 1);
841                 else
842                         mmap1 = mmap0;
843
844                 if (resurrection) {
845                         /* Stale disk, almost same as new. */
846                         g_raid_change_subdisk_state(sd,
847                             G_RAID_SUBDISK_S_NEW);
848                 } else if (meta->disk[disk_pos].flags & INTEL_F_FAILED) {
849                         /* Failed disk, almost useless. */
850                         g_raid_change_subdisk_state(sd,
851                             G_RAID_SUBDISK_S_FAILED);
852                 } else if (mvol->migr_state == 0) {
853                         if (mmap0->status == INTEL_S_UNINITIALIZED) {
854                                 /* Freshly created uninitialized volume. */
855                                 g_raid_change_subdisk_state(sd,
856                                     G_RAID_SUBDISK_S_UNINITIALIZED);
857                         } else if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
858                                 /* Freshly inserted disk. */
859                                 g_raid_change_subdisk_state(sd,
860                                     G_RAID_SUBDISK_S_NEW);
861                         } else if (mvol->dirty) {
862                                 /* Dirty volume (unclean shutdown). */
863                                 g_raid_change_subdisk_state(sd,
864                                     G_RAID_SUBDISK_S_STALE);
865                         } else {
866                                 /* Up to date disk. */
867                                 g_raid_change_subdisk_state(sd,
868                                     G_RAID_SUBDISK_S_ACTIVE);
869                         }
870                 } else if (mvol->migr_type == INTEL_MT_INIT ||
871                            mvol->migr_type == INTEL_MT_REBUILD) {
872                         if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
873                                 /* Freshly inserted disk. */
874                                 g_raid_change_subdisk_state(sd,
875                                     G_RAID_SUBDISK_S_NEW);
876                         } else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
877                                 /* Rebuilding disk. */
878                                 g_raid_change_subdisk_state(sd,
879                                     G_RAID_SUBDISK_S_REBUILD);
880                                 if (mvol->dirty) {
881                                         sd->sd_rebuild_pos = 0;
882                                 } else {
883                                         sd->sd_rebuild_pos =
884                                             intel_get_vol_curr_migr_unit(mvol) *
885                                             sd->sd_volume->v_strip_size *
886                                             mmap0->total_domains;
887                                 }
888                         } else if (mvol->dirty) {
889                                 /* Dirty volume (unclean shutdown). */
890                                 g_raid_change_subdisk_state(sd,
891                                     G_RAID_SUBDISK_S_STALE);
892                         } else {
893                                 /* Up to date disk. */
894                                 g_raid_change_subdisk_state(sd,
895                                     G_RAID_SUBDISK_S_ACTIVE);
896                         }
897                 } else if (mvol->migr_type == INTEL_MT_VERIFY ||
898                            mvol->migr_type == INTEL_MT_REPAIR) {
899                         if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
900                                 /* Freshly inserted disk. */
901                                 g_raid_change_subdisk_state(sd,
902                                     G_RAID_SUBDISK_S_NEW);
903                         } else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
904                                 /* Resyncing disk. */
905                                 g_raid_change_subdisk_state(sd,
906                                     G_RAID_SUBDISK_S_RESYNC);
907                                 if (mvol->dirty) {
908                                         sd->sd_rebuild_pos = 0;
909                                 } else {
910                                         sd->sd_rebuild_pos =
911                                             intel_get_vol_curr_migr_unit(mvol) *
912                                             sd->sd_volume->v_strip_size *
913                                             mmap0->total_domains;
914                                 }
915                         } else if (mvol->dirty) {
916                                 /* Dirty volume (unclean shutdown). */
917                                 g_raid_change_subdisk_state(sd,
918                                     G_RAID_SUBDISK_S_STALE);
919                         } else {
920                                 /* Up to date disk. */
921                                 g_raid_change_subdisk_state(sd,
922                                     G_RAID_SUBDISK_S_ACTIVE);
923                         }
924                 }
925                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
926                     G_RAID_EVENT_SUBDISK);
927         }
928
929         /* Update status of our need for spare. */
930         if (mdi->mdio_started) {
931                 mdi->mdio_incomplete =
932                     (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
933                      meta->total_disks);
934         }
935
936         return (resurrection);
937 }
938
939 static void
940 g_disk_md_intel_retaste(void *arg, int pending)
941 {
942
943         G_RAID_DEBUG(1, "Array is not complete, trying to retaste.");
944         g_retaste(&g_raid_class);
945         free(arg, M_MD_INTEL);
946 }
947
948 static void
949 g_raid_md_intel_refill(struct g_raid_softc *sc)
950 {
951         struct g_raid_md_object *md;
952         struct g_raid_md_intel_object *mdi;
953         struct intel_raid_conf *meta;
954         struct g_raid_disk *disk;
955         struct task *task;
956         int update, na;
957
958         md = sc->sc_md;
959         mdi = (struct g_raid_md_intel_object *)md;
960         meta = mdi->mdio_meta;
961         update = 0;
962         do {
963                 /* Make sure we miss anything. */
964                 na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE);
965                 if (na == meta->total_disks)
966                         break;
967
968                 G_RAID_DEBUG1(1, md->mdo_softc,
969                     "Array is not complete (%d of %d), "
970                     "trying to refill.", na, meta->total_disks);
971
972                 /* Try to get use some of STALE disks. */
973                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
974                         if (disk->d_state == G_RAID_DISK_S_STALE) {
975                                 update += g_raid_md_intel_start_disk(disk);
976                                 if (disk->d_state == G_RAID_DISK_S_ACTIVE)
977                                         break;
978                         }
979                 }
980                 if (disk != NULL)
981                         continue;
982
983                 /* Try to get use some of SPARE disks. */
984                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
985                         if (disk->d_state == G_RAID_DISK_S_SPARE) {
986                                 update += g_raid_md_intel_start_disk(disk);
987                                 if (disk->d_state == G_RAID_DISK_S_ACTIVE)
988                                         break;
989                         }
990                 }
991         } while (disk != NULL);
992
993         /* Write new metadata if we changed something. */
994         if (update) {
995                 g_raid_md_write_intel(md, NULL, NULL, NULL);
996                 meta = mdi->mdio_meta;
997         }
998
999         /* Update status of our need for spare. */
1000         mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
1001             meta->total_disks);
1002
1003         /* Request retaste hoping to find spare. */
1004         if (mdi->mdio_incomplete) {
1005                 task = malloc(sizeof(struct task),
1006                     M_MD_INTEL, M_WAITOK | M_ZERO);
1007                 TASK_INIT(task, 0, g_disk_md_intel_retaste, task);
1008                 taskqueue_enqueue(taskqueue_swi, task);
1009         }
1010 }
1011
1012 static void
1013 g_raid_md_intel_start(struct g_raid_softc *sc)
1014 {
1015         struct g_raid_md_object *md;
1016         struct g_raid_md_intel_object *mdi;
1017         struct g_raid_md_intel_perdisk *pd;
1018         struct intel_raid_conf *meta;
1019         struct intel_raid_vol *mvol;
1020         struct intel_raid_map *mmap;
1021         struct g_raid_volume *vol;
1022         struct g_raid_subdisk *sd;
1023         struct g_raid_disk *disk;
1024         int i, j, disk_pos;
1025
1026         md = sc->sc_md;
1027         mdi = (struct g_raid_md_intel_object *)md;
1028         meta = mdi->mdio_meta;
1029
1030         /* Create volumes and subdisks. */
1031         for (i = 0; i < meta->total_volumes; i++) {
1032                 mvol = intel_get_volume(meta, i);
1033                 mmap = intel_get_map(mvol, 0);
1034                 vol = g_raid_create_volume(sc, mvol->name, -1);
1035                 vol->v_md_data = (void *)(intptr_t)i;
1036                 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
1037                 if (mmap->type == INTEL_T_RAID0)
1038                         vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
1039                 else if (mmap->type == INTEL_T_RAID1 &&
1040                     mmap->total_domains >= 2 &&
1041                     mmap->total_domains <= mmap->total_disks) {
1042                         /* Assume total_domains is correct. */
1043                         if (mmap->total_domains == mmap->total_disks)
1044                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
1045                         else
1046                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
1047                 } else if (mmap->type == INTEL_T_RAID1) {
1048                         /* total_domains looks wrong. */
1049                         if (mmap->total_disks <= 2)
1050                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
1051                         else
1052                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
1053                 } else if (mmap->type == INTEL_T_RAID5) {
1054                         vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
1055                         vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA;
1056                 } else
1057                         vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
1058                 vol->v_strip_size = (u_int)mmap->strip_sectors * 512; //ZZZ
1059                 vol->v_disks_count = mmap->total_disks;
1060                 vol->v_mediasize = (off_t)mvol->total_sectors * 512; //ZZZ
1061                 vol->v_sectorsize = 512; //ZZZ
1062                 for (j = 0; j < vol->v_disks_count; j++) {
1063                         sd = &vol->v_subdisks[j];
1064                         sd->sd_offset = intel_get_map_offset(mmap) * 512; //ZZZ
1065                         sd->sd_size = intel_get_map_disk_sectors(mmap) * 512; //ZZZ
1066                 }
1067                 g_raid_start_volume(vol);
1068         }
1069
1070         /* Create disk placeholders to store data for later writing. */
1071         for (disk_pos = 0; disk_pos < meta->total_disks; disk_pos++) {
1072                 pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1073                 pd->pd_disk_pos = disk_pos;
1074                 pd->pd_disk_meta = meta->disk[disk_pos];
1075                 disk = g_raid_create_disk(sc);
1076                 disk->d_md_data = (void *)pd;
1077                 disk->d_state = G_RAID_DISK_S_OFFLINE;
1078                 for (i = 0; i < meta->total_volumes; i++) {
1079                         mvol = intel_get_volume(meta, i);
1080                         mmap = intel_get_map(mvol, 0);
1081                         for (j = 0; j < mmap->total_disks; j++) {
1082                                 if ((mmap->disk_idx[j] & INTEL_DI_IDX) == disk_pos)
1083                                         break;
1084                         }
1085                         if (j == mmap->total_disks)
1086                                 continue;
1087                         vol = g_raid_md_intel_get_volume(sc, i);
1088                         sd = &vol->v_subdisks[j];
1089                         sd->sd_disk = disk;
1090                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1091                 }
1092         }
1093
1094         /* Make all disks found till the moment take their places. */
1095         do {
1096                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1097                         if (disk->d_state == G_RAID_DISK_S_NONE) {
1098                                 g_raid_md_intel_start_disk(disk);
1099                                 break;
1100                         }
1101                 }
1102         } while (disk != NULL);
1103
1104         mdi->mdio_started = 1;
1105         G_RAID_DEBUG1(0, sc, "Array started.");
1106         g_raid_md_write_intel(md, NULL, NULL, NULL);
1107
1108         /* Pickup any STALE/SPARE disks to refill array if needed. */
1109         g_raid_md_intel_refill(sc);
1110
1111         TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1112                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
1113                     G_RAID_EVENT_VOLUME);
1114         }
1115
1116         callout_stop(&mdi->mdio_start_co);
1117         G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount);
1118         root_mount_rel(mdi->mdio_rootmount);
1119         mdi->mdio_rootmount = NULL;
1120 }
1121
1122 static void
1123 g_raid_md_intel_new_disk(struct g_raid_disk *disk)
1124 {
1125         struct g_raid_softc *sc;
1126         struct g_raid_md_object *md;
1127         struct g_raid_md_intel_object *mdi;
1128         struct intel_raid_conf *pdmeta;
1129         struct g_raid_md_intel_perdisk *pd;
1130
1131         sc = disk->d_softc;
1132         md = sc->sc_md;
1133         mdi = (struct g_raid_md_intel_object *)md;
1134         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1135         pdmeta = pd->pd_meta;
1136
1137         if (mdi->mdio_started) {
1138                 if (g_raid_md_intel_start_disk(disk))
1139                         g_raid_md_write_intel(md, NULL, NULL, NULL);
1140         } else {
1141                 /* If we haven't started yet - check metadata freshness. */
1142                 if (mdi->mdio_meta == NULL ||
1143                     ((int32_t)(pdmeta->generation - mdi->mdio_generation)) > 0) {
1144                         G_RAID_DEBUG1(1, sc, "Newer disk");
1145                         if (mdi->mdio_meta != NULL)
1146                                 free(mdi->mdio_meta, M_MD_INTEL);
1147                         mdi->mdio_meta = intel_meta_copy(pdmeta);
1148                         mdi->mdio_generation = mdi->mdio_meta->generation;
1149                         mdi->mdio_disks_present = 1;
1150                 } else if (pdmeta->generation == mdi->mdio_generation) {
1151                         mdi->mdio_disks_present++;
1152                         G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)",
1153                             mdi->mdio_disks_present,
1154                             mdi->mdio_meta->total_disks);
1155                 } else {
1156                         G_RAID_DEBUG1(1, sc, "Older disk");
1157                 }
1158                 /* If we collected all needed disks - start array. */
1159                 if (mdi->mdio_disks_present == mdi->mdio_meta->total_disks)
1160                         g_raid_md_intel_start(sc);
1161         }
1162 }
1163
1164 static void
1165 g_raid_intel_go(void *arg)
1166 {
1167         struct g_raid_softc *sc;
1168         struct g_raid_md_object *md;
1169         struct g_raid_md_intel_object *mdi;
1170
1171         sc = arg;
1172         md = sc->sc_md;
1173         mdi = (struct g_raid_md_intel_object *)md;
1174         if (!mdi->mdio_started) {
1175                 G_RAID_DEBUG1(0, sc, "Force array start due to timeout.");
1176                 g_raid_event_send(sc, G_RAID_NODE_E_START, 0);
1177         }
1178 }
1179
1180 static int
1181 g_raid_md_create_intel(struct g_raid_md_object *md, struct g_class *mp,
1182     struct g_geom **gp)
1183 {
1184         struct g_raid_softc *sc;
1185         struct g_raid_md_intel_object *mdi;
1186         char name[16];
1187
1188         mdi = (struct g_raid_md_intel_object *)md;
1189         mdi->mdio_config_id = arc4random();
1190         mdi->mdio_generation = 0;
1191         snprintf(name, sizeof(name), "Intel-%08x", mdi->mdio_config_id);
1192         sc = g_raid_create_node(mp, name, md);
1193         if (sc == NULL)
1194                 return (G_RAID_MD_TASTE_FAIL);
1195         md->mdo_softc = sc;
1196         *gp = sc->sc_geom;
1197         return (G_RAID_MD_TASTE_NEW);
1198 }
1199
/*
 * Return the last N characters of the serial label.  The Linux and
 * ataraid(7) code always uses the last 16 characters of the label to
 * store into the Intel meta format.  Generalize this to N characters
 * since that's easy.  Labels can be up to 20 characters for SATA drives
 * and up to 251 characters for SAS drives.  Since Intel controllers don't
 * support SAS drives, just stick with the SATA limits for stack friendliness.
 *
 * cp     - consumer to query for the "GEOM::ident" attribute.
 * serial - output buffer of at least serlen bytes.  It is filled with
 *          strncpy(): zero-padded when the label is shorter than serlen,
 *          and NOT NUL-terminated when the label has serlen or more
 *          characters.
 * serlen - number of bytes to store in serial.
 *
 * Returns 0 on success or the error reported by g_io_getattr().
 */
static int
g_raid_md_get_label(struct g_consumer *cp, char *serial, int serlen)
{
        char serial_buffer[24];
        int len, error;

        /*
         * Start from a zeroed buffer so that bytes the attribute handler
         * does not write are never read as part of the label.
         */
        memset(serial_buffer, 0, sizeof(serial_buffer));
        len = sizeof(serial_buffer);
        error = g_io_getattr("GEOM::ident", cp, &len, serial_buffer);
        if (error != 0)
                return (error);

        /*
         * Do not rely on the provider to terminate the string: force a
         * terminator so strlen() cannot run past the end of the buffer.
         */
        serial_buffer[sizeof(serial_buffer) - 1] = '\0';

        /* Skip the leading characters that do not fit into serlen. */
        len = strlen(serial_buffer);
        if (len > serlen)
                len -= serlen;
        else
                len = 0;
        strncpy(serial, serial_buffer + len, serlen);
        return (0);
}
1226
1227 static int
1228 g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp,
1229                               struct g_consumer *cp, struct g_geom **gp)
1230 {
1231         struct g_consumer *rcp;
1232         struct g_provider *pp;
1233         struct g_raid_md_intel_object *mdi, *mdi1;
1234         struct g_raid_softc *sc;
1235         struct g_raid_disk *disk;
1236         struct intel_raid_conf *meta;
1237         struct g_raid_md_intel_perdisk *pd;
1238         struct g_geom *geom;
1239         int error, disk_pos, result, spare, len;
1240         char serial[INTEL_SERIAL_LEN];
1241         char name[16];
1242         uint16_t vendor;
1243
1244         G_RAID_DEBUG(1, "Tasting Intel on %s", cp->provider->name);
1245         mdi = (struct g_raid_md_intel_object *)md;
1246         pp = cp->provider;
1247
1248         /* Read metadata from device. */
1249         meta = NULL;
1250         vendor = 0xffff;
1251         disk_pos = 0;
1252         if (g_access(cp, 1, 0, 0) != 0)
1253                 return (G_RAID_MD_TASTE_FAIL);
1254         g_topology_unlock();
1255         error = g_raid_md_get_label(cp, serial, sizeof(serial));
1256         if (error != 0) {
1257                 G_RAID_DEBUG(1, "Cannot get serial number from %s (error=%d).",
1258                     pp->name, error);
1259                 goto fail2;
1260         }
1261         len = 2;
1262         if (pp->geom->rank == 1)
1263                 g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor);
1264         meta = intel_meta_read(cp);
1265         g_topology_lock();
1266         g_access(cp, -1, 0, 0);
1267         if (meta == NULL) {
1268                 if (g_raid_aggressive_spare) {
1269                         if (vendor != 0x8086) {
1270                                 G_RAID_DEBUG(1,
1271                                     "Intel vendor mismatch 0x%04x != 0x8086",
1272                                     vendor);
1273                         } else {
1274                                 G_RAID_DEBUG(1,
1275                                     "No Intel metadata, forcing spare.");
1276                                 spare = 2;
1277                                 goto search;
1278                         }
1279                 }
1280                 return (G_RAID_MD_TASTE_FAIL);
1281         }
1282
1283         /* Check this disk position in obtained metadata. */
1284         disk_pos = intel_meta_find_disk(meta, serial);
1285         if (disk_pos < 0) {
1286                 G_RAID_DEBUG(1, "Intel serial '%s' not found", serial);
1287                 goto fail1;
1288         }
1289         if (intel_get_disk_sectors(&meta->disk[disk_pos]) !=
1290             (pp->mediasize / pp->sectorsize)) {
1291                 G_RAID_DEBUG(1, "Intel size mismatch %ju != %ju",
1292                     intel_get_disk_sectors(&meta->disk[disk_pos]),
1293                     (off_t)(pp->mediasize / pp->sectorsize));
1294                 goto fail1;
1295         }
1296
1297         /* Metadata valid. Print it. */
1298         g_raid_md_intel_print(meta);
1299         G_RAID_DEBUG(1, "Intel disk position %d", disk_pos);
1300         spare = meta->disk[disk_pos].flags & INTEL_F_SPARE;
1301
1302 search:
1303         /* Search for matching node. */
1304         sc = NULL;
1305         mdi1 = NULL;
1306         LIST_FOREACH(geom, &mp->geom, geom) {
1307                 sc = geom->softc;
1308                 if (sc == NULL)
1309                         continue;
1310                 if (sc->sc_stopping != 0)
1311                         continue;
1312                 if (sc->sc_md->mdo_class != md->mdo_class)
1313                         continue;
1314                 mdi1 = (struct g_raid_md_intel_object *)sc->sc_md;
1315                 if (spare) {
1316                         if (mdi1->mdio_incomplete)
1317                                 break;
1318                 } else {
1319                         if (mdi1->mdio_config_id == meta->config_id)
1320                                 break;
1321                 }
1322         }
1323
1324         /* Found matching node. */
1325         if (geom != NULL) {
1326                 G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name);
1327                 result = G_RAID_MD_TASTE_EXISTING;
1328
1329         } else if (spare) { /* Not found needy node -- left for later. */
1330                 G_RAID_DEBUG(1, "Spare is not needed at this time");
1331                 goto fail1;
1332
1333         } else { /* Not found matching node -- create one. */
1334                 result = G_RAID_MD_TASTE_NEW;
1335                 mdi->mdio_config_id = meta->config_id;
1336                 snprintf(name, sizeof(name), "Intel-%08x", meta->config_id);
1337                 sc = g_raid_create_node(mp, name, md);
1338                 md->mdo_softc = sc;
1339                 geom = sc->sc_geom;
1340                 callout_init(&mdi->mdio_start_co, 1);
1341                 callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz,
1342                     g_raid_intel_go, sc);
1343                 mdi->mdio_rootmount = root_mount_hold("GRAID-Intel");
1344                 G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount);
1345         }
1346
1347         rcp = g_new_consumer(geom);
1348         g_attach(rcp, pp);
1349         if (g_access(rcp, 1, 1, 1) != 0)
1350                 ; //goto fail1;
1351
1352         g_topology_unlock();
1353         sx_xlock(&sc->sc_lock);
1354
1355         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1356         pd->pd_meta = meta;
1357         pd->pd_disk_pos = -1;
1358         if (spare == 2) {
1359                 memcpy(&pd->pd_disk_meta.serial[0], serial, INTEL_SERIAL_LEN);
1360                 intel_set_disk_sectors(&pd->pd_disk_meta, 
1361                     pp->mediasize / pp->sectorsize);
1362                 pd->pd_disk_meta.id = 0;
1363                 pd->pd_disk_meta.flags = INTEL_F_SPARE;
1364         } else {
1365                 pd->pd_disk_meta = meta->disk[disk_pos];
1366         }
1367         disk = g_raid_create_disk(sc);
1368         disk->d_md_data = (void *)pd;
1369         disk->d_consumer = rcp;
1370         rcp->private = disk;
1371
1372         /* Read kernel dumping information. */
1373         disk->d_kd.offset = 0;
1374         disk->d_kd.length = OFF_MAX;
1375         len = sizeof(disk->d_kd);
1376         error = g_io_getattr("GEOM::kerneldump", rcp, &len, &disk->d_kd);
1377         if (disk->d_kd.di.dumper == NULL)
1378                 G_RAID_DEBUG1(2, sc, "Dumping not supported by %s: %d.", 
1379                     rcp->provider->name, error);
1380
1381         g_raid_md_intel_new_disk(disk);
1382
1383         sx_xunlock(&sc->sc_lock);
1384         g_topology_lock();
1385         *gp = geom;
1386         return (result);
1387 fail2:
1388         g_topology_lock();
1389         g_access(cp, -1, 0, 0);
1390 fail1:
1391         free(meta, M_MD_INTEL);
1392         return (G_RAID_MD_TASTE_FAIL);
1393 }
1394
1395 static int
1396 g_raid_md_event_intel(struct g_raid_md_object *md,
1397     struct g_raid_disk *disk, u_int event)
1398 {
1399         struct g_raid_softc *sc;
1400         struct g_raid_subdisk *sd;
1401         struct g_raid_md_intel_object *mdi;
1402         struct g_raid_md_intel_perdisk *pd;
1403
1404         sc = md->mdo_softc;
1405         mdi = (struct g_raid_md_intel_object *)md;
1406         if (disk == NULL) {
1407                 switch (event) {
1408                 case G_RAID_NODE_E_START:
1409                         if (!mdi->mdio_started)
1410                                 g_raid_md_intel_start(sc);
1411                         return (0);
1412                 }
1413                 return (-1);
1414         }
1415         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1416         switch (event) {
1417         case G_RAID_DISK_E_DISCONNECTED:
1418                 /* If disk was assigned, just update statuses. */
1419                 if (pd->pd_disk_pos >= 0) {
1420                         g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
1421                         if (disk->d_consumer) {
1422                                 g_raid_kill_consumer(sc, disk->d_consumer);
1423                                 disk->d_consumer = NULL;
1424                         }
1425                         TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
1426                                 g_raid_change_subdisk_state(sd,
1427                                     G_RAID_SUBDISK_S_NONE);
1428                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
1429                                     G_RAID_EVENT_SUBDISK);
1430                         }
1431                 } else {
1432                         /* Otherwise -- delete. */
1433                         g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
1434                         g_raid_destroy_disk(disk);
1435                 }
1436
1437                 /* Write updated metadata to all disks. */
1438                 g_raid_md_write_intel(md, NULL, NULL, NULL);
1439
1440                 /* Check if anything left except placeholders. */
1441                 if (g_raid_ndisks(sc, -1) ==
1442                     g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
1443                         g_raid_destroy_node(sc, 0);
1444                 else
1445                         g_raid_md_intel_refill(sc);
1446                 return (0);
1447         }
1448         return (-2);
1449 }
1450
1451 static int
1452 g_raid_md_ctl_intel(struct g_raid_md_object *md,
1453     struct gctl_req *req)
1454 {
1455         struct g_raid_softc *sc;
1456         struct g_raid_volume *vol, *vol1;
1457         struct g_raid_subdisk *sd;
1458         struct g_raid_disk *disk;
1459         struct g_raid_md_intel_object *mdi;
1460         struct g_raid_md_intel_perdisk *pd;
1461         struct g_consumer *cp;
1462         struct g_provider *pp;
1463         char arg[16], serial[INTEL_SERIAL_LEN];
1464         const char *verb, *volname, *levelname, *diskname;
1465         char *tmp;
1466         int *nargs, *force;
1467         off_t off, size, sectorsize, strip, disk_sectors;
1468         intmax_t *sizearg, *striparg;
1469         int numdisks, i, len, level, qual, update;
1470         int error;
1471
1472         sc = md->mdo_softc;
1473         mdi = (struct g_raid_md_intel_object *)md;
1474         verb = gctl_get_param(req, "verb", NULL);
1475         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
1476         error = 0;
1477         if (strcmp(verb, "label") == 0) {
1478
1479                 if (*nargs < 4) {
1480                         gctl_error(req, "Invalid number of arguments.");
1481                         return (-1);
1482                 }
1483                 volname = gctl_get_asciiparam(req, "arg1");
1484                 if (volname == NULL) {
1485                         gctl_error(req, "No volume name.");
1486                         return (-2);
1487                 }
1488                 levelname = gctl_get_asciiparam(req, "arg2");
1489                 if (levelname == NULL) {
1490                         gctl_error(req, "No RAID level.");
1491                         return (-3);
1492                 }
1493                 if (strcasecmp(levelname, "RAID5") == 0)
1494                         levelname = "RAID5-LA";
1495                 if (g_raid_volume_str2level(levelname, &level, &qual)) {
1496                         gctl_error(req, "Unknown RAID level '%s'.", levelname);
1497                         return (-4);
1498                 }
1499                 numdisks = *nargs - 3;
1500                 force = gctl_get_paraml(req, "force", sizeof(*force));
1501                 if (!g_raid_md_intel_supported(level, qual, numdisks,
1502                     force ? *force : 0)) {
1503                         gctl_error(req, "Unsupported RAID level "
1504                             "(0x%02x/0x%02x), or number of disks (%d).",
1505                             level, qual, numdisks);
1506                         return (-5);
1507                 }
1508
1509                 /* Search for disks, connect them and probe. */
1510                 size = 0x7fffffffffffffffllu;
1511                 sectorsize = 0;
1512                 for (i = 0; i < numdisks; i++) {
1513                         snprintf(arg, sizeof(arg), "arg%d", i + 3);
1514                         diskname = gctl_get_asciiparam(req, arg);
1515                         if (diskname == NULL) {
1516                                 gctl_error(req, "No disk name (%s).", arg);
1517                                 error = -6;
1518                                 break;
1519                         }
1520                         if (strcmp(diskname, "NONE") == 0) {
1521                                 cp = NULL;
1522                                 pp = NULL;
1523                         } else {
1524                                 g_topology_lock();
1525                                 cp = g_raid_open_consumer(sc, diskname);
1526                                 if (cp == NULL) {
1527                                         gctl_error(req, "Can't open disk '%s'.",
1528                                             diskname);
1529                                         g_topology_unlock();
1530                                         error = -7;
1531                                         break;
1532                                 }
1533                                 pp = cp->provider;
1534                         }
1535                         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1536                         pd->pd_disk_pos = i;
1537                         disk = g_raid_create_disk(sc);
1538                         disk->d_md_data = (void *)pd;
1539                         disk->d_consumer = cp;
1540                         if (cp == NULL) {
1541                                 strcpy(&pd->pd_disk_meta.serial[0], "NONE");
1542                                 pd->pd_disk_meta.id = 0xffffffff;
1543                                 pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
1544                                 continue;
1545                         }
1546                         cp->private = disk;
1547                         g_topology_unlock();
1548
1549                         error = g_raid_md_get_label(cp,
1550                             &pd->pd_disk_meta.serial[0], INTEL_SERIAL_LEN);
1551                         if (error != 0) {
1552                                 gctl_error(req,
1553                                     "Can't get serial for provider '%s'.",
1554                                     diskname);
1555                                 error = -8;
1556                                 break;
1557                         }
1558
1559                         /* Read kernel dumping information. */
1560                         disk->d_kd.offset = 0;
1561                         disk->d_kd.length = OFF_MAX;
1562                         len = sizeof(disk->d_kd);
1563                         g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
1564                         if (disk->d_kd.di.dumper == NULL)
1565                                 G_RAID_DEBUG1(2, sc,
1566                                     "Dumping not supported by %s.",
1567                                     cp->provider->name);
1568
1569                         intel_set_disk_sectors(&pd->pd_disk_meta,
1570                             pp->mediasize / pp->sectorsize);
1571                         if (size > pp->mediasize)
1572                                 size = pp->mediasize;
1573                         if (sectorsize < pp->sectorsize)
1574                                 sectorsize = pp->sectorsize;
1575                         pd->pd_disk_meta.id = 0;
1576                         pd->pd_disk_meta.flags = INTEL_F_ASSIGNED | INTEL_F_ONLINE;
1577                 }
1578                 if (error != 0)
1579                         return (error);
1580
1581                 if (sectorsize <= 0) {
1582                         gctl_error(req, "Can't get sector size.");
1583                         return (-8);
1584                 }
1585
1586                 /* Reserve some space for metadata. */
1587                 size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
1588
1589                 /* Handle size argument. */
1590                 len = sizeof(*sizearg);
1591                 sizearg = gctl_get_param(req, "size", &len);
1592                 if (sizearg != NULL && len == sizeof(*sizearg) &&
1593                     *sizearg > 0) {
1594                         if (*sizearg > size) {
1595                                 gctl_error(req, "Size too big %lld > %lld.",
1596                                     (long long)*sizearg, (long long)size);
1597                                 return (-9);
1598                         }
1599                         size = *sizearg;
1600                 }
1601
1602                 /* Handle strip argument. */
1603                 strip = 131072;
1604                 len = sizeof(*striparg);
1605                 striparg = gctl_get_param(req, "strip", &len);
1606                 if (striparg != NULL && len == sizeof(*striparg) &&
1607                     *striparg > 0) {
1608                         if (*striparg < sectorsize) {
1609                                 gctl_error(req, "Strip size too small.");
1610                                 return (-10);
1611                         }
1612                         if (*striparg % sectorsize != 0) {
1613                                 gctl_error(req, "Incorrect strip size.");
1614                                 return (-11);
1615                         }
1616                         if (strip > 65535 * sectorsize) {
1617                                 gctl_error(req, "Strip size too big.");
1618                                 return (-12);
1619                         }
1620                         strip = *striparg;
1621                 }
1622
1623                 /* Round size down to strip or sector. */
1624                 if (level == G_RAID_VOLUME_RL_RAID1)
1625                         size -= (size % sectorsize);
1626                 else if (level == G_RAID_VOLUME_RL_RAID1E &&
1627                     (numdisks & 1) != 0)
1628                         size -= (size % (2 * strip));
1629                 else
1630                         size -= (size % strip);
1631                 if (size <= 0) {
1632                         gctl_error(req, "Size too small.");
1633                         return (-13);
1634                 }
1635
1636                 /* We have all we need, create things: volume, ... */
1637                 mdi->mdio_started = 1;
1638                 vol = g_raid_create_volume(sc, volname, -1);
1639                 vol->v_md_data = (void *)(intptr_t)0;
1640                 vol->v_raid_level = level;
1641                 vol->v_raid_level_qualifier = qual;
1642                 vol->v_strip_size = strip;
1643                 vol->v_disks_count = numdisks;
1644                 if (level == G_RAID_VOLUME_RL_RAID0)
1645                         vol->v_mediasize = size * numdisks;
1646                 else if (level == G_RAID_VOLUME_RL_RAID1)
1647                         vol->v_mediasize = size;
1648                 else if (level == G_RAID_VOLUME_RL_RAID5)
1649                         vol->v_mediasize = size * (numdisks - 1);
1650                 else { /* RAID1E */
1651                         vol->v_mediasize = ((size * numdisks) / strip / 2) *
1652                             strip;
1653                 }
1654                 vol->v_sectorsize = sectorsize;
1655                 g_raid_start_volume(vol);
1656
1657                 /* , and subdisks. */
1658                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1659                         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1660                         sd = &vol->v_subdisks[pd->pd_disk_pos];
1661                         sd->sd_disk = disk;
1662                         sd->sd_offset = 0;
1663                         sd->sd_size = size;
1664                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1665                         if (sd->sd_disk->d_consumer != NULL) {
1666                                 g_raid_change_disk_state(disk,
1667                                     G_RAID_DISK_S_ACTIVE);
1668                                 if (level == G_RAID_VOLUME_RL_RAID5)
1669                                         g_raid_change_subdisk_state(sd,
1670                                             G_RAID_SUBDISK_S_UNINITIALIZED);
1671                                 else
1672                                         g_raid_change_subdisk_state(sd,
1673                                             G_RAID_SUBDISK_S_ACTIVE);
1674                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
1675                                     G_RAID_EVENT_SUBDISK);
1676                         } else {
1677                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
1678                         }
1679                 }
1680
1681                 /* Write metadata based on created entities. */
1682                 G_RAID_DEBUG1(0, sc, "Array started.");
1683                 g_raid_md_write_intel(md, NULL, NULL, NULL);
1684
1685                 /* Pickup any STALE/SPARE disks to refill array if needed. */
1686                 g_raid_md_intel_refill(sc);
1687
1688                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
1689                     G_RAID_EVENT_VOLUME);
1690                 return (0);
1691         }
1692         if (strcmp(verb, "add") == 0) {
1693
1694                 if (*nargs != 3) {
1695                         gctl_error(req, "Invalid number of arguments.");
1696                         return (-1);
1697                 }
1698                 volname = gctl_get_asciiparam(req, "arg1");
1699                 if (volname == NULL) {
1700                         gctl_error(req, "No volume name.");
1701                         return (-2);
1702                 }
1703                 levelname = gctl_get_asciiparam(req, "arg2");
1704                 if (levelname == NULL) {
1705                         gctl_error(req, "No RAID level.");
1706                         return (-3);
1707                 }
1708                 if (strcasecmp(levelname, "RAID5") == 0)
1709                         levelname = "RAID5-LA";
1710                 if (g_raid_volume_str2level(levelname, &level, &qual)) {
1711                         gctl_error(req, "Unknown RAID level '%s'.", levelname);
1712                         return (-4);
1713                 }
1714
1715                 /* Look for existing volumes. */
1716                 i = 0;
1717                 vol1 = NULL;
1718                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1719                         vol1 = vol;
1720                         i++;
1721                 }
1722                 if (i > 1) {
1723                         gctl_error(req, "Maximum two volumes supported.");
1724                         return (-6);
1725                 }
1726                 if (vol1 == NULL) {
1727                         gctl_error(req, "At least one volume must exist.");
1728                         return (-7);
1729                 }
1730
1731                 numdisks = vol1->v_disks_count;
1732                 force = gctl_get_paraml(req, "force", sizeof(*force));
1733                 if (!g_raid_md_intel_supported(level, qual, numdisks,
1734                     force ? *force : 0)) {
1735                         gctl_error(req, "Unsupported RAID level "
1736                             "(0x%02x/0x%02x), or number of disks (%d).",
1737                             level, qual, numdisks);
1738                         return (-5);
1739                 }
1740
1741                 /* Collect info about present disks. */
1742                 size = 0x7fffffffffffffffllu;
1743                 sectorsize = 512;
1744                 for (i = 0; i < numdisks; i++) {
1745                         disk = vol1->v_subdisks[i].sd_disk;
1746                         pd = (struct g_raid_md_intel_perdisk *)
1747                             disk->d_md_data;
1748                         disk_sectors = 
1749                             intel_get_disk_sectors(&pd->pd_disk_meta);
1750
1751                         if (disk_sectors * 512 < size)
1752                                 size = disk_sectors * 512;
1753                         if (disk->d_consumer != NULL &&
1754                             disk->d_consumer->provider != NULL &&
1755                             disk->d_consumer->provider->sectorsize >
1756                              sectorsize) {
1757                                 sectorsize =
1758                                     disk->d_consumer->provider->sectorsize;
1759                         }
1760                 }
1761
1762                 /* Reserve some space for metadata. */
1763                 size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
1764
1765                 /* Decide insert before or after. */
1766                 sd = &vol1->v_subdisks[0];
1767                 if (sd->sd_offset >
1768                     size - (sd->sd_offset + sd->sd_size)) {
1769                         off = 0;
1770                         size = sd->sd_offset;
1771                 } else {
1772                         off = sd->sd_offset + sd->sd_size;
1773                         size = size - (sd->sd_offset + sd->sd_size);
1774                 }
1775
1776                 /* Handle strip argument. */
1777                 strip = 131072;
1778                 len = sizeof(*striparg);
1779                 striparg = gctl_get_param(req, "strip", &len);
1780                 if (striparg != NULL && len == sizeof(*striparg) &&
1781                     *striparg > 0) {
1782                         if (*striparg < sectorsize) {
1783                                 gctl_error(req, "Strip size too small.");
1784                                 return (-10);
1785                         }
1786                         if (*striparg % sectorsize != 0) {
1787                                 gctl_error(req, "Incorrect strip size.");
1788                                 return (-11);
1789                         }
1790                         if (strip > 65535 * sectorsize) {
1791                                 gctl_error(req, "Strip size too big.");
1792                                 return (-12);
1793                         }
1794                         strip = *striparg;
1795                 }
1796
1797                 /* Round offset up to strip. */
1798                 if (off % strip != 0) {
1799                         size -= strip - off % strip;
1800                         off += strip - off % strip;
1801                 }
1802
1803                 /* Handle size argument. */
1804                 len = sizeof(*sizearg);
1805                 sizearg = gctl_get_param(req, "size", &len);
1806                 if (sizearg != NULL && len == sizeof(*sizearg) &&
1807                     *sizearg > 0) {
1808                         if (*sizearg > size) {
1809                                 gctl_error(req, "Size too big %lld > %lld.",
1810                                     (long long)*sizearg, (long long)size);
1811                                 return (-9);
1812                         }
1813                         size = *sizearg;
1814                 }
1815
1816                 /* Round size down to strip or sector. */
1817                 if (level == G_RAID_VOLUME_RL_RAID1)
1818                         size -= (size % sectorsize);
1819                 else
1820                         size -= (size % strip);
1821                 if (size <= 0) {
1822                         gctl_error(req, "Size too small.");
1823                         return (-13);
1824                 }
1825                 if (size > 0xffffffffllu * sectorsize) {
1826                         gctl_error(req, "Size too big.");
1827                         return (-14);
1828                 }
1829
1830                 /* We have all we need, create things: volume, ... */
1831                 vol = g_raid_create_volume(sc, volname, -1);
1832                 vol->v_md_data = (void *)(intptr_t)i;
1833                 vol->v_raid_level = level;
1834                 vol->v_raid_level_qualifier = qual;
1835                 vol->v_strip_size = strip;
1836                 vol->v_disks_count = numdisks;
1837                 if (level == G_RAID_VOLUME_RL_RAID0)
1838                         vol->v_mediasize = size * numdisks;
1839                 else if (level == G_RAID_VOLUME_RL_RAID1)
1840                         vol->v_mediasize = size;
1841                 else if (level == G_RAID_VOLUME_RL_RAID5)
1842                         vol->v_mediasize = size * (numdisks - 1);
1843                 else { /* RAID1E */
1844                         vol->v_mediasize = ((size * numdisks) / strip / 2) *
1845                             strip;
1846                 }
1847                 vol->v_sectorsize = sectorsize;
1848                 g_raid_start_volume(vol);
1849
1850                 /* , and subdisks. */
1851                 for (i = 0; i < numdisks; i++) {
1852                         disk = vol1->v_subdisks[i].sd_disk;
1853                         sd = &vol->v_subdisks[i];
1854                         sd->sd_disk = disk;
1855                         sd->sd_offset = off;
1856                         sd->sd_size = size;
1857                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1858                         if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
1859                                 if (level == G_RAID_VOLUME_RL_RAID5)
1860                                         g_raid_change_subdisk_state(sd,
1861                                             G_RAID_SUBDISK_S_UNINITIALIZED);
1862                                 else
1863                                         g_raid_change_subdisk_state(sd,
1864                                             G_RAID_SUBDISK_S_ACTIVE);
1865                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
1866                                     G_RAID_EVENT_SUBDISK);
1867                         }
1868                 }
1869
1870                 /* Write metadata based on created entities. */
1871                 g_raid_md_write_intel(md, NULL, NULL, NULL);
1872
1873                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
1874                     G_RAID_EVENT_VOLUME);
1875                 return (0);
1876         }
1877         if (strcmp(verb, "delete") == 0) {
1878
1879                 /* Full node destruction. */
1880                 if (*nargs == 1) {
1881                         /* Check if some volume is still open. */
1882                         force = gctl_get_paraml(req, "force", sizeof(*force));
1883                         if (force != NULL && *force == 0 &&
1884                             g_raid_nopens(sc) != 0) {
1885                                 gctl_error(req, "Some volume is still open.");
1886                                 return (-4);
1887                         }
1888
1889                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1890                                 if (disk->d_consumer)
1891                                         intel_meta_erase(disk->d_consumer);
1892                         }
1893                         g_raid_destroy_node(sc, 0);
1894                         return (0);
1895                 }
1896
1897                 /* Destroy specified volume. If it was last - all node. */
1898                 if (*nargs != 2) {
1899                         gctl_error(req, "Invalid number of arguments.");
1900                         return (-1);
1901                 }
1902                 volname = gctl_get_asciiparam(req, "arg1");
1903                 if (volname == NULL) {
1904                         gctl_error(req, "No volume name.");
1905                         return (-2);
1906                 }
1907
1908                 /* Search for volume. */
1909                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1910                         if (strcmp(vol->v_name, volname) == 0)
1911                                 break;
1912                 }
1913                 if (vol == NULL) {
1914                         i = strtol(volname, &tmp, 10);
1915                         if (verb != volname && tmp[0] == 0) {
1916                                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1917                                         if (vol->v_global_id == i)
1918                                                 break;
1919                                 }
1920                         }
1921                 }
1922                 if (vol == NULL) {
1923                         gctl_error(req, "Volume '%s' not found.", volname);
1924                         return (-3);
1925                 }
1926
1927                 /* Check if volume is still open. */
1928                 force = gctl_get_paraml(req, "force", sizeof(*force));
1929                 if (force != NULL && *force == 0 &&
1930                     vol->v_provider_open != 0) {
1931                         gctl_error(req, "Volume is still open.");
1932                         return (-4);
1933                 }
1934
1935                 /* Destroy volume and potentially node. */
1936                 i = 0;
1937                 TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next)
1938                         i++;
1939                 if (i >= 2) {
1940                         g_raid_destroy_volume(vol);
1941                         g_raid_md_write_intel(md, NULL, NULL, NULL);
1942                 } else {
1943                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1944                                 if (disk->d_consumer)
1945                                         intel_meta_erase(disk->d_consumer);
1946                         }
1947                         g_raid_destroy_node(sc, 0);
1948                 }
1949                 return (0);
1950         }
1951         if (strcmp(verb, "remove") == 0 ||
1952             strcmp(verb, "fail") == 0) {
1953                 if (*nargs < 2) {
1954                         gctl_error(req, "Invalid number of arguments.");
1955                         return (-1);
1956                 }
1957                 for (i = 1; i < *nargs; i++) {
1958                         snprintf(arg, sizeof(arg), "arg%d", i);
1959                         diskname = gctl_get_asciiparam(req, arg);
1960                         if (diskname == NULL) {
1961                                 gctl_error(req, "No disk name (%s).", arg);
1962                                 error = -2;
1963                                 break;
1964                         }
1965                         if (strncmp(diskname, "/dev/", 5) == 0)
1966                                 diskname += 5;
1967
1968                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1969                                 if (disk->d_consumer != NULL && 
1970                                     disk->d_consumer->provider != NULL &&
1971                                     strcmp(disk->d_consumer->provider->name,
1972                                      diskname) == 0)
1973                                         break;
1974                         }
1975                         if (disk == NULL) {
1976                                 gctl_error(req, "Disk '%s' not found.",
1977                                     diskname);
1978                                 error = -3;
1979                                 break;
1980                         }
1981
1982                         if (strcmp(verb, "fail") == 0) {
1983                                 g_raid_md_fail_disk_intel(md, NULL, disk);
1984                                 continue;
1985                         }
1986
1987                         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1988
1989                         /* Erase metadata on deleting disk. */
1990                         intel_meta_erase(disk->d_consumer);
1991
1992                         /* If disk was assigned, just update statuses. */
1993                         if (pd->pd_disk_pos >= 0) {
1994                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
1995                                 g_raid_kill_consumer(sc, disk->d_consumer);
1996                                 disk->d_consumer = NULL;
1997                                 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
1998                                         g_raid_change_subdisk_state(sd,
1999                                             G_RAID_SUBDISK_S_NONE);
2000                                         g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
2001                                             G_RAID_EVENT_SUBDISK);
2002                                 }
2003                         } else {
2004                                 /* Otherwise -- delete. */
2005                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
2006                                 g_raid_destroy_disk(disk);
2007                         }
2008                 }
2009
2010                 /* Write updated metadata to remaining disks. */
2011                 g_raid_md_write_intel(md, NULL, NULL, NULL);
2012
2013                 /* Check if anything left except placeholders. */
2014                 if (g_raid_ndisks(sc, -1) ==
2015                     g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
2016                         g_raid_destroy_node(sc, 0);
2017                 else
2018                         g_raid_md_intel_refill(sc);
2019                 return (error);
2020         }
2021         if (strcmp(verb, "insert") == 0) {
2022                 if (*nargs < 2) {
2023                         gctl_error(req, "Invalid number of arguments.");
2024                         return (-1);
2025                 }
2026                 update = 0;
2027                 for (i = 1; i < *nargs; i++) {
2028                         /* Get disk name. */
2029                         snprintf(arg, sizeof(arg), "arg%d", i);
2030                         diskname = gctl_get_asciiparam(req, arg);
2031                         if (diskname == NULL) {
2032                                 gctl_error(req, "No disk name (%s).", arg);
2033                                 error = -3;
2034                                 break;
2035                         }
2036
2037                         /* Try to find provider with specified name. */
2038                         g_topology_lock();
2039                         cp = g_raid_open_consumer(sc, diskname);
2040                         if (cp == NULL) {
2041                                 gctl_error(req, "Can't open disk '%s'.",
2042                                     diskname);
2043                                 g_topology_unlock();
2044                                 error = -4;
2045                                 break;
2046                         }
2047                         pp = cp->provider;
2048                         g_topology_unlock();
2049
2050                         /* Read disk serial. */
2051                         error = g_raid_md_get_label(cp,
2052                             &serial[0], INTEL_SERIAL_LEN);
2053                         if (error != 0) {
2054                                 gctl_error(req,
2055                                     "Can't get serial for provider '%s'.",
2056                                     diskname);
2057                                 g_raid_kill_consumer(sc, cp);
2058                                 error = -7;
2059                                 break;
2060                         }
2061
2062                         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
2063                         pd->pd_disk_pos = -1;
2064
2065                         disk = g_raid_create_disk(sc);
2066                         disk->d_consumer = cp;
2067                         disk->d_md_data = (void *)pd;
2068                         cp->private = disk;
2069
2070                         /* Read kernel dumping information. */
2071                         disk->d_kd.offset = 0;
2072                         disk->d_kd.length = OFF_MAX;
2073                         len = sizeof(disk->d_kd);
2074                         g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
2075                         if (disk->d_kd.di.dumper == NULL)
2076                                 G_RAID_DEBUG1(2, sc,
2077                                     "Dumping not supported by %s.",
2078                                     cp->provider->name);
2079
2080                         memcpy(&pd->pd_disk_meta.serial[0], &serial[0],
2081                             INTEL_SERIAL_LEN);
2082                         intel_set_disk_sectors(&pd->pd_disk_meta,
2083                             pp->mediasize / pp->sectorsize);
2084                         pd->pd_disk_meta.id = 0;
2085                         pd->pd_disk_meta.flags = INTEL_F_SPARE;
2086
2087                         /* Welcome the "new" disk. */
2088                         update += g_raid_md_intel_start_disk(disk);
2089                         if (disk->d_state == G_RAID_DISK_S_SPARE) {
2090                                 intel_meta_write_spare(cp, &pd->pd_disk_meta);
2091                                 g_raid_destroy_disk(disk);
2092                         } else if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
2093                                 gctl_error(req, "Disk '%s' doesn't fit.",
2094                                     diskname);
2095                                 g_raid_destroy_disk(disk);
2096                                 error = -8;
2097                                 break;
2098                         }
2099                 }
2100
2101                 /* Write new metadata if we changed something. */
2102                 if (update)
2103                         g_raid_md_write_intel(md, NULL, NULL, NULL);
2104                 return (error);
2105         }
2106         return (-100);
2107 }
2108
2109 static int
2110 g_raid_md_write_intel(struct g_raid_md_object *md, struct g_raid_volume *tvol,
2111     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
2112 {
2113         struct g_raid_softc *sc;
2114         struct g_raid_volume *vol;
2115         struct g_raid_subdisk *sd;
2116         struct g_raid_disk *disk;
2117         struct g_raid_md_intel_object *mdi;
2118         struct g_raid_md_intel_perdisk *pd;
2119         struct intel_raid_conf *meta;
2120         struct intel_raid_vol *mvol;
2121         struct intel_raid_map *mmap0, *mmap1;
2122         off_t sectorsize = 512, pos;
2123         const char *version, *cv;
2124         int vi, sdi, numdisks, len, state, stale;
2125
2126         sc = md->mdo_softc;
2127         mdi = (struct g_raid_md_intel_object *)md;
2128
2129         if (sc->sc_stopping == G_RAID_DESTROY_HARD)
2130                 return (0);
2131
2132         /* Bump generation. Newly written metadata may differ from previous. */
2133         mdi->mdio_generation++;
2134
2135         /* Count number of disks. */
2136         numdisks = 0;
2137         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2138                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2139                 if (pd->pd_disk_pos < 0)
2140                         continue;
2141                 numdisks++;
2142                 if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
2143                         pd->pd_disk_meta.flags =
2144                             INTEL_F_ONLINE | INTEL_F_ASSIGNED;
2145                 } else if (disk->d_state == G_RAID_DISK_S_FAILED) {
2146                         pd->pd_disk_meta.flags = INTEL_F_FAILED | INTEL_F_ASSIGNED;
2147                 } else {
2148                         pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
2149                         if (pd->pd_disk_meta.id != 0xffffffff) {
2150                                 pd->pd_disk_meta.id = 0xffffffff;
2151                                 len = strlen(pd->pd_disk_meta.serial);
2152                                 len = min(len, INTEL_SERIAL_LEN - 3);
2153                                 strcpy(pd->pd_disk_meta.serial + len, ":0");
2154                         }
2155                 }
2156         }
2157
2158         /* Fill anchor and disks. */
2159         meta = malloc(INTEL_MAX_MD_SIZE(numdisks),
2160             M_MD_INTEL, M_WAITOK | M_ZERO);
2161         memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
2162         meta->config_size = INTEL_MAX_MD_SIZE(numdisks);
2163         meta->config_id = mdi->mdio_config_id;
2164         meta->generation = mdi->mdio_generation;
2165         meta->attributes = INTEL_ATTR_CHECKSUM;
2166         meta->total_disks = numdisks;
2167         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2168                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2169                 if (pd->pd_disk_pos < 0)
2170                         continue;
2171                 meta->disk[pd->pd_disk_pos] = pd->pd_disk_meta;
2172         }
2173
2174         /* Fill volumes and maps. */
2175         vi = 0;
2176         version = INTEL_VERSION_1000;
2177         TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2178                 if (vol->v_stopping)
2179                         continue;
2180                 mvol = intel_get_volume(meta, vi);
2181
2182                 /* New metadata may have different volumes order. */
2183                 vol->v_md_data = (void *)(intptr_t)vi;
2184
2185                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2186                         sd = &vol->v_subdisks[sdi];
2187                         if (sd->sd_disk != NULL)
2188                                 break;
2189                 }
2190                 if (sdi >= vol->v_disks_count)
2191                         panic("No any filled subdisk in volume");
2192                 if (vol->v_mediasize >= 0x20000000000llu)
2193                         meta->attributes |= INTEL_ATTR_2TB;
2194                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
2195                         meta->attributes |= INTEL_ATTR_RAID0;
2196                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2197                         meta->attributes |= INTEL_ATTR_RAID1;
2198                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
2199                         meta->attributes |= INTEL_ATTR_RAID5;
2200                 else
2201                         meta->attributes |= INTEL_ATTR_RAID10;
2202
2203                 if (meta->attributes & INTEL_ATTR_2TB)
2204                         cv = INTEL_VERSION_1300;
2205 //              else if (dev->status == DEV_CLONE_N_GO)
2206 //                      cv = INTEL_VERSION_1206;
2207                 else if (vol->v_disks_count > 4)
2208                         cv = INTEL_VERSION_1204;
2209                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
2210                         cv = INTEL_VERSION_1202;
2211                 else if (vol->v_disks_count > 2)
2212                         cv = INTEL_VERSION_1201;
2213                 else if (vi > 0)
2214                         cv = INTEL_VERSION_1200;
2215                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2216                         cv = INTEL_VERSION_1100;
2217                 else
2218                         cv = INTEL_VERSION_1000;
2219                 if (strcmp(cv, version) > 0)
2220                         version = cv;
2221
2222                 strlcpy(&mvol->name[0], vol->v_name, sizeof(mvol->name));
2223                 mvol->total_sectors = vol->v_mediasize / sectorsize;
2224
2225                 /* Check for any recovery in progress. */
2226                 state = G_RAID_SUBDISK_S_ACTIVE;
2227                 pos = 0x7fffffffffffffffllu;
2228                 stale = 0;
2229                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2230                         sd = &vol->v_subdisks[sdi];
2231                         if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD)
2232                                 state = G_RAID_SUBDISK_S_REBUILD;
2233                         else if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC &&
2234                             state != G_RAID_SUBDISK_S_REBUILD)
2235                                 state = G_RAID_SUBDISK_S_RESYNC;
2236                         else if (sd->sd_state == G_RAID_SUBDISK_S_STALE)
2237                                 stale = 1;
2238                         if ((sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
2239                             sd->sd_state == G_RAID_SUBDISK_S_RESYNC) &&
2240                              sd->sd_rebuild_pos < pos)
2241                                 pos = sd->sd_rebuild_pos;
2242                 }
2243                 if (state == G_RAID_SUBDISK_S_REBUILD) {
2244                         mvol->migr_state = 1;
2245                         mvol->migr_type = INTEL_MT_REBUILD;
2246                 } else if (state == G_RAID_SUBDISK_S_RESYNC) {
2247                         mvol->migr_state = 1;
2248                         /* mvol->migr_type = INTEL_MT_REPAIR; */
2249                         mvol->migr_type = INTEL_MT_VERIFY;
2250                         mvol->state |= INTEL_ST_VERIFY_AND_FIX;
2251                 } else
2252                         mvol->migr_state = 0;
2253                 mvol->dirty = (vol->v_dirty || stale);
2254
2255                 mmap0 = intel_get_map(mvol, 0);
2256
2257                 /* Write map / common part of two maps. */
2258                 intel_set_map_offset(mmap0, sd->sd_offset / sectorsize);
2259                 intel_set_map_disk_sectors(mmap0, sd->sd_size / sectorsize);
2260                 mmap0->strip_sectors = vol->v_strip_size / sectorsize;
2261                 if (vol->v_state == G_RAID_VOLUME_S_BROKEN)
2262                         mmap0->status = INTEL_S_FAILURE;
2263                 else if (vol->v_state == G_RAID_VOLUME_S_DEGRADED)
2264                         mmap0->status = INTEL_S_DEGRADED;
2265                 else if (g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_UNINITIALIZED)
2266                     == g_raid_nsubdisks(vol, -1))
2267                         mmap0->status = INTEL_S_UNINITIALIZED;
2268                 else
2269                         mmap0->status = INTEL_S_READY;
2270                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
2271                         mmap0->type = INTEL_T_RAID0;
2272                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
2273                     vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
2274                         mmap0->type = INTEL_T_RAID1;
2275                 else
2276                         mmap0->type = INTEL_T_RAID5;
2277                 mmap0->total_disks = vol->v_disks_count;
2278                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2279                         mmap0->total_domains = vol->v_disks_count;
2280                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
2281                         mmap0->total_domains = 2;
2282                 else
2283                         mmap0->total_domains = 1;
2284                 intel_set_map_stripe_count(mmap0,
2285                     sd->sd_size / vol->v_strip_size / mmap0->total_domains);
2286                 mmap0->failed_disk_num = 0xff;
2287                 mmap0->ddf = 1;
2288
2289                 /* If there are two maps - copy common and update. */
2290                 if (mvol->migr_state) {
2291                         intel_set_vol_curr_migr_unit(mvol,
2292                             pos / vol->v_strip_size / mmap0->total_domains);
2293                         mmap1 = intel_get_map(mvol, 1);
2294                         memcpy(mmap1, mmap0, sizeof(struct intel_raid_map));
2295                         mmap0->status = INTEL_S_READY;
2296                 } else
2297                         mmap1 = NULL;
2298
2299                 /* Write disk indexes and put rebuild flags. */
2300                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2301                         sd = &vol->v_subdisks[sdi];
2302                         pd = (struct g_raid_md_intel_perdisk *)
2303                             sd->sd_disk->d_md_data;
2304                         mmap0->disk_idx[sdi] = pd->pd_disk_pos;
2305                         if (mvol->migr_state)
2306                                 mmap1->disk_idx[sdi] = pd->pd_disk_pos;
2307                         if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
2308                             sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
2309                                 mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
2310                         } else if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE &&
2311                             sd->sd_state != G_RAID_SUBDISK_S_STALE &&
2312                             sd->sd_state != G_RAID_SUBDISK_S_UNINITIALIZED) {
2313                                 mmap0->disk_idx[sdi] |= INTEL_DI_RBLD;
2314                                 if (mvol->migr_state)
2315                                         mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
2316                         }
2317                         if ((sd->sd_state == G_RAID_SUBDISK_S_NONE ||
2318                              sd->sd_state == G_RAID_SUBDISK_S_FAILED) &&
2319                             mmap0->failed_disk_num == 0xff) {
2320                                 mmap0->failed_disk_num = sdi;
2321                                 if (mvol->migr_state)
2322                                         mmap1->failed_disk_num = sdi;
2323                         }
2324                 }
2325                 vi++;
2326         }
2327         meta->total_volumes = vi;
2328         if (strcmp(version, INTEL_VERSION_1300) != 0)
2329                 meta->attributes &= INTEL_ATTR_CHECKSUM;
2330         memcpy(&meta->version[0], version, sizeof(INTEL_VERSION_1000) - 1);
2331
2332         /* We are done. Print meta data and store them to disks. */
2333         g_raid_md_intel_print(meta);
2334         if (mdi->mdio_meta != NULL)
2335                 free(mdi->mdio_meta, M_MD_INTEL);
2336         mdi->mdio_meta = meta;
2337         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2338                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2339                 if (disk->d_state != G_RAID_DISK_S_ACTIVE)
2340                         continue;
2341                 if (pd->pd_meta != NULL) {
2342                         free(pd->pd_meta, M_MD_INTEL);
2343                         pd->pd_meta = NULL;
2344                 }
2345                 pd->pd_meta = intel_meta_copy(meta);
2346                 intel_meta_write(disk->d_consumer, meta);
2347         }
2348         return (0);
2349 }
2350
2351 static int
2352 g_raid_md_fail_disk_intel(struct g_raid_md_object *md,
2353     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
2354 {
2355         struct g_raid_softc *sc;
2356         struct g_raid_md_intel_object *mdi;
2357         struct g_raid_md_intel_perdisk *pd;
2358         struct g_raid_subdisk *sd;
2359
2360         sc = md->mdo_softc;
2361         mdi = (struct g_raid_md_intel_object *)md;
2362         pd = (struct g_raid_md_intel_perdisk *)tdisk->d_md_data;
2363
2364         /* We can't fail disk that is not a part of array now. */
2365         if (pd->pd_disk_pos < 0)
2366                 return (-1);
2367
2368         /*
2369          * Mark disk as failed in metadata and try to write that metadata
2370          * to the disk itself to prevent it's later resurrection as STALE.
2371          */
2372         mdi->mdio_meta->disk[pd->pd_disk_pos].flags = INTEL_F_FAILED;
2373         pd->pd_disk_meta.flags = INTEL_F_FAILED;
2374         g_raid_md_intel_print(mdi->mdio_meta);
2375         if (tdisk->d_consumer)
2376                 intel_meta_write(tdisk->d_consumer, mdi->mdio_meta);
2377
2378         /* Change states. */
2379         g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
2380         TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
2381                 g_raid_change_subdisk_state(sd,
2382                     G_RAID_SUBDISK_S_FAILED);
2383                 g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
2384                     G_RAID_EVENT_SUBDISK);
2385         }
2386
2387         /* Write updated metadata to remaining disks. */
2388         g_raid_md_write_intel(md, NULL, NULL, tdisk);
2389
2390         /* Check if anything left except placeholders. */
2391         if (g_raid_ndisks(sc, -1) ==
2392             g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
2393                 g_raid_destroy_node(sc, 0);
2394         else
2395                 g_raid_md_intel_refill(sc);
2396         return (0);
2397 }
2398
2399 static int
2400 g_raid_md_free_disk_intel(struct g_raid_md_object *md,
2401     struct g_raid_disk *disk)
2402 {
2403         struct g_raid_md_intel_perdisk *pd;
2404
2405         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2406         if (pd->pd_meta != NULL) {
2407                 free(pd->pd_meta, M_MD_INTEL);
2408                 pd->pd_meta = NULL;
2409         }
2410         free(pd, M_MD_INTEL);
2411         disk->d_md_data = NULL;
2412         return (0);
2413 }
2414
2415 static int
2416 g_raid_md_free_intel(struct g_raid_md_object *md)
2417 {
2418         struct g_raid_md_intel_object *mdi;
2419
2420         mdi = (struct g_raid_md_intel_object *)md;
2421         if (!mdi->mdio_started) {
2422                 mdi->mdio_started = 0;
2423                 callout_stop(&mdi->mdio_start_co);
2424                 G_RAID_DEBUG1(1, md->mdo_softc,
2425                     "root_mount_rel %p", mdi->mdio_rootmount);
2426                 root_mount_rel(mdi->mdio_rootmount);
2427                 mdi->mdio_rootmount = NULL;
2428         }
2429         if (mdi->mdio_meta != NULL) {
2430                 free(mdi->mdio_meta, M_MD_INTEL);
2431                 mdi->mdio_meta = NULL;
2432         }
2433         return (0);
2434 }
2435
/*
 * Declare this metadata module under the identifier "intel" with the
 * display string "Intel".  Presumably this registers the module with the
 * GEOM RAID framework -- confirm against the G_RAID_MD_DECLARE definition.
 */
2436 G_RAID_MD_DECLARE(intel, "Intel");