CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/geom/raid/md_intel.c
Restore backward compatibility for "attach" verb.
[FreeBSD/FreeBSD.git] / sys / geom / raid / md_intel.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
5  * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/param.h>
34 #include <sys/bio.h>
35 #include <sys/endian.h>
36 #include <sys/kernel.h>
37 #include <sys/kobj.h>
38 #include <sys/limits.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/mutex.h>
42 #include <sys/systm.h>
43 #include <sys/taskqueue.h>
44 #include <sys/disk.h>
45 #include <geom/geom.h>
46 #include "geom/raid/g_raid.h"
47 #include "g_raid_md_if.h"
48
49 static MALLOC_DEFINE(M_MD_INTEL, "md_intel_data", "GEOM_RAID Intel metadata");
50
/*
 * One map of a volume: geometry, RAID type and member disk list.
 * The structure is variable length: disk_idx[] really holds total_disks
 * entries, so the map that follows (if any) starts right after the last
 * entry.  Use intel_get_map() to locate maps instead of indexing map[].
 * Several 64-bit quantities are split into a low word and a "_hi" word;
 * use the intel_get_map_*()/intel_set_map_*() helpers to access them.
 */
struct intel_raid_map {
        uint32_t        offset;         /* Low 32 bits, see offset_hi. */
        uint32_t        disk_sectors;   /* Low 32 bits, see disk_sectors_hi. */
        uint32_t        stripe_count;   /* Low 32 bits, see stripe_count_hi. */
        uint16_t        strip_sectors;
        uint8_t         status;         /* One of INTEL_S_*. */
#define INTEL_S_READY           0x00
#define INTEL_S_UNINITIALIZED   0x01
#define INTEL_S_DEGRADED        0x02
#define INTEL_S_FAILURE         0x03

        uint8_t         type;           /* One of INTEL_T_*. */
#define INTEL_T_RAID0           0x00
#define INTEL_T_RAID1           0x01
#define INTEL_T_RAID5           0x05

        uint8_t         total_disks;    /* Number of disk_idx[] entries. */
        uint8_t         total_domains;
        uint8_t         failed_disk_num;
        uint8_t         ddf;
        uint32_t        offset_hi;      /* High 32 bits of offset. */
        uint32_t        disk_sectors_hi; /* High 32 bits of disk_sectors. */
        uint32_t        stripe_count_hi; /* High 32 bits of stripe_count. */
        uint32_t        filler_2[4];
        uint32_t        disk_idx[1];    /* total_disks entries. */
/*
 * INTEL_DI_IDX masks the disk number, which intel_meta_read() validates
 * against intel_raid_conf.total_disks.  INTEL_DI_RBLD is a flag bit
 * stored alongside it (presumably "rebuilding" -- confirm with users).
 */
#define INTEL_DI_IDX    0x00ffffff
#define INTEL_DI_RBLD   0x01000000
} __packed;
79
/*
 * Per-volume record.  Volumes are stored after the disk array of
 * struct intel_raid_conf and are variable length because of the trailing
 * map(s); use intel_get_volume() to locate them.
 */
struct intel_raid_vol {
        uint8_t         name[16];       /* Printed with a 16 character bound. */
        u_int64_t       total_sectors __packed;
        uint32_t        state;          /* Bit mask of INTEL_ST_*. */
#define INTEL_ST_BOOTABLE               0x00000001
#define INTEL_ST_BOOT_DEVICE            0x00000002
#define INTEL_ST_READ_COALESCING        0x00000004
#define INTEL_ST_WRITE_COALESCING       0x00000008
#define INTEL_ST_LAST_SHUTDOWN_DIRTY    0x00000010
#define INTEL_ST_HIDDEN_AT_BOOT         0x00000020
#define INTEL_ST_CURRENTLY_HIDDEN       0x00000040
#define INTEL_ST_VERIFY_AND_FIX         0x00000080
#define INTEL_ST_MAP_STATE_UNINIT       0x00000100
#define INTEL_ST_NO_AUTO_RECOVERY       0x00000200
#define INTEL_ST_CLONE_N_GO             0x00000400
#define INTEL_ST_CLONE_MAN_SYNC         0x00000800
#define INTEL_ST_CNG_MASTER_DISK_NUM    0x00001000
        uint32_t        reserved;
        uint8_t         migr_priority;
        uint8_t         num_sub_vols;
        uint8_t         tid;
        uint8_t         cng_master_disk;
        uint16_t        cache_policy;
        uint8_t         cng_state;      /* One of INTEL_CNGST_*. */
#define INTEL_CNGST_UPDATED             0
#define INTEL_CNGST_NEEDS_UPDATE        1
#define INTEL_CNGST_MASTER_MISSING      2
        uint8_t         cng_sub_state;
        uint32_t        filler_0[10];

        uint32_t        curr_migr_unit; /* Low 32 bits, see curr_migr_unit_hi. */
        uint32_t        checkpoint_id;
        uint8_t         migr_state;     /* Non-zero: a second map is present. */
        uint8_t         migr_type;      /* One of INTEL_MT_*. */
#define INTEL_MT_INIT           0
#define INTEL_MT_REBUILD        1
#define INTEL_MT_VERIFY         2
#define INTEL_MT_GEN_MIGR       3
#define INTEL_MT_STATE_CHANGE   4
#define INTEL_MT_REPAIR         5
        uint8_t         dirty;
        uint8_t         fs_state;
        uint16_t        verify_errors;
        uint16_t        bad_blocks;
        uint32_t        curr_migr_unit_hi; /* High 32 bits of curr_migr_unit. */
        uint32_t        filler_1[3];
        struct intel_raid_map map[1];   /* 2 entries if migr_state != 0. */
} __packed;
128
/*
 * Per-disk record in the disk[] array of struct intel_raid_conf.
 * Disks are identified by serial (see intel_meta_find_disk()).
 */
struct intel_raid_disk {
#define INTEL_SERIAL_LEN        16
        uint8_t         serial[INTEL_SERIAL_LEN]; /* Compared with strncmp(). */
        uint32_t        sectors;        /* Low 32 bits, see sectors_hi. */
        uint32_t        id;
        uint32_t        flags;          /* Bit mask of INTEL_F_*. */
#define INTEL_F_SPARE           0x01
#define INTEL_F_ASSIGNED        0x02
#define INTEL_F_FAILED          0x04
#define INTEL_F_ONLINE          0x08
#define INTEL_F_DISABLED        0x80
        uint32_t        owner_cfg_num;
        uint32_t        sectors_hi;     /* High 32 bits of sectors. */
        uint32_t        filler[3];
} __packed;
144
/*
 * Metadata header.  It is followed in the same buffer by total_disks disk
 * records and then total_volumes variable-length volume records; the whole
 * thing is config_size bytes long.
 */
struct intel_raid_conf {
        uint8_t         intel_id[24];   /* Must start with INTEL_MAGIC. */
#define INTEL_MAGIC             "Intel Raid ISM Cfg Sig. "

        uint8_t         version[6];     /* One of INTEL_VERSION_*, no NUL. */
#define INTEL_VERSION_1000      "1.0.00"        /* RAID0 */
#define INTEL_VERSION_1100      "1.1.00"        /* RAID1 */
#define INTEL_VERSION_1200      "1.2.00"        /* Many volumes */
#define INTEL_VERSION_1201      "1.2.01"        /* 3 or 4 disks */
#define INTEL_VERSION_1202      "1.2.02"        /* RAID5 */
#define INTEL_VERSION_1204      "1.2.04"        /* 5 or 6 disks */
#define INTEL_VERSION_1206      "1.2.06"        /* CNG */
#define INTEL_VERSION_1300      "1.3.00"        /* Attributes */

        uint8_t         dummy_0[2];
        /*
         * Sum of all 32-bit words in the first config_size bytes, computed
         * with this field counted as zero.
         */
        uint32_t        checksum;
        uint32_t        config_size;    /* Total metadata size in bytes. */
        uint32_t        config_id;
        uint32_t        generation;
        uint32_t        error_log_size;
        uint32_t        attributes;     /* Bit mask of INTEL_ATTR_*. */
#define INTEL_ATTR_RAID0        0x00000001
#define INTEL_ATTR_RAID1        0x00000002
#define INTEL_ATTR_RAID10       0x00000004
#define INTEL_ATTR_RAID1E       0x00000008
#define INTEL_ATTR_RAID5        0x00000010
#define INTEL_ATTR_RAIDCNG      0x00000020
#define INTEL_ATTR_EXT_STRIP    0x00000040
#define INTEL_ATTR_NVM_CACHE    0x02000000
#define INTEL_ATTR_2TB_DISK     0x04000000
#define INTEL_ATTR_BBM          0x08000000
#define INTEL_ATTR_NVM_CACHE2   0x10000000
#define INTEL_ATTR_2TB          0x20000000
#define INTEL_ATTR_PM           0x40000000
#define INTEL_ATTR_CHECKSUM     0x80000000

        uint8_t         total_disks;    /* Number of disk[] entries. */
        uint8_t         total_volumes;  /* Number of volume records. */
        uint8_t         error_log_pos;
        uint8_t         dummy_2[1];
        uint32_t        cache_size;
        uint32_t        orig_config_id;
        uint32_t        pwr_cycle_count;
        uint32_t        bbm_log_size;
        uint32_t        filler_0[35];
        struct intel_raid_disk  disk[1];        /* total_disks entries. */
        /* Here goes total_volumes of struct intel_raid_vol. */
} __packed;
193
/*
 * Attribute bits this code accepts.  intel_meta_read() rejects metadata
 * whose version compares >= INTEL_VERSION_1300 and that has any attribute
 * bit outside this mask set.
 */
#define INTEL_ATTR_SUPPORTED    ( INTEL_ATTR_RAID0 | INTEL_ATTR_RAID1 | \
    INTEL_ATTR_RAID10 | INTEL_ATTR_RAID1E | INTEL_ATTR_RAID5 |          \
    INTEL_ATTR_RAIDCNG | INTEL_ATTR_EXT_STRIP | INTEL_ATTR_2TB_DISK |   \
    INTEL_ATTR_2TB | INTEL_ATTR_PM | INTEL_ATTR_CHECKSUM )
198
/*
 * Upper bound, in bytes, on the metadata size for an array of ndisks
 * disks: the header with ndisks disk records, two volumes carrying two
 * maps each, and ndisks disk_idx[] entries in each of the four maps.
 * The "- 1" terms account for the single element already included in
 * each structure's trailing [1] array.
 *
 * The argument is parenthesized so that any expression may be passed.
 */
#define INTEL_MAX_MD_SIZE(ndisks)                               \
    (sizeof(struct intel_raid_conf) +                           \
     sizeof(struct intel_raid_disk) * ((ndisks) - 1) +          \
     sizeof(struct intel_raid_vol) * 2 +                        \
     sizeof(struct intel_raid_map) * 2 +                        \
     sizeof(uint32_t) * ((ndisks) - 1) * 4)
205
/*
 * Per-disk private data of this metadata module.
 * NOTE(review): field roles below are inferred from names and types;
 * confirm against the code that fills them in.
 */
struct g_raid_md_intel_perdisk {
        struct intel_raid_conf  *pd_meta;       /* Presumably metadata read from this disk. */
        int                      pd_disk_pos;   /* Presumably index in the metadata disk[] array. */
        struct intel_raid_disk   pd_disk_meta;  /* Disk record kept by value. */
};
211
/*
 * Per-volume private data of this metadata module.
 * NOTE(review): the pv_cng* fields presumably mirror the INTEL_ST_CLONE_*
 * state bits and cng_master_disk of struct intel_raid_vol -- confirm
 * against the code that sets them.
 */
struct g_raid_md_intel_pervolume {
        int                      pv_volume_pos; /* Presumably the volume index in metadata. */
        int                      pv_cng;
        int                      pv_cng_man_sync;
        int                      pv_cng_master_disk;
};
218
/*
 * Instance data of the Intel metadata object.  mdio_base must stay first:
 * g_raid_md_intel_class passes sizeof(struct g_raid_md_intel_object) as
 * the object size, so the generic object is embedded at offset 0.
 * NOTE(review): roles of the remaining fields are inferred from their
 * names; confirm against the code that uses them.
 */
struct g_raid_md_intel_object {
        struct g_raid_md_object  mdio_base;
        uint32_t                 mdio_config_id;
        uint32_t                 mdio_orig_config_id;
        uint32_t                 mdio_generation;
        struct intel_raid_conf  *mdio_meta;
        struct callout           mdio_start_co; /* STARTING state timer. */
        int                      mdio_disks_present;
        int                      mdio_started;
        int                      mdio_incomplete;
        struct root_hold_token  *mdio_rootmount; /* Root mount delay token. */
};
231
/*
 * Forward declarations of the metadata methods implemented by this module;
 * they are wired into g_raid_md_intel_methods[] below.
 */
static g_raid_md_create_t g_raid_md_create_intel;
static g_raid_md_taste_t g_raid_md_taste_intel;
static g_raid_md_event_t g_raid_md_event_intel;
static g_raid_md_ctl_t g_raid_md_ctl_intel;
static g_raid_md_write_t g_raid_md_write_intel;
static g_raid_md_fail_disk_t g_raid_md_fail_disk_intel;
static g_raid_md_free_disk_t g_raid_md_free_disk_intel;
static g_raid_md_free_volume_t g_raid_md_free_volume_intel;
static g_raid_md_free_t g_raid_md_free_intel;
241
/*
 * kobj method table binding each g_raid_md interface method to its Intel
 * implementation.  The all-zero entry terminates the table.
 */
static kobj_method_t g_raid_md_intel_methods[] = {
        KOBJMETHOD(g_raid_md_create,    g_raid_md_create_intel),
        KOBJMETHOD(g_raid_md_taste,     g_raid_md_taste_intel),
        KOBJMETHOD(g_raid_md_event,     g_raid_md_event_intel),
        KOBJMETHOD(g_raid_md_ctl,       g_raid_md_ctl_intel),
        KOBJMETHOD(g_raid_md_write,     g_raid_md_write_intel),
        KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_intel),
        KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_intel),
        KOBJMETHOD(g_raid_md_free_volume,       g_raid_md_free_volume_intel),
        KOBJMETHOD(g_raid_md_free,      g_raid_md_free_intel),
        { 0, 0 }        /* Terminator. */
};
254
/*
 * Class descriptor for the "Intel" metadata format: class name, method
 * table and per-object size, followed by the enable flag and priority.
 */
static struct g_raid_md_class g_raid_md_intel_class = {
        "Intel",
        g_raid_md_intel_methods,
        sizeof(struct g_raid_md_intel_object),
        .mdc_enable = 1,
        .mdc_priority = 100
};
262
263
264 static struct intel_raid_map *
265 intel_get_map(struct intel_raid_vol *mvol, int i)
266 {
267         struct intel_raid_map *mmap;
268
269         if (i > (mvol->migr_state ? 1 : 0))
270                 return (NULL);
271         mmap = &mvol->map[0];
272         for (; i > 0; i--) {
273                 mmap = (struct intel_raid_map *)
274                     &mmap->disk_idx[mmap->total_disks];
275         }
276         return ((struct intel_raid_map *)mmap);
277 }
278
279 static struct intel_raid_vol *
280 intel_get_volume(struct intel_raid_conf *meta, int i)
281 {
282         struct intel_raid_vol *mvol;
283         struct intel_raid_map *mmap;
284
285         if (i > 1)
286                 return (NULL);
287         mvol = (struct intel_raid_vol *)&meta->disk[meta->total_disks];
288         for (; i > 0; i--) {
289                 mmap = intel_get_map(mvol, mvol->migr_state ? 1 : 0);
290                 mvol = (struct intel_raid_vol *)
291                     &mmap->disk_idx[mmap->total_disks];
292         }
293         return (mvol);
294 }
295
296 static off_t
297 intel_get_map_offset(struct intel_raid_map *mmap)
298 {
299         off_t offset = (off_t)mmap->offset_hi << 32;
300
301         offset += mmap->offset;
302         return (offset);
303 }
304
305 static void
306 intel_set_map_offset(struct intel_raid_map *mmap, off_t offset)
307 {
308
309         mmap->offset = offset & 0xffffffff;
310         mmap->offset_hi = offset >> 32;
311 }
312
313 static off_t
314 intel_get_map_disk_sectors(struct intel_raid_map *mmap)
315 {
316         off_t disk_sectors = (off_t)mmap->disk_sectors_hi << 32;
317
318         disk_sectors += mmap->disk_sectors;
319         return (disk_sectors);
320 }
321
322 static void
323 intel_set_map_disk_sectors(struct intel_raid_map *mmap, off_t disk_sectors)
324 {
325
326         mmap->disk_sectors = disk_sectors & 0xffffffff;
327         mmap->disk_sectors_hi = disk_sectors >> 32;
328 }
329
330 static void
331 intel_set_map_stripe_count(struct intel_raid_map *mmap, off_t stripe_count)
332 {
333
334         mmap->stripe_count = stripe_count & 0xffffffff;
335         mmap->stripe_count_hi = stripe_count >> 32;
336 }
337
338 static off_t
339 intel_get_disk_sectors(struct intel_raid_disk *disk)
340 {
341         off_t sectors = (off_t)disk->sectors_hi << 32;
342
343         sectors += disk->sectors;
344         return (sectors);
345 }
346
347 static void
348 intel_set_disk_sectors(struct intel_raid_disk *disk, off_t sectors)
349 {
350
351         disk->sectors = sectors & 0xffffffff;
352         disk->sectors_hi = sectors >> 32;
353 }
354
355 static off_t
356 intel_get_vol_curr_migr_unit(struct intel_raid_vol *vol)
357 {
358         off_t curr_migr_unit = (off_t)vol->curr_migr_unit_hi << 32;
359
360         curr_migr_unit += vol->curr_migr_unit;
361         return (curr_migr_unit);
362 }
363
364 static void
365 intel_set_vol_curr_migr_unit(struct intel_raid_vol *vol, off_t curr_migr_unit)
366 {
367
368         vol->curr_migr_unit = curr_migr_unit & 0xffffffff;
369         vol->curr_migr_unit_hi = curr_migr_unit >> 32;
370 }
371
372 static char *
373 intel_status2str(int status)
374 {
375
376         switch (status) {
377         case INTEL_S_READY:
378                 return ("READY");
379         case INTEL_S_UNINITIALIZED:
380                 return ("UNINITIALIZED");
381         case INTEL_S_DEGRADED:
382                 return ("DEGRADED");
383         case INTEL_S_FAILURE:
384                 return ("FAILURE");
385         default:
386                 return ("UNKNOWN");
387         }
388 }
389
390 static char *
391 intel_type2str(int type)
392 {
393
394         switch (type) {
395         case INTEL_T_RAID0:
396                 return ("RAID0");
397         case INTEL_T_RAID1:
398                 return ("RAID1");
399         case INTEL_T_RAID5:
400                 return ("RAID5");
401         default:
402                 return ("UNKNOWN");
403         }
404 }
405
406 static char *
407 intel_cngst2str(int cng_state)
408 {
409
410         switch (cng_state) {
411         case INTEL_CNGST_UPDATED:
412                 return ("UPDATED");
413         case INTEL_CNGST_NEEDS_UPDATE:
414                 return ("NEEDS_UPDATE");
415         case INTEL_CNGST_MASTER_MISSING:
416                 return ("MASTER_MISSING");
417         default:
418                 return ("UNKNOWN");
419         }
420 }
421
422 static char *
423 intel_mt2str(int type)
424 {
425
426         switch (type) {
427         case INTEL_MT_INIT:
428                 return ("INIT");
429         case INTEL_MT_REBUILD:
430                 return ("REBUILD");
431         case INTEL_MT_VERIFY:
432                 return ("VERIFY");
433         case INTEL_MT_GEN_MIGR:
434                 return ("GEN_MIGR");
435         case INTEL_MT_STATE_CHANGE:
436                 return ("STATE_CHANGE");
437         case INTEL_MT_REPAIR:
438                 return ("REPAIR");
439         default:
440                 return ("UNKNOWN");
441         }
442 }
443
444 static void
445 g_raid_md_intel_print(struct intel_raid_conf *meta)
446 {
447         struct intel_raid_vol *mvol;
448         struct intel_raid_map *mmap;
449         int i, j, k;
450
451         if (g_raid_debug < 1)
452                 return;
453
454         printf("********* ATA Intel MatrixRAID Metadata *********\n");
455         printf("intel_id            <%.24s>\n", meta->intel_id);
456         printf("version             <%.6s>\n", meta->version);
457         printf("checksum            0x%08x\n", meta->checksum);
458         printf("config_size         0x%08x\n", meta->config_size);
459         printf("config_id           0x%08x\n", meta->config_id);
460         printf("generation          0x%08x\n", meta->generation);
461         printf("error_log_size      %d\n", meta->error_log_size);
462         printf("attributes          0x%b\n", meta->attributes,
463                 "\020"
464                 "\001RAID0"
465                 "\002RAID1"
466                 "\003RAID10"
467                 "\004RAID1E"
468                 "\005RAID15"
469                 "\006RAIDCNG"
470                 "\007EXT_STRIP"
471                 "\032NVM_CACHE"
472                 "\0332TB_DISK"
473                 "\034BBM"
474                 "\035NVM_CACHE"
475                 "\0362TB"
476                 "\037PM"
477                 "\040CHECKSUM");
478         printf("total_disks         %u\n", meta->total_disks);
479         printf("total_volumes       %u\n", meta->total_volumes);
480         printf("error_log_pos       %u\n", meta->error_log_pos);
481         printf("cache_size          %u\n", meta->cache_size);
482         printf("orig_config_id      0x%08x\n", meta->orig_config_id);
483         printf("pwr_cycle_count     %u\n", meta->pwr_cycle_count);
484         printf("bbm_log_size        %u\n", meta->bbm_log_size);
485         printf("Flags: S - Spare, A - Assigned, F - Failed, O - Online, D - Disabled\n");
486         printf("DISK#   serial disk_sectors disk_sectors_hi disk_id flags owner\n");
487         for (i = 0; i < meta->total_disks; i++ ) {
488                 printf("    %d   <%.16s> %u %u 0x%08x 0x%b %08x\n", i,
489                     meta->disk[i].serial, meta->disk[i].sectors,
490                     meta->disk[i].sectors_hi, meta->disk[i].id,
491                     meta->disk[i].flags, "\20\01S\02A\03F\04O\05D",
492                     meta->disk[i].owner_cfg_num);
493         }
494         for (i = 0; i < meta->total_volumes; i++) {
495                 mvol = intel_get_volume(meta, i);
496                 printf(" ****** Volume %d ******\n", i);
497                 printf(" name               %.16s\n", mvol->name);
498                 printf(" total_sectors      %ju\n", mvol->total_sectors);
499                 printf(" state              0x%b\n", mvol->state,
500                         "\020"
501                         "\001BOOTABLE"
502                         "\002BOOT_DEVICE"
503                         "\003READ_COALESCING"
504                         "\004WRITE_COALESCING"
505                         "\005LAST_SHUTDOWN_DIRTY"
506                         "\006HIDDEN_AT_BOOT"
507                         "\007CURRENTLY_HIDDEN"
508                         "\010VERIFY_AND_FIX"
509                         "\011MAP_STATE_UNINIT"
510                         "\012NO_AUTO_RECOVERY"
511                         "\013CLONE_N_GO"
512                         "\014CLONE_MAN_SYNC"
513                         "\015CNG_MASTER_DISK_NUM");
514                 printf(" reserved           %u\n", mvol->reserved);
515                 printf(" migr_priority      %u\n", mvol->migr_priority);
516                 printf(" num_sub_vols       %u\n", mvol->num_sub_vols);
517                 printf(" tid                %u\n", mvol->tid);
518                 printf(" cng_master_disk    %u\n", mvol->cng_master_disk);
519                 printf(" cache_policy       %u\n", mvol->cache_policy);
520                 printf(" cng_state          %u (%s)\n", mvol->cng_state,
521                         intel_cngst2str(mvol->cng_state));
522                 printf(" cng_sub_state      %u\n", mvol->cng_sub_state);
523                 printf(" curr_migr_unit     %u\n", mvol->curr_migr_unit);
524                 printf(" curr_migr_unit_hi  %u\n", mvol->curr_migr_unit_hi);
525                 printf(" checkpoint_id      %u\n", mvol->checkpoint_id);
526                 printf(" migr_state         %u\n", mvol->migr_state);
527                 printf(" migr_type          %u (%s)\n", mvol->migr_type,
528                         intel_mt2str(mvol->migr_type));
529                 printf(" dirty              %u\n", mvol->dirty);
530                 printf(" fs_state           %u\n", mvol->fs_state);
531                 printf(" verify_errors      %u\n", mvol->verify_errors);
532                 printf(" bad_blocks         %u\n", mvol->bad_blocks);
533
534                 for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
535                         printf("  *** Map %d ***\n", j);
536                         mmap = intel_get_map(mvol, j);
537                         printf("  offset            %u\n", mmap->offset);
538                         printf("  offset_hi         %u\n", mmap->offset_hi);
539                         printf("  disk_sectors      %u\n", mmap->disk_sectors);
540                         printf("  disk_sectors_hi   %u\n", mmap->disk_sectors_hi);
541                         printf("  stripe_count      %u\n", mmap->stripe_count);
542                         printf("  stripe_count_hi   %u\n", mmap->stripe_count_hi);
543                         printf("  strip_sectors     %u\n", mmap->strip_sectors);
544                         printf("  status            %u (%s)\n", mmap->status,
545                                 intel_status2str(mmap->status));
546                         printf("  type              %u (%s)\n", mmap->type,
547                                 intel_type2str(mmap->type));
548                         printf("  total_disks       %u\n", mmap->total_disks);
549                         printf("  total_domains     %u\n", mmap->total_domains);
550                         printf("  failed_disk_num   %u\n", mmap->failed_disk_num);
551                         printf("  ddf               %u\n", mmap->ddf);
552                         printf("  disk_idx         ");
553                         for (k = 0; k < mmap->total_disks; k++)
554                                 printf(" 0x%08x", mmap->disk_idx[k]);
555                         printf("\n");
556                 }
557         }
558         printf("=================================================\n");
559 }
560
561 static struct intel_raid_conf *
562 intel_meta_copy(struct intel_raid_conf *meta)
563 {
564         struct intel_raid_conf *nmeta;
565
566         nmeta = malloc(meta->config_size, M_MD_INTEL, M_WAITOK);
567         memcpy(nmeta, meta, meta->config_size);
568         return (nmeta);
569 }
570
571 static int
572 intel_meta_find_disk(struct intel_raid_conf *meta, char *serial)
573 {
574         int pos;
575
576         for (pos = 0; pos < meta->total_disks; pos++) {
577                 if (strncmp(meta->disk[pos].serial,
578                     serial, INTEL_SERIAL_LEN) == 0)
579                         return (pos);
580         }
581         return (-1);
582 }
583
584 static struct intel_raid_conf *
585 intel_meta_read(struct g_consumer *cp)
586 {
587         struct g_provider *pp;
588         struct intel_raid_conf *meta;
589         struct intel_raid_vol *mvol;
590         struct intel_raid_map *mmap, *mmap1;
591         char *buf;
592         int error, i, j, k, left, size;
593         uint32_t checksum, *ptr;
594
595         pp = cp->provider;
596
597         /* Read the anchor sector. */
598         buf = g_read_data(cp,
599             pp->mediasize - pp->sectorsize * 2, pp->sectorsize, &error);
600         if (buf == NULL) {
601                 G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).",
602                     pp->name, error);
603                 return (NULL);
604         }
605         meta = (struct intel_raid_conf *)buf;
606
607         /* Check if this is an Intel RAID struct */
608         if (strncmp(meta->intel_id, INTEL_MAGIC, strlen(INTEL_MAGIC))) {
609                 G_RAID_DEBUG(1, "Intel signature check failed on %s", pp->name);
610                 g_free(buf);
611                 return (NULL);
612         }
613         if (meta->config_size > 65536 ||
614             meta->config_size < sizeof(struct intel_raid_conf)) {
615                 G_RAID_DEBUG(1, "Intel metadata size looks wrong: %d",
616                     meta->config_size);
617                 g_free(buf);
618                 return (NULL);
619         }
620         size = meta->config_size;
621         meta = malloc(size, M_MD_INTEL, M_WAITOK);
622         memcpy(meta, buf, min(size, pp->sectorsize));
623         g_free(buf);
624
625         /* Read all the rest, if needed. */
626         if (meta->config_size > pp->sectorsize) {
627                 left = (meta->config_size - 1) / pp->sectorsize;
628                 buf = g_read_data(cp,
629                     pp->mediasize - pp->sectorsize * (2 + left),
630                     pp->sectorsize * left, &error);
631                 if (buf == NULL) {
632                         G_RAID_DEBUG(1, "Cannot read remaining metadata"
633                             " part from %s (error=%d).",
634                             pp->name, error);
635                         free(meta, M_MD_INTEL);
636                         return (NULL);
637                 }
638                 memcpy(((char *)meta) + pp->sectorsize, buf,
639                     pp->sectorsize * left);
640                 g_free(buf);
641         }
642
643         /* Check metadata checksum. */
644         for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
645             i < (meta->config_size / sizeof(uint32_t)); i++) {
646                 checksum += *ptr++;
647         }
648         checksum -= meta->checksum;
649         if (checksum != meta->checksum) {
650                 G_RAID_DEBUG(1, "Intel checksum check failed on %s", pp->name);
651                 free(meta, M_MD_INTEL);
652                 return (NULL);
653         }
654
655         /* Validate metadata size. */
656         size = sizeof(struct intel_raid_conf) +
657             sizeof(struct intel_raid_disk) * (meta->total_disks - 1) +
658             sizeof(struct intel_raid_vol) * meta->total_volumes;
659         if (size > meta->config_size) {
660 badsize:
661                 G_RAID_DEBUG(1, "Intel metadata size incorrect %d < %d",
662                     meta->config_size, size);
663                 free(meta, M_MD_INTEL);
664                 return (NULL);
665         }
666         for (i = 0; i < meta->total_volumes; i++) {
667                 mvol = intel_get_volume(meta, i);
668                 mmap = intel_get_map(mvol, 0);
669                 size += 4 * (mmap->total_disks - 1);
670                 if (size > meta->config_size)
671                         goto badsize;
672                 if (mvol->migr_state) {
673                         size += sizeof(struct intel_raid_map);
674                         if (size > meta->config_size)
675                                 goto badsize;
676                         mmap = intel_get_map(mvol, 1);
677                         size += 4 * (mmap->total_disks - 1);
678                         if (size > meta->config_size)
679                                 goto badsize;
680                 }
681         }
682
683         g_raid_md_intel_print(meta);
684
685         if (strncmp(meta->version, INTEL_VERSION_1300, 6) > 0) {
686                 G_RAID_DEBUG(1, "Intel unsupported version: '%.6s'",
687                     meta->version);
688                 free(meta, M_MD_INTEL);
689                 return (NULL);
690         }
691
692         if (strncmp(meta->version, INTEL_VERSION_1300, 6) >= 0 &&
693             (meta->attributes & ~INTEL_ATTR_SUPPORTED) != 0) {
694                 G_RAID_DEBUG(1, "Intel unsupported attributes: 0x%08x",
695                     meta->attributes & ~INTEL_ATTR_SUPPORTED);
696                 free(meta, M_MD_INTEL);
697                 return (NULL);
698         }
699
700         /* Validate disk indexes. */
701         for (i = 0; i < meta->total_volumes; i++) {
702                 mvol = intel_get_volume(meta, i);
703                 for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) {
704                         mmap = intel_get_map(mvol, j);
705                         for (k = 0; k < mmap->total_disks; k++) {
706                                 if ((mmap->disk_idx[k] & INTEL_DI_IDX) >
707                                     meta->total_disks) {
708                                         G_RAID_DEBUG(1, "Intel metadata disk"
709                                             " index %d too big (>%d)",
710                                             mmap->disk_idx[k] & INTEL_DI_IDX,
711                                             meta->total_disks);
712                                         free(meta, M_MD_INTEL);
713                                         return (NULL);
714                                 }
715                         }
716                 }
717         }
718
719         /* Validate migration types. */
720         for (i = 0; i < meta->total_volumes; i++) {
721                 mvol = intel_get_volume(meta, i);
722                 /* Deny unknown migration types. */
723                 if (mvol->migr_state &&
724                     mvol->migr_type != INTEL_MT_INIT &&
725                     mvol->migr_type != INTEL_MT_REBUILD &&
726                     mvol->migr_type != INTEL_MT_VERIFY &&
727                     mvol->migr_type != INTEL_MT_GEN_MIGR &&
728                     mvol->migr_type != INTEL_MT_REPAIR) {
729                         G_RAID_DEBUG(1, "Intel metadata has unsupported"
730                             " migration type %d", mvol->migr_type);
731                         free(meta, M_MD_INTEL);
732                         return (NULL);
733                 }
734                 /* Deny general migrations except SINGLE->RAID1. */
735                 if (mvol->migr_state &&
736                     mvol->migr_type == INTEL_MT_GEN_MIGR) {
737                         mmap = intel_get_map(mvol, 0);
738                         mmap1 = intel_get_map(mvol, 1);
739                         if (mmap1->total_disks != 1 ||
740                             mmap->type != INTEL_T_RAID1 ||
741                             mmap->total_disks != 2 ||
742                             mmap->offset != mmap1->offset ||
743                             mmap->disk_sectors != mmap1->disk_sectors ||
744                             mmap->total_domains != mmap->total_disks ||
745                             mmap->offset_hi != mmap1->offset_hi ||
746                             mmap->disk_sectors_hi != mmap1->disk_sectors_hi ||
747                             (mmap->disk_idx[0] != mmap1->disk_idx[0] &&
748                              mmap->disk_idx[0] != mmap1->disk_idx[1])) {
749                                 G_RAID_DEBUG(1, "Intel metadata has unsupported"
750                                     " variant of general migration");
751                                 free(meta, M_MD_INTEL);
752                                 return (NULL);
753                         }
754                 }
755         }
756
757         return (meta);
758 }
759
760 static int
761 intel_meta_write(struct g_consumer *cp, struct intel_raid_conf *meta)
762 {
763         struct g_provider *pp;
764         char *buf;
765         int error, i, sectors;
766         uint32_t checksum, *ptr;
767
768         pp = cp->provider;
769
770         /* Recalculate checksum for case if metadata were changed. */
771         meta->checksum = 0;
772         for (checksum = 0, ptr = (uint32_t *)meta, i = 0;
773             i < (meta->config_size / sizeof(uint32_t)); i++) {
774                 checksum += *ptr++;
775         }
776         meta->checksum = checksum;
777
778         /* Create and fill buffer. */
779         sectors = howmany(meta->config_size, pp->sectorsize);
780         buf = malloc(sectors * pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
781         if (sectors > 1) {
782                 memcpy(buf, ((char *)meta) + pp->sectorsize,
783                     (sectors - 1) * pp->sectorsize);
784         }
785         memcpy(buf + (sectors - 1) * pp->sectorsize, meta, pp->sectorsize);
786
787         error = g_write_data(cp,
788             pp->mediasize - pp->sectorsize * (1 + sectors),
789             buf, pp->sectorsize * sectors);
790         if (error != 0) {
791                 G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).",
792                     pp->name, error);
793         }
794
795         free(buf, M_MD_INTEL);
796         return (error);
797 }
798
799 static int
800 intel_meta_erase(struct g_consumer *cp)
801 {
802         struct g_provider *pp;
803         char *buf;
804         int error;
805
806         pp = cp->provider;
807         buf = malloc(pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO);
808         error = g_write_data(cp,
809             pp->mediasize - 2 * pp->sectorsize,
810             buf, pp->sectorsize);
811         if (error != 0) {
812                 G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).",
813                     pp->name, error);
814         }
815         free(buf, M_MD_INTEL);
816         return (error);
817 }
818
819 static int
820 intel_meta_write_spare(struct g_consumer *cp, struct intel_raid_disk *d)
821 {
822         struct intel_raid_conf *meta;
823         int error;
824
825         /* Fill anchor and single disk. */
826         meta = malloc(INTEL_MAX_MD_SIZE(1), M_MD_INTEL, M_WAITOK | M_ZERO);
827         memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
828         memcpy(&meta->version[0], INTEL_VERSION_1000,
829             sizeof(INTEL_VERSION_1000) - 1);
830         meta->config_size = INTEL_MAX_MD_SIZE(1);
831         meta->config_id = meta->orig_config_id = arc4random();
832         meta->generation = 1;
833         meta->total_disks = 1;
834         meta->disk[0] = *d;
835         error = intel_meta_write(cp, meta);
836         free(meta, M_MD_INTEL);
837         return (error);
838 }
839
840 static struct g_raid_disk *
841 g_raid_md_intel_get_disk(struct g_raid_softc *sc, int id)
842 {
843         struct g_raid_disk      *disk;
844         struct g_raid_md_intel_perdisk *pd;
845
846         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
847                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
848                 if (pd->pd_disk_pos == id)
849                         break;
850         }
851         return (disk);
852 }
853
854 static int
855 g_raid_md_intel_supported(int level, int qual, int disks, int force)
856 {
857
858         switch (level) {
859         case G_RAID_VOLUME_RL_RAID0:
860                 if (disks < 1)
861                         return (0);
862                 if (!force && (disks < 2 || disks > 6))
863                         return (0);
864                 break;
865         case G_RAID_VOLUME_RL_RAID1:
866                 if (disks < 1)
867                         return (0);
868                 if (!force && (disks != 2))
869                         return (0);
870                 break;
871         case G_RAID_VOLUME_RL_RAID1E:
872                 if (disks < 2)
873                         return (0);
874                 if (!force && (disks != 4))
875                         return (0);
876                 break;
877         case G_RAID_VOLUME_RL_RAID5:
878                 if (disks < 3)
879                         return (0);
880                 if (!force && disks > 6)
881                         return (0);
882                 if (qual != G_RAID_VOLUME_RLQ_R5LA)
883                         return (0);
884                 break;
885         default:
886                 return (0);
887         }
888         if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE)
889                 return (0);
890         return (1);
891 }
892
893 static struct g_raid_volume *
894 g_raid_md_intel_get_volume(struct g_raid_softc *sc, int id)
895 {
896         struct g_raid_volume    *mvol;
897         struct g_raid_md_intel_pervolume *pv;
898
899         TAILQ_FOREACH(mvol, &sc->sc_volumes, v_next) {
900                 pv = mvol->v_md_data;
901                 if (pv->pv_volume_pos == id)
902                         break;
903         }
904         return (mvol);
905 }
906
/*
 * Try to give "disk" a place in the array described by the node's current
 * Intel metadata (mdi->mdio_meta).
 *
 * The disk's serial is looked up in the array metadata:
 *  - Not found: a disk flagged FAILED (and not DISABLED) becomes
 *    STALE_FAILED.  Before the array has started, or when no OFFLINE/FAILED
 *    disk whose subdisks all fit on this disk exists, the disk becomes SPARE
 *    (if flagged so) or STALE.  Otherwise it takes over the position of the
 *    chosen OFFLINE/FAILED disk ("resurrection").
 *  - Found: the disk replaces the OFFLINE placeholder at that position; if
 *    the position is held by a disk in any other state, the new disk
 *    becomes STALE.
 *
 * When a position is obtained, the subdisks of the old disk are moved to
 * the new one, the disk state and every subdisk state are derived from the
 * metadata, and a G_RAID_SUBDISK_E_NEW event is sent for each subdisk.
 *
 * Returns 1 if the disk was taken as a replacement (resurrection) or was
 * classified as SPARE on the no-fit path, 0 otherwise.  Callers in this
 * file treat a non-zero return as "metadata needs to be rewritten".
 */
907 static int
908 g_raid_md_intel_start_disk(struct g_raid_disk *disk)
909 {
910         struct g_raid_softc *sc;
911         struct g_raid_subdisk *sd, *tmpsd;
912         struct g_raid_disk *olddisk, *tmpdisk;
913         struct g_raid_md_object *md;
914         struct g_raid_md_intel_object *mdi;
915         struct g_raid_md_intel_pervolume *pv;
916         struct g_raid_md_intel_perdisk *pd, *oldpd;
917         struct intel_raid_conf *meta;
918         struct intel_raid_vol *mvol;
919         struct intel_raid_map *mmap0, *mmap1;
920         int disk_pos, resurrection = 0, migr_global, i;
921
922         sc = disk->d_softc;
923         md = sc->sc_md;
924         mdi = (struct g_raid_md_intel_object *)md;
925         meta = mdi->mdio_meta;
926         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
927         olddisk = NULL;
928
929         /* Find disk position in metadata by its serial. */
930         disk_pos = intel_meta_find_disk(meta, pd->pd_disk_meta.serial);
931         if (disk_pos < 0) {
932                 G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk");
933                 /* Failed stale disk is useless for us. */
934                 if ((pd->pd_disk_meta.flags & INTEL_F_FAILED) &&
935                     !(pd->pd_disk_meta.flags & INTEL_F_DISABLED)) {
936                         g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
937                         return (0);
938                 }
939                 /* If we are in the start process, that's all for now. */
940                 if (!mdi->mdio_started)
941                         goto nofit;
942                 /*
943                  * If we have already started - try to get use of the disk.
944                  * Try to replace OFFLINE disks first, then FAILED.
945                  */
946                 TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) {
947                         if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE &&
948                             tmpdisk->d_state != G_RAID_DISK_S_FAILED)
949                                 continue;
950                         /* Make sure this disk is big enough. */
                            /*
                             * NOTE(review): the extra 4096 bytes are presumably
                             * space reserved past the subdisk for metadata --
                             * confirm.
                             */
951                         TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) {
952                                 off_t disk_sectors = 
953                                     intel_get_disk_sectors(&pd->pd_disk_meta);
954
955                                 if (sd->sd_offset + sd->sd_size + 4096 >
956                                     disk_sectors * 512) {
957                                         G_RAID_DEBUG1(1, sc,
958                                             "Disk too small (%llu < %llu)",
959                                             (unsigned long long)
960                                             disk_sectors * 512,
961                                             (unsigned long long)
962                                             sd->sd_offset + sd->sd_size + 4096);
963                                         break;
964                                 }
965                         }
                            /* sd is non-NULL only if some subdisk did not fit. */
966                         if (sd != NULL)
967                                 continue;
                            /*
                             * Stop at the first fitting OFFLINE disk; otherwise
                             * remember the first fitting FAILED disk and keep
                             * looking for an OFFLINE one.
                             */
968                         if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) {
969                                 olddisk = tmpdisk;
970                                 break;
971                         } else if (olddisk == NULL)
972                                 olddisk = tmpdisk;
973                 }
974                 if (olddisk == NULL) {
975 nofit:
976                         if (pd->pd_disk_meta.flags & INTEL_F_SPARE) {
977                                 g_raid_change_disk_state(disk,
978                                     G_RAID_DISK_S_SPARE);
979                                 return (1);
980                         } else {
981                                 g_raid_change_disk_state(disk,
982                                     G_RAID_DISK_S_STALE);
983                                 return (0);
984                         }
985                 }
986                 oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
987                 disk_pos = oldpd->pd_disk_pos;
988                 resurrection = 1;
989         }
990
991         if (olddisk == NULL) {
992                 /* Find placeholder by position. */
993                 olddisk = g_raid_md_intel_get_disk(sc, disk_pos);
994                 if (olddisk == NULL)
995                         panic("No disk at position %d!", disk_pos);
996                 if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) {
997                         G_RAID_DEBUG1(1, sc, "More than one disk for pos %d",
998                             disk_pos);
999                         g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE);
1000                         return (0);
1001                 }
1002                 oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
1003         }
1004
1005         /* Replace failed disk or placeholder with new disk. */
1006         TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) {
1007                 TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next);
1008                 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1009                 sd->sd_disk = disk;
1010         }
             /*
              * NOTE(review): -2 presumably marks the old disk as no longer
              * holding an array position -- confirm against other users of
              * pd_disk_pos.
              */
1011         oldpd->pd_disk_pos = -2;
1012         pd->pd_disk_pos = disk_pos;
1013
1014         /* If it was placeholder -- destroy it. */
1015         if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) {
1016                 g_raid_destroy_disk(olddisk);
1017         } else {
1018                 /* Otherwise, make it STALE_FAILED. */
1019                 g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED);
1020                 /* Update global metadata just in case. */
1021                 memcpy(&meta->disk[disk_pos], &pd->pd_disk_meta,
1022                     sizeof(struct intel_raid_disk));
1023         }
1024
1025         /* Welcome the new disk. */
1026         if ((meta->disk[disk_pos].flags & INTEL_F_DISABLED) &&
1027             !(pd->pd_disk_meta.flags & INTEL_F_SPARE))
1028                 g_raid_change_disk_state(disk, G_RAID_DISK_S_DISABLED);
1029         else if (resurrection)
1030                 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
1031         else if (meta->disk[disk_pos].flags & INTEL_F_FAILED)
1032                 g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
1033         else if (meta->disk[disk_pos].flags & INTEL_F_SPARE)
1034                 g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
1035         else
1036                 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
             /* Derive the state of every subdisk now living on this disk. */
1037         TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
1038                 pv = sd->sd_volume->v_md_data;
1039                 mvol = intel_get_volume(meta, pv->pv_volume_pos);
1040                 mmap0 = intel_get_map(mvol, 0);
1041                 if (mvol->migr_state)
1042                         mmap1 = intel_get_map(mvol, 1);
1043                 else
1044                         mmap1 = mmap0;
1045
                     /*
                      * migr_global stays 1 unless some disk is marked for
                      * rebuild in map 1 while not marked in map 0.
                      */
1046                 migr_global = 1;
1047                 for (i = 0; i < mmap0->total_disks; i++) {
1048                         if ((mmap0->disk_idx[i] & INTEL_DI_RBLD) == 0 &&
1049                             (mmap1->disk_idx[i] & INTEL_DI_RBLD) != 0)
1050                                 migr_global = 0;
1051                 }
1052
1053                 if ((meta->disk[disk_pos].flags & INTEL_F_DISABLED) &&
1054                     !(pd->pd_disk_meta.flags & INTEL_F_SPARE)) {
1055                         /* Disabled disk, useless. */
1056                         g_raid_change_subdisk_state(sd,
1057                             G_RAID_SUBDISK_S_NONE);
1058                 } else if (resurrection) {
1059                         /* Stale disk, almost same as new. */
1060                         g_raid_change_subdisk_state(sd,
1061                             G_RAID_SUBDISK_S_NEW);
1062                 } else if (meta->disk[disk_pos].flags & INTEL_F_FAILED) {
1063                         /* Failed disk, almost useless. */
1064                         g_raid_change_subdisk_state(sd,
1065                             G_RAID_SUBDISK_S_FAILED);
1066                 } else if (mvol->migr_state == 0) {
1067                         if (mmap0->status == INTEL_S_UNINITIALIZED &&
1068                             (!pv->pv_cng || pv->pv_cng_master_disk != disk_pos)) {
1069                                 /* Freshly created uninitialized volume. */
1070                                 g_raid_change_subdisk_state(sd,
1071                                     G_RAID_SUBDISK_S_UNINITIALIZED);
1072                         } else if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
1073                                 /* Freshly inserted disk. */
1074                                 g_raid_change_subdisk_state(sd,
1075                                     G_RAID_SUBDISK_S_NEW);
1076                         } else if (mvol->dirty && (!pv->pv_cng ||
1077                             pv->pv_cng_master_disk != disk_pos)) {
1078                                 /* Dirty volume (unclean shutdown). */
1079                                 g_raid_change_subdisk_state(sd,
1080                                     G_RAID_SUBDISK_S_STALE);
1081                         } else {
1082                                 /* Up to date disk. */
1083                                 g_raid_change_subdisk_state(sd,
1084                                     G_RAID_SUBDISK_S_ACTIVE);
1085                         }
1086                 } else if (mvol->migr_type == INTEL_MT_INIT ||
1087                            mvol->migr_type == INTEL_MT_REBUILD) {
1088                         if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
1089                                 /* Freshly inserted disk. */
1090                                 g_raid_change_subdisk_state(sd,
1091                                     G_RAID_SUBDISK_S_NEW);
1092                         } else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
1093                                 /* Rebuilding disk. */
1094                                 g_raid_change_subdisk_state(sd,
1095                                     G_RAID_SUBDISK_S_REBUILD);
                                     /*
                                      * A dirty volume restarts the rebuild from
                                      * the beginning; otherwise resume from the
                                      * migration unit recorded in the metadata.
                                      */
1096                                 if (mvol->dirty) {
1097                                         sd->sd_rebuild_pos = 0;
1098                                 } else {
1099                                         sd->sd_rebuild_pos =
1100                                             intel_get_vol_curr_migr_unit(mvol) *
1101                                             sd->sd_volume->v_strip_size *
1102                                             mmap0->total_domains;
1103                                 }
1104                         } else if (mvol->migr_type == INTEL_MT_INIT &&
1105                             migr_global) {
1106                                 /* Freshly created uninitialized volume. */
1107                                 g_raid_change_subdisk_state(sd,
1108                                     G_RAID_SUBDISK_S_UNINITIALIZED);
1109                         } else if (mvol->dirty && (!pv->pv_cng ||
1110                             pv->pv_cng_master_disk != disk_pos)) {
1111                                 /* Dirty volume (unclean shutdown). */
1112                                 g_raid_change_subdisk_state(sd,
1113                                     G_RAID_SUBDISK_S_STALE);
1114                         } else {
1115                                 /* Up to date disk. */
1116                                 g_raid_change_subdisk_state(sd,
1117                                     G_RAID_SUBDISK_S_ACTIVE);
1118                         }
1119                 } else if (mvol->migr_type == INTEL_MT_VERIFY ||
1120                            mvol->migr_type == INTEL_MT_REPAIR) {
1121                         if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) {
1122                                 /* Freshly inserted disk. */
1123                                 g_raid_change_subdisk_state(sd,
1124                                     G_RAID_SUBDISK_S_NEW);
1125                         } else if ((mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) ||
1126                             migr_global) {
1127                                 /* Resyncing disk. */
1128                                 g_raid_change_subdisk_state(sd,
1129                                     G_RAID_SUBDISK_S_RESYNC);
1130                                 if (mvol->dirty) {
1131                                         sd->sd_rebuild_pos = 0;
1132                                 } else {
1133                                         sd->sd_rebuild_pos =
1134                                             intel_get_vol_curr_migr_unit(mvol) *
1135                                             sd->sd_volume->v_strip_size *
1136                                             mmap0->total_domains;
1137                                 }
1138                         } else if (mvol->dirty) {
1139                                 /* Dirty volume (unclean shutdown). */
1140                                 g_raid_change_subdisk_state(sd,
1141                                     G_RAID_SUBDISK_S_STALE);
1142                         } else {
1143                                 /* Up to date disk. */
1144                                 g_raid_change_subdisk_state(sd,
1145                                     G_RAID_SUBDISK_S_ACTIVE);
1146                         }
1147                 } else if (mvol->migr_type == INTEL_MT_GEN_MIGR) {
                             /*
                              * Only the disk listed first in map 1 is treated
                              * as up to date; any other disk is new.
                              */
1148                         if ((mmap1->disk_idx[0] & INTEL_DI_IDX) != disk_pos) {
1149                                 /* Freshly inserted disk. */
1150                                 g_raid_change_subdisk_state(sd,
1151                                     G_RAID_SUBDISK_S_NEW);
1152                         } else {
1153                                 /* Up to date disk. */
1154                                 g_raid_change_subdisk_state(sd,
1155                                     G_RAID_SUBDISK_S_ACTIVE);
1156                         }
1157                 }
1158                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
1159                     G_RAID_EVENT_SUBDISK);
1160         }
1161
1162         /* Update status of our need for spare. */
             /* Incomplete means fewer ACTIVE + DISABLED disks than the metadata lists. */
1163         if (mdi->mdio_started) {
1164                 mdi->mdio_incomplete =
1165                     (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) +
1166                      g_raid_ndisks(sc, G_RAID_DISK_S_DISABLED) <
1167                      meta->total_disks);
1168         }
1169
1170         return (resurrection);
1171 }
1172
1173 static void
1174 g_disk_md_intel_retaste(void *arg, int pending)
1175 {
1176
1177         G_RAID_DEBUG(1, "Array is not complete, trying to retaste.");
1178         g_retaste(&g_raid_class);
1179         free(arg, M_MD_INTEL);
1180 }
1181
1182 static void
1183 g_raid_md_intel_refill(struct g_raid_softc *sc)
1184 {
1185         struct g_raid_md_object *md;
1186         struct g_raid_md_intel_object *mdi;
1187         struct intel_raid_conf *meta;
1188         struct g_raid_disk *disk;
1189         struct task *task;
1190         int update, na;
1191
1192         md = sc->sc_md;
1193         mdi = (struct g_raid_md_intel_object *)md;
1194         meta = mdi->mdio_meta;
1195         update = 0;
1196         do {
1197                 /* Make sure we miss anything. */
1198                 na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) +
1199                     g_raid_ndisks(sc, G_RAID_DISK_S_DISABLED);
1200                 if (na == meta->total_disks)
1201                         break;
1202
1203                 G_RAID_DEBUG1(1, md->mdo_softc,
1204                     "Array is not complete (%d of %d), "
1205                     "trying to refill.", na, meta->total_disks);
1206
1207                 /* Try to get use some of STALE disks. */
1208                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1209                         if (disk->d_state == G_RAID_DISK_S_STALE) {
1210                                 update += g_raid_md_intel_start_disk(disk);
1211                                 if (disk->d_state == G_RAID_DISK_S_ACTIVE ||
1212                                     disk->d_state == G_RAID_DISK_S_DISABLED)
1213                                         break;
1214                         }
1215                 }
1216                 if (disk != NULL)
1217                         continue;
1218
1219                 /* Try to get use some of SPARE disks. */
1220                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1221                         if (disk->d_state == G_RAID_DISK_S_SPARE) {
1222                                 update += g_raid_md_intel_start_disk(disk);
1223                                 if (disk->d_state == G_RAID_DISK_S_ACTIVE)
1224                                         break;
1225                         }
1226                 }
1227         } while (disk != NULL);
1228
1229         /* Write new metadata if we changed something. */
1230         if (update) {
1231                 g_raid_md_write_intel(md, NULL, NULL, NULL);
1232                 meta = mdi->mdio_meta;
1233         }
1234
1235         /* Update status of our need for spare. */
1236         mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) +
1237             g_raid_ndisks(sc, G_RAID_DISK_S_DISABLED) < meta->total_disks);
1238
1239         /* Request retaste hoping to find spare. */
1240         if (mdi->mdio_incomplete) {
1241                 task = malloc(sizeof(struct task),
1242                     M_MD_INTEL, M_WAITOK | M_ZERO);
1243                 TASK_INIT(task, 0, g_disk_md_intel_retaste, task);
1244                 taskqueue_enqueue(taskqueue_swi, task);
1245         }
1246 }
1247
/*
 * Start the array using the metadata collected so far (mdi->mdio_meta).
 *
 * Steps, in order:
 *  1. Create one volume per metadata volume, fill in its level, strip
 *     size, disk count, size and per-subdisk offset/size from map 0, and
 *     start it.
 *  2. Create one OFFLINE placeholder disk per metadata disk and attach to
 *     it the subdisk of every volume whose map 0 references that disk.
 *  3. Let every disk still in state NONE take its place through
 *     g_raid_md_intel_start_disk().
 *  4. Mark the array started, write metadata, try to refill missing disks
 *     from STALE/SPARE ones, and send a START event to every volume.
 *  5. Stop the start timeout callout and release the root mount hold.
 */
1248 static void
1249 g_raid_md_intel_start(struct g_raid_softc *sc)
1250 {
1251         struct g_raid_md_object *md;
1252         struct g_raid_md_intel_object *mdi;
1253         struct g_raid_md_intel_pervolume *pv;
1254         struct g_raid_md_intel_perdisk *pd;
1255         struct intel_raid_conf *meta;
1256         struct intel_raid_vol *mvol;
1257         struct intel_raid_map *mmap;
1258         struct g_raid_volume *vol;
1259         struct g_raid_subdisk *sd;
1260         struct g_raid_disk *disk;
1261         int i, j, disk_pos;
1262
1263         md = sc->sc_md;
1264         mdi = (struct g_raid_md_intel_object *)md;
1265         meta = mdi->mdio_meta;
1266
1267         /* Create volumes and subdisks. */
1268         for (i = 0; i < meta->total_volumes; i++) {
1269                 mvol = intel_get_volume(meta, i);
1270                 mmap = intel_get_map(mvol, 0);
1271                 vol = g_raid_create_volume(sc, mvol->name, mvol->tid - 1);
1272                 pv = malloc(sizeof(*pv), M_MD_INTEL, M_WAITOK | M_ZERO);
1273                 pv->pv_volume_pos = i;
1274                 pv->pv_cng = (mvol->state & INTEL_ST_CLONE_N_GO) != 0;
1275                 pv->pv_cng_man_sync = (mvol->state & INTEL_ST_CLONE_MAN_SYNC) != 0;
                     /*
                      * An out-of-range master disk index is ignored; the field
                      * then keeps the 0 it got from the M_ZERO allocation.
                      */
1276                 if (mvol->cng_master_disk < mmap->total_disks)
1277                         pv->pv_cng_master_disk = mvol->cng_master_disk;
1278                 vol->v_md_data = pv;
1279                 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
                     /*
                      * Map the Intel volume type to a RAID level.  The Intel
                      * RAID1 type covers both RAID1 and RAID1E; they are told
                      * apart by total_domains when it is plausible, and by
                      * the disk count otherwise.
                      */
1280                 if (mmap->type == INTEL_T_RAID0)
1281                         vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
1282                 else if (mmap->type == INTEL_T_RAID1 &&
1283                     mmap->total_domains >= 2 &&
1284                     mmap->total_domains <= mmap->total_disks) {
1285                         /* Assume total_domains is correct. */
1286                         if (mmap->total_domains == mmap->total_disks)
1287                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
1288                         else
1289                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
1290                 } else if (mmap->type == INTEL_T_RAID1) {
1291                         /* total_domains looks wrong. */
1292                         if (mmap->total_disks <= 2)
1293                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
1294                         else
1295                                 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
1296                 } else if (mmap->type == INTEL_T_RAID5) {
1297                         vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
1298                         vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA;
1299                 } else
1300                         vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
                     /* Metadata counts are in sectors; a 512-byte sector is hard-coded here. */
1301                 vol->v_strip_size = (u_int)mmap->strip_sectors * 512; //ZZZ
1302                 vol->v_disks_count = mmap->total_disks;
1303                 vol->v_mediasize = (off_t)mvol->total_sectors * 512; //ZZZ
1304                 vol->v_sectorsize = 512; //ZZZ
                     /* Every subdisk of the volume gets the same offset and size. */
1305                 for (j = 0; j < vol->v_disks_count; j++) {
1306                         sd = &vol->v_subdisks[j];
1307                         sd->sd_offset = intel_get_map_offset(mmap) * 512; //ZZZ
1308                         sd->sd_size = intel_get_map_disk_sectors(mmap) * 512; //ZZZ
1309                 }
1310                 g_raid_start_volume(vol);
1311         }
1312
1313         /* Create disk placeholders to store data for later writing. */
1314         for (disk_pos = 0; disk_pos < meta->total_disks; disk_pos++) {
1315                 pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1316                 pd->pd_disk_pos = disk_pos;
1317                 pd->pd_disk_meta = meta->disk[disk_pos];
1318                 disk = g_raid_create_disk(sc);
1319                 disk->d_md_data = (void *)pd;
1320                 disk->d_state = G_RAID_DISK_S_OFFLINE;
                     /*
                      * For each volume, find the slot j in map 0 that refers to
                      * this disk and hang that subdisk on the placeholder;
                      * volumes that do not use the disk are skipped.
                      */
1321                 for (i = 0; i < meta->total_volumes; i++) {
1322                         mvol = intel_get_volume(meta, i);
1323                         mmap = intel_get_map(mvol, 0);
1324                         for (j = 0; j < mmap->total_disks; j++) {
1325                                 if ((mmap->disk_idx[j] & INTEL_DI_IDX) == disk_pos)
1326                                         break;
1327                         }
1328                         if (j == mmap->total_disks)
1329                                 continue;
1330                         vol = g_raid_md_intel_get_volume(sc, i);
1331                         sd = &vol->v_subdisks[j];
1332                         sd->sd_disk = disk;
1333                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1334                 }
1335         }
1336
1337         /* Make all disks found till the moment take their places. */
             /*
              * The scan restarts from the list head after every start_disk
              * call (which may destroy a placeholder disk) and ends once no
              * disk in state NONE is left.
              */
1338         do {
1339                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1340                         if (disk->d_state == G_RAID_DISK_S_NONE) {
1341                                 g_raid_md_intel_start_disk(disk);
1342                                 break;
1343                         }
1344                 }
1345         } while (disk != NULL);
1346
1347         mdi->mdio_started = 1;
1348         G_RAID_DEBUG1(0, sc, "Array started.");
1349         g_raid_md_write_intel(md, NULL, NULL, NULL);
1350
1351         /* Pickup any STALE/SPARE disks to refill array if needed. */
1352         g_raid_md_intel_refill(sc);
1353
1354         TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1355                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
1356                     G_RAID_EVENT_VOLUME);
1357         }
1358
             /* The start timeout is no longer needed; release the root mount hold. */
1359         callout_stop(&mdi->mdio_start_co);
1360         G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount);
1361         root_mount_rel(mdi->mdio_rootmount);
1362         mdi->mdio_rootmount = NULL;
1363 }
1364
1365 static void
1366 g_raid_md_intel_new_disk(struct g_raid_disk *disk)
1367 {
1368         struct g_raid_softc *sc;
1369         struct g_raid_md_object *md;
1370         struct g_raid_md_intel_object *mdi;
1371         struct intel_raid_conf *pdmeta;
1372         struct g_raid_md_intel_perdisk *pd;
1373
1374         sc = disk->d_softc;
1375         md = sc->sc_md;
1376         mdi = (struct g_raid_md_intel_object *)md;
1377         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1378         pdmeta = pd->pd_meta;
1379
1380         if (mdi->mdio_started) {
1381                 if (g_raid_md_intel_start_disk(disk))
1382                         g_raid_md_write_intel(md, NULL, NULL, NULL);
1383         } else {
1384                 /* If we haven't started yet - check metadata freshness. */
1385                 if (mdi->mdio_meta == NULL ||
1386                     ((int32_t)(pdmeta->generation - mdi->mdio_generation)) > 0) {
1387                         G_RAID_DEBUG1(1, sc, "Newer disk");
1388                         if (mdi->mdio_meta != NULL)
1389                                 free(mdi->mdio_meta, M_MD_INTEL);
1390                         mdi->mdio_meta = intel_meta_copy(pdmeta);
1391                         mdi->mdio_generation = mdi->mdio_meta->generation;
1392                         mdi->mdio_disks_present = 1;
1393                 } else if (pdmeta->generation == mdi->mdio_generation) {
1394                         mdi->mdio_disks_present++;
1395                         G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)",
1396                             mdi->mdio_disks_present,
1397                             mdi->mdio_meta->total_disks);
1398                 } else {
1399                         G_RAID_DEBUG1(1, sc, "Older disk");
1400                 }
1401                 /* If we collected all needed disks - start array. */
1402                 if (mdi->mdio_disks_present == mdi->mdio_meta->total_disks)
1403                         g_raid_md_intel_start(sc);
1404         }
1405 }
1406
1407 static void
1408 g_raid_intel_go(void *arg)
1409 {
1410         struct g_raid_softc *sc;
1411         struct g_raid_md_object *md;
1412         struct g_raid_md_intel_object *mdi;
1413
1414         sc = arg;
1415         md = sc->sc_md;
1416         mdi = (struct g_raid_md_intel_object *)md;
1417         if (!mdi->mdio_started) {
1418                 G_RAID_DEBUG1(0, sc, "Force array start due to timeout.");
1419                 g_raid_event_send(sc, G_RAID_NODE_E_START, 0);
1420         }
1421 }
1422
1423 static int
1424 g_raid_md_create_intel(struct g_raid_md_object *md, struct g_class *mp,
1425     struct g_geom **gp)
1426 {
1427         struct g_raid_softc *sc;
1428         struct g_raid_md_intel_object *mdi;
1429         char name[16];
1430
1431         mdi = (struct g_raid_md_intel_object *)md;
1432         mdi->mdio_config_id = mdi->mdio_orig_config_id = arc4random();
1433         mdi->mdio_generation = 0;
1434         snprintf(name, sizeof(name), "Intel-%08x", mdi->mdio_config_id);
1435         sc = g_raid_create_node(mp, name, md);
1436         if (sc == NULL)
1437                 return (G_RAID_MD_TASTE_FAIL);
1438         md->mdo_softc = sc;
1439         *gp = sc->sc_geom;
1440         return (G_RAID_MD_TASTE_NEW);
1441 }
1442
1443 /*
1444  * Return the last N characters of the serial label.  The Linux and
1445  * ataraid(7) code always uses the last 16 characters of the label to
1446  * store into the Intel meta format.  Generalize this to N characters
1447  * since that's easy.  Labels can be up to 20 characters for SATA drives
1448  * and up 251 characters for SAS drives.  Since intel controllers don't
1449  * support SAS drives, just stick with the SATA limits for stack friendliness.
1450  */
1451 static int
1452 g_raid_md_get_label(struct g_consumer *cp, char *serial, int serlen)
1453 {
1454         char serial_buffer[DISK_IDENT_SIZE];
1455         int len, error;
1456         
1457         len = sizeof(serial_buffer);
1458         error = g_io_getattr("GEOM::ident", cp, &len, serial_buffer);
1459         if (error != 0)
1460                 return (error);
1461         len = strlen(serial_buffer);
1462         if (len > serlen)
1463                 len -= serlen;
1464         else
1465                 len = 0;
1466         strncpy(serial, serial_buffer + len, serlen);
1467         return (0);
1468 }
1469
1470 static int
1471 g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp,
1472                               struct g_consumer *cp, struct g_geom **gp)
1473 {
1474         struct g_consumer *rcp;
1475         struct g_provider *pp;
1476         struct g_raid_md_intel_object *mdi, *mdi1;
1477         struct g_raid_softc *sc;
1478         struct g_raid_disk *disk;
1479         struct intel_raid_conf *meta;
1480         struct g_raid_md_intel_perdisk *pd;
1481         struct g_geom *geom;
1482         int error, disk_pos, result, spare, len;
1483         char serial[INTEL_SERIAL_LEN];
1484         char name[16];
1485         uint16_t vendor;
1486
1487         G_RAID_DEBUG(1, "Tasting Intel on %s", cp->provider->name);
1488         mdi = (struct g_raid_md_intel_object *)md;
1489         pp = cp->provider;
1490
1491         /* Read metadata from device. */
1492         meta = NULL;
1493         disk_pos = 0;
1494         g_topology_unlock();
1495         error = g_raid_md_get_label(cp, serial, sizeof(serial));
1496         if (error != 0) {
1497                 G_RAID_DEBUG(1, "Cannot get serial number from %s (error=%d).",
1498                     pp->name, error);
1499                 goto fail2;
1500         }
1501         vendor = 0xffff;
1502         len = sizeof(vendor);
1503         if (pp->geom->rank == 1)
1504                 g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor);
1505         meta = intel_meta_read(cp);
1506         g_topology_lock();
1507         if (meta == NULL) {
1508                 if (g_raid_aggressive_spare) {
1509                         if (vendor != 0x8086) {
1510                                 G_RAID_DEBUG(1,
1511                                     "Intel vendor mismatch 0x%04x != 0x8086",
1512                                     vendor);
1513                         } else {
1514                                 G_RAID_DEBUG(1,
1515                                     "No Intel metadata, forcing spare.");
1516                                 spare = 2;
1517                                 goto search;
1518                         }
1519                 }
1520                 return (G_RAID_MD_TASTE_FAIL);
1521         }
1522
1523         /* Check this disk position in obtained metadata. */
1524         disk_pos = intel_meta_find_disk(meta, serial);
1525         if (disk_pos < 0) {
1526                 G_RAID_DEBUG(1, "Intel serial '%s' not found", serial);
1527                 goto fail1;
1528         }
1529         if (intel_get_disk_sectors(&meta->disk[disk_pos]) !=
1530             (pp->mediasize / pp->sectorsize)) {
1531                 G_RAID_DEBUG(1, "Intel size mismatch %ju != %ju",
1532                     intel_get_disk_sectors(&meta->disk[disk_pos]),
1533                     (off_t)(pp->mediasize / pp->sectorsize));
1534                 goto fail1;
1535         }
1536
1537         G_RAID_DEBUG(1, "Intel disk position %d", disk_pos);
1538         spare = meta->disk[disk_pos].flags & INTEL_F_SPARE;
1539
1540 search:
1541         /* Search for matching node. */
1542         sc = NULL;
1543         mdi1 = NULL;
1544         LIST_FOREACH(geom, &mp->geom, geom) {
1545                 sc = geom->softc;
1546                 if (sc == NULL)
1547                         continue;
1548                 if (sc->sc_stopping != 0)
1549                         continue;
1550                 if (sc->sc_md->mdo_class != md->mdo_class)
1551                         continue;
1552                 mdi1 = (struct g_raid_md_intel_object *)sc->sc_md;
1553                 if (spare) {
1554                         if (mdi1->mdio_incomplete)
1555                                 break;
1556                 } else {
1557                         if (mdi1->mdio_config_id == meta->config_id)
1558                                 break;
1559                 }
1560         }
1561
1562         /* Found matching node. */
1563         if (geom != NULL) {
1564                 G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name);
1565                 result = G_RAID_MD_TASTE_EXISTING;
1566
1567         } else if (spare) { /* Not found needy node -- left for later. */
1568                 G_RAID_DEBUG(1, "Spare is not needed at this time");
1569                 goto fail1;
1570
1571         } else { /* Not found matching node -- create one. */
1572                 result = G_RAID_MD_TASTE_NEW;
1573                 mdi->mdio_config_id = meta->config_id;
1574                 mdi->mdio_orig_config_id = meta->orig_config_id;
1575                 snprintf(name, sizeof(name), "Intel-%08x", meta->config_id);
1576                 sc = g_raid_create_node(mp, name, md);
1577                 md->mdo_softc = sc;
1578                 geom = sc->sc_geom;
1579                 callout_init(&mdi->mdio_start_co, 1);
1580                 callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz,
1581                     g_raid_intel_go, sc);
1582                 mdi->mdio_rootmount = root_mount_hold("GRAID-Intel");
1583                 G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount);
1584         }
1585
1586         /* There is no return after this point, so we close passed consumer. */
1587         g_access(cp, -1, 0, 0);
1588
1589         rcp = g_new_consumer(geom);
1590         rcp->flags |= G_CF_DIRECT_RECEIVE;
1591         g_attach(rcp, pp);
1592         if (g_access(rcp, 1, 1, 1) != 0)
1593                 ; //goto fail1;
1594
1595         g_topology_unlock();
1596         sx_xlock(&sc->sc_lock);
1597
1598         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1599         pd->pd_meta = meta;
1600         pd->pd_disk_pos = -1;
1601         if (spare == 2) {
1602                 memcpy(&pd->pd_disk_meta.serial[0], serial, INTEL_SERIAL_LEN);
1603                 intel_set_disk_sectors(&pd->pd_disk_meta, 
1604                     pp->mediasize / pp->sectorsize);
1605                 pd->pd_disk_meta.id = 0;
1606                 pd->pd_disk_meta.flags = INTEL_F_SPARE;
1607         } else {
1608                 pd->pd_disk_meta = meta->disk[disk_pos];
1609         }
1610         disk = g_raid_create_disk(sc);
1611         disk->d_md_data = (void *)pd;
1612         disk->d_consumer = rcp;
1613         rcp->private = disk;
1614
1615         g_raid_get_disk_info(disk);
1616
1617         g_raid_md_intel_new_disk(disk);
1618
1619         sx_xunlock(&sc->sc_lock);
1620         g_topology_lock();
1621         *gp = geom;
1622         return (result);
1623 fail2:
1624         g_topology_lock();
1625 fail1:
1626         free(meta, M_MD_INTEL);
1627         return (G_RAID_MD_TASTE_FAIL);
1628 }
1629
1630 static int
1631 g_raid_md_event_intel(struct g_raid_md_object *md,
1632     struct g_raid_disk *disk, u_int event)
1633 {
1634         struct g_raid_softc *sc;
1635         struct g_raid_subdisk *sd;
1636         struct g_raid_md_intel_object *mdi;
1637         struct g_raid_md_intel_perdisk *pd;
1638
1639         sc = md->mdo_softc;
1640         mdi = (struct g_raid_md_intel_object *)md;
1641         if (disk == NULL) {
1642                 switch (event) {
1643                 case G_RAID_NODE_E_START:
1644                         if (!mdi->mdio_started)
1645                                 g_raid_md_intel_start(sc);
1646                         return (0);
1647                 }
1648                 return (-1);
1649         }
1650         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1651         switch (event) {
1652         case G_RAID_DISK_E_DISCONNECTED:
1653                 /* If disk was assigned, just update statuses. */
1654                 if (pd->pd_disk_pos >= 0) {
1655                         g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
1656                         if (disk->d_consumer) {
1657                                 g_raid_kill_consumer(sc, disk->d_consumer);
1658                                 disk->d_consumer = NULL;
1659                         }
1660                         TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
1661                                 g_raid_change_subdisk_state(sd,
1662                                     G_RAID_SUBDISK_S_NONE);
1663                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
1664                                     G_RAID_EVENT_SUBDISK);
1665                         }
1666                 } else {
1667                         /* Otherwise -- delete. */
1668                         g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
1669                         g_raid_destroy_disk(disk);
1670                 }
1671
1672                 /* Write updated metadata to all disks. */
1673                 g_raid_md_write_intel(md, NULL, NULL, NULL);
1674
1675                 /* Check if anything left except placeholders. */
1676                 if (g_raid_ndisks(sc, -1) ==
1677                     g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
1678                         g_raid_destroy_node(sc, 0);
1679                 else
1680                         g_raid_md_intel_refill(sc);
1681                 return (0);
1682         }
1683         return (-2);
1684 }
1685
1686 static int
1687 g_raid_md_ctl_intel(struct g_raid_md_object *md,
1688     struct gctl_req *req)
1689 {
1690         struct g_raid_softc *sc;
1691         struct g_raid_volume *vol, *vol1;
1692         struct g_raid_subdisk *sd;
1693         struct g_raid_disk *disk;
1694         struct g_raid_md_intel_object *mdi;
1695         struct g_raid_md_intel_pervolume *pv;
1696         struct g_raid_md_intel_perdisk *pd;
1697         struct g_consumer *cp;
1698         struct g_provider *pp;
1699         char arg[16], serial[INTEL_SERIAL_LEN];
1700         const char *nodename, *verb, *volname, *levelname, *diskname;
1701         char *tmp;
1702         int *nargs, *force;
1703         off_t off, size, sectorsize, strip, disk_sectors;
1704         intmax_t *sizearg, *striparg;
1705         int numdisks, i, len, level, qual, update;
1706         int error;
1707
1708         sc = md->mdo_softc;
1709         mdi = (struct g_raid_md_intel_object *)md;
1710         verb = gctl_get_param(req, "verb", NULL);
1711         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
1712         error = 0;
1713         if (strcmp(verb, "label") == 0) {
1714
1715                 if (*nargs < 4) {
1716                         gctl_error(req, "Invalid number of arguments.");
1717                         return (-1);
1718                 }
1719                 volname = gctl_get_asciiparam(req, "arg1");
1720                 if (volname == NULL) {
1721                         gctl_error(req, "No volume name.");
1722                         return (-2);
1723                 }
1724                 levelname = gctl_get_asciiparam(req, "arg2");
1725                 if (levelname == NULL) {
1726                         gctl_error(req, "No RAID level.");
1727                         return (-3);
1728                 }
1729                 if (strcasecmp(levelname, "RAID5") == 0)
1730                         levelname = "RAID5-LA";
1731                 if (g_raid_volume_str2level(levelname, &level, &qual)) {
1732                         gctl_error(req, "Unknown RAID level '%s'.", levelname);
1733                         return (-4);
1734                 }
1735                 numdisks = *nargs - 3;
1736                 force = gctl_get_paraml(req, "force", sizeof(*force));
1737                 if (!g_raid_md_intel_supported(level, qual, numdisks,
1738                     force ? *force : 0)) {
1739                         gctl_error(req, "Unsupported RAID level "
1740                             "(0x%02x/0x%02x), or number of disks (%d).",
1741                             level, qual, numdisks);
1742                         return (-5);
1743                 }
1744
1745                 /* Search for disks, connect them and probe. */
1746                 size = 0x7fffffffffffffffllu;
1747                 sectorsize = 0;
1748                 for (i = 0; i < numdisks; i++) {
1749                         snprintf(arg, sizeof(arg), "arg%d", i + 3);
1750                         diskname = gctl_get_asciiparam(req, arg);
1751                         if (diskname == NULL) {
1752                                 gctl_error(req, "No disk name (%s).", arg);
1753                                 error = -6;
1754                                 break;
1755                         }
1756                         if (strcmp(diskname, "NONE") == 0) {
1757                                 cp = NULL;
1758                                 pp = NULL;
1759                         } else {
1760                                 g_topology_lock();
1761                                 cp = g_raid_open_consumer(sc, diskname);
1762                                 if (cp == NULL) {
1763                                         gctl_error(req, "Can't open disk '%s'.",
1764                                             diskname);
1765                                         g_topology_unlock();
1766                                         error = -7;
1767                                         break;
1768                                 }
1769                                 pp = cp->provider;
1770                         }
1771                         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
1772                         pd->pd_disk_pos = i;
1773                         disk = g_raid_create_disk(sc);
1774                         disk->d_md_data = (void *)pd;
1775                         disk->d_consumer = cp;
1776                         if (cp == NULL) {
1777                                 strcpy(&pd->pd_disk_meta.serial[0], "NONE");
1778                                 pd->pd_disk_meta.id = 0xffffffff;
1779                                 pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
1780                                 continue;
1781                         }
1782                         cp->private = disk;
1783                         g_topology_unlock();
1784
1785                         error = g_raid_md_get_label(cp,
1786                             &pd->pd_disk_meta.serial[0], INTEL_SERIAL_LEN);
1787                         if (error != 0) {
1788                                 gctl_error(req,
1789                                     "Can't get serial for provider '%s'.",
1790                                     diskname);
1791                                 error = -8;
1792                                 break;
1793                         }
1794
1795                         g_raid_get_disk_info(disk);
1796
1797                         intel_set_disk_sectors(&pd->pd_disk_meta,
1798                             pp->mediasize / pp->sectorsize);
1799                         if (size > pp->mediasize)
1800                                 size = pp->mediasize;
1801                         if (sectorsize < pp->sectorsize)
1802                                 sectorsize = pp->sectorsize;
1803                         pd->pd_disk_meta.id = 0;
1804                         pd->pd_disk_meta.flags = INTEL_F_ASSIGNED | INTEL_F_ONLINE;
1805                 }
1806                 if (error != 0)
1807                         return (error);
1808
1809                 if (sectorsize <= 0) {
1810                         gctl_error(req, "Can't get sector size.");
1811                         return (-8);
1812                 }
1813
1814                 /* Reserve some space for metadata. */
1815                 size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
1816
1817                 /* Handle size argument. */
1818                 len = sizeof(*sizearg);
1819                 sizearg = gctl_get_param(req, "size", &len);
1820                 if (sizearg != NULL && len == sizeof(*sizearg) &&
1821                     *sizearg > 0) {
1822                         if (*sizearg > size) {
1823                                 gctl_error(req, "Size too big %lld > %lld.",
1824                                     (long long)*sizearg, (long long)size);
1825                                 return (-9);
1826                         }
1827                         size = *sizearg;
1828                 }
1829
1830                 /* Handle strip argument. */
1831                 strip = 131072;
1832                 len = sizeof(*striparg);
1833                 striparg = gctl_get_param(req, "strip", &len);
1834                 if (striparg != NULL && len == sizeof(*striparg) &&
1835                     *striparg > 0) {
1836                         if (*striparg < sectorsize) {
1837                                 gctl_error(req, "Strip size too small.");
1838                                 return (-10);
1839                         }
1840                         if (*striparg % sectorsize != 0) {
1841                                 gctl_error(req, "Incorrect strip size.");
1842                                 return (-11);
1843                         }
1844                         if (strip > 65535 * sectorsize) {
1845                                 gctl_error(req, "Strip size too big.");
1846                                 return (-12);
1847                         }
1848                         strip = *striparg;
1849                 }
1850
1851                 /* Round size down to strip or sector. */
1852                 if (level == G_RAID_VOLUME_RL_RAID1)
1853                         size -= (size % sectorsize);
1854                 else if (level == G_RAID_VOLUME_RL_RAID1E &&
1855                     (numdisks & 1) != 0)
1856                         size -= (size % (2 * strip));
1857                 else
1858                         size -= (size % strip);
1859                 if (size <= 0) {
1860                         gctl_error(req, "Size too small.");
1861                         return (-13);
1862                 }
1863
1864                 /* We have all we need, create things: volume, ... */
1865                 mdi->mdio_started = 1;
1866                 vol = g_raid_create_volume(sc, volname, -1);
1867                 pv = malloc(sizeof(*pv), M_MD_INTEL, M_WAITOK | M_ZERO);
1868                 pv->pv_volume_pos = 0;
1869                 vol->v_md_data = pv;
1870                 vol->v_raid_level = level;
1871                 vol->v_raid_level_qualifier = qual;
1872                 vol->v_strip_size = strip;
1873                 vol->v_disks_count = numdisks;
1874                 if (level == G_RAID_VOLUME_RL_RAID0)
1875                         vol->v_mediasize = size * numdisks;
1876                 else if (level == G_RAID_VOLUME_RL_RAID1)
1877                         vol->v_mediasize = size;
1878                 else if (level == G_RAID_VOLUME_RL_RAID5)
1879                         vol->v_mediasize = size * (numdisks - 1);
1880                 else { /* RAID1E */
1881                         vol->v_mediasize = ((size * numdisks) / strip / 2) *
1882                             strip;
1883                 }
1884                 vol->v_sectorsize = sectorsize;
1885                 g_raid_start_volume(vol);
1886
1887                 /* , and subdisks. */
1888                 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
1889                         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
1890                         sd = &vol->v_subdisks[pd->pd_disk_pos];
1891                         sd->sd_disk = disk;
1892                         sd->sd_offset = 0;
1893                         sd->sd_size = size;
1894                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
1895                         if (sd->sd_disk->d_consumer != NULL) {
1896                                 g_raid_change_disk_state(disk,
1897                                     G_RAID_DISK_S_ACTIVE);
1898                                 if (level == G_RAID_VOLUME_RL_RAID5)
1899                                         g_raid_change_subdisk_state(sd,
1900                                             G_RAID_SUBDISK_S_UNINITIALIZED);
1901                                 else
1902                                         g_raid_change_subdisk_state(sd,
1903                                             G_RAID_SUBDISK_S_ACTIVE);
1904                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
1905                                     G_RAID_EVENT_SUBDISK);
1906                         } else {
1907                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
1908                         }
1909                 }
1910
1911                 /* Write metadata based on created entities. */
1912                 G_RAID_DEBUG1(0, sc, "Array started.");
1913                 g_raid_md_write_intel(md, NULL, NULL, NULL);
1914
1915                 /* Pickup any STALE/SPARE disks to refill array if needed. */
1916                 g_raid_md_intel_refill(sc);
1917
1918                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
1919                     G_RAID_EVENT_VOLUME);
1920                 return (0);
1921         }
1922         if (strcmp(verb, "add") == 0) {
1923
1924                 if (*nargs != 3) {
1925                         gctl_error(req, "Invalid number of arguments.");
1926                         return (-1);
1927                 }
1928                 volname = gctl_get_asciiparam(req, "arg1");
1929                 if (volname == NULL) {
1930                         gctl_error(req, "No volume name.");
1931                         return (-2);
1932                 }
1933                 levelname = gctl_get_asciiparam(req, "arg2");
1934                 if (levelname == NULL) {
1935                         gctl_error(req, "No RAID level.");
1936                         return (-3);
1937                 }
1938                 if (strcasecmp(levelname, "RAID5") == 0)
1939                         levelname = "RAID5-LA";
1940                 if (g_raid_volume_str2level(levelname, &level, &qual)) {
1941                         gctl_error(req, "Unknown RAID level '%s'.", levelname);
1942                         return (-4);
1943                 }
1944
1945                 /* Look for existing volumes. */
1946                 i = 0;
1947                 vol1 = NULL;
1948                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
1949                         vol1 = vol;
1950                         i++;
1951                 }
1952                 if (i > 1) {
1953                         gctl_error(req, "Maximum two volumes supported.");
1954                         return (-6);
1955                 }
1956                 if (vol1 == NULL) {
1957                         gctl_error(req, "At least one volume must exist.");
1958                         return (-7);
1959                 }
1960
1961                 numdisks = vol1->v_disks_count;
1962                 force = gctl_get_paraml(req, "force", sizeof(*force));
1963                 if (!g_raid_md_intel_supported(level, qual, numdisks,
1964                     force ? *force : 0)) {
1965                         gctl_error(req, "Unsupported RAID level "
1966                             "(0x%02x/0x%02x), or number of disks (%d).",
1967                             level, qual, numdisks);
1968                         return (-5);
1969                 }
1970
1971                 /* Collect info about present disks. */
1972                 size = 0x7fffffffffffffffllu;
1973                 sectorsize = 512;
1974                 for (i = 0; i < numdisks; i++) {
1975                         disk = vol1->v_subdisks[i].sd_disk;
1976                         pd = (struct g_raid_md_intel_perdisk *)
1977                             disk->d_md_data;
1978                         disk_sectors = 
1979                             intel_get_disk_sectors(&pd->pd_disk_meta);
1980
1981                         if (disk_sectors * 512 < size)
1982                                 size = disk_sectors * 512;
1983                         if (disk->d_consumer != NULL &&
1984                             disk->d_consumer->provider != NULL &&
1985                             disk->d_consumer->provider->sectorsize >
1986                              sectorsize) {
1987                                 sectorsize =
1988                                     disk->d_consumer->provider->sectorsize;
1989                         }
1990                 }
1991
1992                 /* Reserve some space for metadata. */
1993                 size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize;
1994
1995                 /* Decide insert before or after. */
1996                 sd = &vol1->v_subdisks[0];
1997                 if (sd->sd_offset >
1998                     size - (sd->sd_offset + sd->sd_size)) {
1999                         off = 0;
2000                         size = sd->sd_offset;
2001                 } else {
2002                         off = sd->sd_offset + sd->sd_size;
2003                         size = size - (sd->sd_offset + sd->sd_size);
2004                 }
2005
2006                 /* Handle strip argument. */
2007                 strip = 131072;
2008                 len = sizeof(*striparg);
2009                 striparg = gctl_get_param(req, "strip", &len);
2010                 if (striparg != NULL && len == sizeof(*striparg) &&
2011                     *striparg > 0) {
2012                         if (*striparg < sectorsize) {
2013                                 gctl_error(req, "Strip size too small.");
2014                                 return (-10);
2015                         }
2016                         if (*striparg % sectorsize != 0) {
2017                                 gctl_error(req, "Incorrect strip size.");
2018                                 return (-11);
2019                         }
2020                         if (strip > 65535 * sectorsize) {
2021                                 gctl_error(req, "Strip size too big.");
2022                                 return (-12);
2023                         }
2024                         strip = *striparg;
2025                 }
2026
2027                 /* Round offset up to strip. */
2028                 if (off % strip != 0) {
2029                         size -= strip - off % strip;
2030                         off += strip - off % strip;
2031                 }
2032
2033                 /* Handle size argument. */
2034                 len = sizeof(*sizearg);
2035                 sizearg = gctl_get_param(req, "size", &len);
2036                 if (sizearg != NULL && len == sizeof(*sizearg) &&
2037                     *sizearg > 0) {
2038                         if (*sizearg > size) {
2039                                 gctl_error(req, "Size too big %lld > %lld.",
2040                                     (long long)*sizearg, (long long)size);
2041                                 return (-9);
2042                         }
2043                         size = *sizearg;
2044                 }
2045
2046                 /* Round size down to strip or sector. */
2047                 if (level == G_RAID_VOLUME_RL_RAID1)
2048                         size -= (size % sectorsize);
2049                 else
2050                         size -= (size % strip);
2051                 if (size <= 0) {
2052                         gctl_error(req, "Size too small.");
2053                         return (-13);
2054                 }
2055                 if (size > 0xffffffffllu * sectorsize) {
2056                         gctl_error(req, "Size too big.");
2057                         return (-14);
2058                 }
2059
2060                 /* We have all we need, create things: volume, ... */
2061                 vol = g_raid_create_volume(sc, volname, -1);
2062                 pv = malloc(sizeof(*pv), M_MD_INTEL, M_WAITOK | M_ZERO);
2063                 pv->pv_volume_pos = i;
2064                 vol->v_md_data = pv;
2065                 vol->v_raid_level = level;
2066                 vol->v_raid_level_qualifier = qual;
2067                 vol->v_strip_size = strip;
2068                 vol->v_disks_count = numdisks;
2069                 if (level == G_RAID_VOLUME_RL_RAID0)
2070                         vol->v_mediasize = size * numdisks;
2071                 else if (level == G_RAID_VOLUME_RL_RAID1)
2072                         vol->v_mediasize = size;
2073                 else if (level == G_RAID_VOLUME_RL_RAID5)
2074                         vol->v_mediasize = size * (numdisks - 1);
2075                 else { /* RAID1E */
2076                         vol->v_mediasize = ((size * numdisks) / strip / 2) *
2077                             strip;
2078                 }
2079                 vol->v_sectorsize = sectorsize;
2080                 g_raid_start_volume(vol);
2081
2082                 /* , and subdisks. */
2083                 for (i = 0; i < numdisks; i++) {
2084                         disk = vol1->v_subdisks[i].sd_disk;
2085                         sd = &vol->v_subdisks[i];
2086                         sd->sd_disk = disk;
2087                         sd->sd_offset = off;
2088                         sd->sd_size = size;
2089                         TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
2090                         if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
2091                                 if (level == G_RAID_VOLUME_RL_RAID5)
2092                                         g_raid_change_subdisk_state(sd,
2093                                             G_RAID_SUBDISK_S_UNINITIALIZED);
2094                                 else
2095                                         g_raid_change_subdisk_state(sd,
2096                                             G_RAID_SUBDISK_S_ACTIVE);
2097                                 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
2098                                     G_RAID_EVENT_SUBDISK);
2099                         }
2100                 }
2101
2102                 /* Write metadata based on created entities. */
2103                 g_raid_md_write_intel(md, NULL, NULL, NULL);
2104
2105                 g_raid_event_send(vol, G_RAID_VOLUME_E_START,
2106                     G_RAID_EVENT_VOLUME);
2107                 return (0);
2108         }
2109         if (strcmp(verb, "delete") == 0) {
2110
2111                 nodename = gctl_get_asciiparam(req, "arg0");
2112                 if (nodename != NULL && strcasecmp(sc->sc_name, nodename) != 0)
2113                         nodename = NULL;
2114
2115                 /* Full node destruction. */
2116                 if (*nargs == 1 && nodename != NULL) {
2117                         /* Check if some volume is still open. */
2118                         force = gctl_get_paraml(req, "force", sizeof(*force));
2119                         if (force != NULL && *force == 0 &&
2120                             g_raid_nopens(sc) != 0) {
2121                                 gctl_error(req, "Some volume is still open.");
2122                                 return (-4);
2123                         }
2124
2125                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2126                                 if (disk->d_consumer)
2127                                         intel_meta_erase(disk->d_consumer);
2128                         }
2129                         g_raid_destroy_node(sc, 0);
2130                         return (0);
2131                 }
2132
2133                 /* Destroy specified volume. If it was last - all node. */
2134                 if (*nargs > 2) {
2135                         gctl_error(req, "Invalid number of arguments.");
2136                         return (-1);
2137                 }
2138                 volname = gctl_get_asciiparam(req,
2139                     nodename != NULL ? "arg1" : "arg0");
2140                 if (volname == NULL) {
2141                         gctl_error(req, "No volume name.");
2142                         return (-2);
2143                 }
2144
2145                 /* Search for volume. */
2146                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2147                         if (strcmp(vol->v_name, volname) == 0)
2148                                 break;
2149                         pp = vol->v_provider;
2150                         if (pp == NULL)
2151                                 continue;
2152                         if (strcmp(pp->name, volname) == 0)
2153                                 break;
2154                         if (strncmp(pp->name, "raid/", 5) == 0 &&
2155                             strcmp(pp->name + 5, volname) == 0)
2156                                 break;
2157                 }
2158                 if (vol == NULL) {
2159                         i = strtol(volname, &tmp, 10);
2160                         if (verb != volname && tmp[0] == 0) {
2161                                 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2162                                         if (vol->v_global_id == i)
2163                                                 break;
2164                                 }
2165                         }
2166                 }
2167                 if (vol == NULL) {
2168                         gctl_error(req, "Volume '%s' not found.", volname);
2169                         return (-3);
2170                 }
2171
2172                 /* Check if volume is still open. */
2173                 force = gctl_get_paraml(req, "force", sizeof(*force));
2174                 if (force != NULL && *force == 0 &&
2175                     vol->v_provider_open != 0) {
2176                         gctl_error(req, "Volume is still open.");
2177                         return (-4);
2178                 }
2179
2180                 /* Destroy volume and potentially node. */
2181                 i = 0;
2182                 TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next)
2183                         i++;
2184                 if (i >= 2) {
2185                         g_raid_destroy_volume(vol);
2186                         g_raid_md_write_intel(md, NULL, NULL, NULL);
2187                 } else {
2188                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2189                                 if (disk->d_consumer)
2190                                         intel_meta_erase(disk->d_consumer);
2191                         }
2192                         g_raid_destroy_node(sc, 0);
2193                 }
2194                 return (0);
2195         }
2196         if (strcmp(verb, "remove") == 0 ||
2197             strcmp(verb, "fail") == 0) {
2198                 if (*nargs < 2) {
2199                         gctl_error(req, "Invalid number of arguments.");
2200                         return (-1);
2201                 }
2202                 for (i = 1; i < *nargs; i++) {
2203                         snprintf(arg, sizeof(arg), "arg%d", i);
2204                         diskname = gctl_get_asciiparam(req, arg);
2205                         if (diskname == NULL) {
2206                                 gctl_error(req, "No disk name (%s).", arg);
2207                                 error = -2;
2208                                 break;
2209                         }
2210                         if (strncmp(diskname, "/dev/", 5) == 0)
2211                                 diskname += 5;
2212
2213                         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2214                                 if (disk->d_consumer != NULL && 
2215                                     disk->d_consumer->provider != NULL &&
2216                                     strcmp(disk->d_consumer->provider->name,
2217                                      diskname) == 0)
2218                                         break;
2219                         }
2220                         if (disk == NULL) {
2221                                 gctl_error(req, "Disk '%s' not found.",
2222                                     diskname);
2223                                 error = -3;
2224                                 break;
2225                         }
2226
2227                         if (strcmp(verb, "fail") == 0) {
2228                                 g_raid_md_fail_disk_intel(md, NULL, disk);
2229                                 continue;
2230                         }
2231
2232                         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2233
2234                         /* Erase metadata on deleting disk. */
2235                         intel_meta_erase(disk->d_consumer);
2236
2237                         /* If disk was assigned, just update statuses. */
2238                         if (pd->pd_disk_pos >= 0) {
2239                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
2240                                 g_raid_kill_consumer(sc, disk->d_consumer);
2241                                 disk->d_consumer = NULL;
2242                                 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
2243                                         g_raid_change_subdisk_state(sd,
2244                                             G_RAID_SUBDISK_S_NONE);
2245                                         g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
2246                                             G_RAID_EVENT_SUBDISK);
2247                                 }
2248                         } else {
2249                                 /* Otherwise -- delete. */
2250                                 g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
2251                                 g_raid_destroy_disk(disk);
2252                         }
2253                 }
2254
2255                 /* Write updated metadata to remaining disks. */
2256                 g_raid_md_write_intel(md, NULL, NULL, NULL);
2257
2258                 /* Check if anything left except placeholders. */
2259                 if (g_raid_ndisks(sc, -1) ==
2260                     g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
2261                         g_raid_destroy_node(sc, 0);
2262                 else
2263                         g_raid_md_intel_refill(sc);
2264                 return (error);
2265         }
2266         if (strcmp(verb, "insert") == 0) {
2267                 if (*nargs < 2) {
2268                         gctl_error(req, "Invalid number of arguments.");
2269                         return (-1);
2270                 }
2271                 update = 0;
2272                 for (i = 1; i < *nargs; i++) {
2273                         /* Get disk name. */
2274                         snprintf(arg, sizeof(arg), "arg%d", i);
2275                         diskname = gctl_get_asciiparam(req, arg);
2276                         if (diskname == NULL) {
2277                                 gctl_error(req, "No disk name (%s).", arg);
2278                                 error = -3;
2279                                 break;
2280                         }
2281
2282                         /* Try to find provider with specified name. */
2283                         g_topology_lock();
2284                         cp = g_raid_open_consumer(sc, diskname);
2285                         if (cp == NULL) {
2286                                 gctl_error(req, "Can't open disk '%s'.",
2287                                     diskname);
2288                                 g_topology_unlock();
2289                                 error = -4;
2290                                 break;
2291                         }
2292                         pp = cp->provider;
2293                         g_topology_unlock();
2294
2295                         /* Read disk serial. */
2296                         error = g_raid_md_get_label(cp,
2297                             &serial[0], INTEL_SERIAL_LEN);
2298                         if (error != 0) {
2299                                 gctl_error(req,
2300                                     "Can't get serial for provider '%s'.",
2301                                     diskname);
2302                                 g_raid_kill_consumer(sc, cp);
2303                                 error = -7;
2304                                 break;
2305                         }
2306
2307                         pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
2308                         pd->pd_disk_pos = -1;
2309
2310                         disk = g_raid_create_disk(sc);
2311                         disk->d_consumer = cp;
2312                         disk->d_md_data = (void *)pd;
2313                         cp->private = disk;
2314
2315                         g_raid_get_disk_info(disk);
2316
2317                         memcpy(&pd->pd_disk_meta.serial[0], &serial[0],
2318                             INTEL_SERIAL_LEN);
2319                         intel_set_disk_sectors(&pd->pd_disk_meta,
2320                             pp->mediasize / pp->sectorsize);
2321                         pd->pd_disk_meta.id = 0;
2322                         pd->pd_disk_meta.flags = INTEL_F_SPARE;
2323
2324                         /* Welcome the "new" disk. */
2325                         update += g_raid_md_intel_start_disk(disk);
2326                         if (disk->d_state == G_RAID_DISK_S_SPARE) {
2327                                 intel_meta_write_spare(cp, &pd->pd_disk_meta);
2328                                 g_raid_destroy_disk(disk);
2329                         } else if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
2330                                 gctl_error(req, "Disk '%s' doesn't fit.",
2331                                     diskname);
2332                                 g_raid_destroy_disk(disk);
2333                                 error = -8;
2334                                 break;
2335                         }
2336                 }
2337
2338                 /* Write new metadata if we changed something. */
2339                 if (update)
2340                         g_raid_md_write_intel(md, NULL, NULL, NULL);
2341                 return (error);
2342         }
2343         return (-100);
2344 }
2345
2346 static int
2347 g_raid_md_write_intel(struct g_raid_md_object *md, struct g_raid_volume *tvol,
2348     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
2349 {
2350         struct g_raid_softc *sc;
2351         struct g_raid_volume *vol;
2352         struct g_raid_subdisk *sd;
2353         struct g_raid_disk *disk;
2354         struct g_raid_md_intel_object *mdi;
2355         struct g_raid_md_intel_pervolume *pv;
2356         struct g_raid_md_intel_perdisk *pd;
2357         struct intel_raid_conf *meta;
2358         struct intel_raid_vol *mvol;
2359         struct intel_raid_map *mmap0, *mmap1;
2360         off_t sectorsize = 512, pos;
2361         const char *version, *cv;
2362         int vi, sdi, numdisks, len, state, stale;
2363
2364         sc = md->mdo_softc;
2365         mdi = (struct g_raid_md_intel_object *)md;
2366
2367         if (sc->sc_stopping == G_RAID_DESTROY_HARD)
2368                 return (0);
2369
2370         /* Bump generation. Newly written metadata may differ from previous. */
2371         mdi->mdio_generation++;
2372
2373         /* Count number of disks. */
2374         numdisks = 0;
2375         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2376                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2377                 if (pd->pd_disk_pos < 0)
2378                         continue;
2379                 numdisks++;
2380                 if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
2381                         pd->pd_disk_meta.flags =
2382                             INTEL_F_ONLINE | INTEL_F_ASSIGNED;
2383                 } else if (disk->d_state == G_RAID_DISK_S_FAILED) {
2384                         pd->pd_disk_meta.flags = INTEL_F_FAILED |
2385                             INTEL_F_ASSIGNED;
2386                 } else if (disk->d_state == G_RAID_DISK_S_DISABLED) {
2387                         pd->pd_disk_meta.flags = INTEL_F_FAILED |
2388                             INTEL_F_ASSIGNED | INTEL_F_DISABLED;
2389                 } else {
2390                         if (!(pd->pd_disk_meta.flags & INTEL_F_DISABLED))
2391                                 pd->pd_disk_meta.flags = INTEL_F_ASSIGNED;
2392                         if (pd->pd_disk_meta.id != 0xffffffff) {
2393                                 pd->pd_disk_meta.id = 0xffffffff;
2394                                 len = strlen(pd->pd_disk_meta.serial);
2395                                 len = min(len, INTEL_SERIAL_LEN - 3);
2396                                 strcpy(pd->pd_disk_meta.serial + len, ":0");
2397                         }
2398                 }
2399         }
2400
2401         /* Fill anchor and disks. */
2402         meta = malloc(INTEL_MAX_MD_SIZE(numdisks),
2403             M_MD_INTEL, M_WAITOK | M_ZERO);
2404         memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1);
2405         meta->config_size = INTEL_MAX_MD_SIZE(numdisks);
2406         meta->config_id = mdi->mdio_config_id;
2407         meta->orig_config_id = mdi->mdio_orig_config_id;
2408         meta->generation = mdi->mdio_generation;
2409         meta->attributes = INTEL_ATTR_CHECKSUM;
2410         meta->total_disks = numdisks;
2411         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2412                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2413                 if (pd->pd_disk_pos < 0)
2414                         continue;
2415                 meta->disk[pd->pd_disk_pos] = pd->pd_disk_meta;
2416                 if (pd->pd_disk_meta.sectors_hi != 0)
2417                         meta->attributes |= INTEL_ATTR_2TB_DISK;
2418         }
2419
2420         /* Fill volumes and maps. */
2421         vi = 0;
2422         version = INTEL_VERSION_1000;
2423         TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
2424                 pv = vol->v_md_data;
2425                 if (vol->v_stopping)
2426                         continue;
2427                 mvol = intel_get_volume(meta, vi);
2428
2429                 /* New metadata may have different volumes order. */
2430                 pv->pv_volume_pos = vi;
2431
2432                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2433                         sd = &vol->v_subdisks[sdi];
2434                         if (sd->sd_disk != NULL)
2435                                 break;
2436                 }
2437                 if (sdi >= vol->v_disks_count)
2438                         panic("No any filled subdisk in volume");
2439                 if (vol->v_mediasize >= 0x20000000000llu)
2440                         meta->attributes |= INTEL_ATTR_2TB;
2441                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
2442                         meta->attributes |= INTEL_ATTR_RAID0;
2443                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2444                         meta->attributes |= INTEL_ATTR_RAID1;
2445                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
2446                         meta->attributes |= INTEL_ATTR_RAID5;
2447                 else if ((vol->v_disks_count & 1) == 0)
2448                         meta->attributes |= INTEL_ATTR_RAID10;
2449                 else
2450                         meta->attributes |= INTEL_ATTR_RAID1E;
2451                 if (pv->pv_cng)
2452                         meta->attributes |= INTEL_ATTR_RAIDCNG;
2453                 if (vol->v_strip_size > 131072)
2454                         meta->attributes |= INTEL_ATTR_EXT_STRIP;
2455
2456                 if (pv->pv_cng)
2457                         cv = INTEL_VERSION_1206;
2458                 else if (vol->v_disks_count > 4)
2459                         cv = INTEL_VERSION_1204;
2460                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
2461                         cv = INTEL_VERSION_1202;
2462                 else if (vol->v_disks_count > 2)
2463                         cv = INTEL_VERSION_1201;
2464                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2465                         cv = INTEL_VERSION_1100;
2466                 else
2467                         cv = INTEL_VERSION_1000;
2468                 if (strcmp(cv, version) > 0)
2469                         version = cv;
2470
2471                 strlcpy(&mvol->name[0], vol->v_name, sizeof(mvol->name));
2472                 mvol->total_sectors = vol->v_mediasize / sectorsize;
2473                 mvol->state = (INTEL_ST_READ_COALESCING |
2474                     INTEL_ST_WRITE_COALESCING);
2475                 mvol->tid = vol->v_global_id + 1;
2476                 if (pv->pv_cng) {
2477                         mvol->state |= INTEL_ST_CLONE_N_GO;
2478                         if (pv->pv_cng_man_sync)
2479                                 mvol->state |= INTEL_ST_CLONE_MAN_SYNC;
2480                         mvol->cng_master_disk = pv->pv_cng_master_disk;
2481                         if (vol->v_subdisks[pv->pv_cng_master_disk].sd_state ==
2482                             G_RAID_SUBDISK_S_NONE)
2483                                 mvol->cng_state = INTEL_CNGST_MASTER_MISSING;
2484                         else if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL)
2485                                 mvol->cng_state = INTEL_CNGST_NEEDS_UPDATE;
2486                         else
2487                                 mvol->cng_state = INTEL_CNGST_UPDATED;
2488                 }
2489
2490                 /* Check for any recovery in progress. */
2491                 state = G_RAID_SUBDISK_S_ACTIVE;
2492                 pos = 0x7fffffffffffffffllu;
2493                 stale = 0;
2494                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2495                         sd = &vol->v_subdisks[sdi];
2496                         if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD)
2497                                 state = G_RAID_SUBDISK_S_REBUILD;
2498                         else if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC &&
2499                             state != G_RAID_SUBDISK_S_REBUILD)
2500                                 state = G_RAID_SUBDISK_S_RESYNC;
2501                         else if (sd->sd_state == G_RAID_SUBDISK_S_STALE)
2502                                 stale = 1;
2503                         if ((sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
2504                             sd->sd_state == G_RAID_SUBDISK_S_RESYNC) &&
2505                              sd->sd_rebuild_pos < pos)
2506                                 pos = sd->sd_rebuild_pos;
2507                 }
2508                 if (state == G_RAID_SUBDISK_S_REBUILD) {
2509                         mvol->migr_state = 1;
2510                         mvol->migr_type = INTEL_MT_REBUILD;
2511                 } else if (state == G_RAID_SUBDISK_S_RESYNC) {
2512                         mvol->migr_state = 1;
2513                         /* mvol->migr_type = INTEL_MT_REPAIR; */
2514                         mvol->migr_type = INTEL_MT_VERIFY;
2515                         mvol->state |= INTEL_ST_VERIFY_AND_FIX;
2516                 } else
2517                         mvol->migr_state = 0;
2518                 mvol->dirty = (vol->v_dirty || stale);
2519
2520                 mmap0 = intel_get_map(mvol, 0);
2521
2522                 /* Write map / common part of two maps. */
2523                 intel_set_map_offset(mmap0, sd->sd_offset / sectorsize);
2524                 intel_set_map_disk_sectors(mmap0, sd->sd_size / sectorsize);
2525                 mmap0->strip_sectors = vol->v_strip_size / sectorsize;
2526                 if (vol->v_state == G_RAID_VOLUME_S_BROKEN)
2527                         mmap0->status = INTEL_S_FAILURE;
2528                 else if (vol->v_state == G_RAID_VOLUME_S_DEGRADED)
2529                         mmap0->status = INTEL_S_DEGRADED;
2530                 else if (g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_UNINITIALIZED)
2531                     == g_raid_nsubdisks(vol, -1))
2532                         mmap0->status = INTEL_S_UNINITIALIZED;
2533                 else
2534                         mmap0->status = INTEL_S_READY;
2535                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
2536                         mmap0->type = INTEL_T_RAID0;
2537                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
2538                     vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
2539                         mmap0->type = INTEL_T_RAID1;
2540                 else
2541                         mmap0->type = INTEL_T_RAID5;
2542                 mmap0->total_disks = vol->v_disks_count;
2543                 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1)
2544                         mmap0->total_domains = vol->v_disks_count;
2545                 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
2546                         mmap0->total_domains = 2;
2547                 else
2548                         mmap0->total_domains = 1;
2549                 intel_set_map_stripe_count(mmap0,
2550                     sd->sd_size / vol->v_strip_size / mmap0->total_domains);
2551                 mmap0->failed_disk_num = 0xff;
2552                 mmap0->ddf = 1;
2553
2554                 /* If there are two maps - copy common and update. */
2555                 if (mvol->migr_state) {
2556                         intel_set_vol_curr_migr_unit(mvol,
2557                             pos / vol->v_strip_size / mmap0->total_domains);
2558                         mmap1 = intel_get_map(mvol, 1);
2559                         memcpy(mmap1, mmap0, sizeof(struct intel_raid_map));
2560                         mmap0->status = INTEL_S_READY;
2561                 } else
2562                         mmap1 = NULL;
2563
2564                 /* Write disk indexes and put rebuild flags. */
2565                 for (sdi = 0; sdi < vol->v_disks_count; sdi++) {
2566                         sd = &vol->v_subdisks[sdi];
2567                         pd = (struct g_raid_md_intel_perdisk *)
2568                             sd->sd_disk->d_md_data;
2569                         mmap0->disk_idx[sdi] = pd->pd_disk_pos;
2570                         if (mvol->migr_state)
2571                                 mmap1->disk_idx[sdi] = pd->pd_disk_pos;
2572                         if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD ||
2573                             sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
2574                                 mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
2575                         } else if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE &&
2576                             sd->sd_state != G_RAID_SUBDISK_S_STALE &&
2577                             sd->sd_state != G_RAID_SUBDISK_S_UNINITIALIZED) {
2578                                 mmap0->disk_idx[sdi] |= INTEL_DI_RBLD;
2579                                 if (mvol->migr_state)
2580                                         mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
2581                         }
2582                         if ((sd->sd_state == G_RAID_SUBDISK_S_NONE ||
2583                              sd->sd_state == G_RAID_SUBDISK_S_FAILED ||
2584                              sd->sd_state == G_RAID_SUBDISK_S_REBUILD) &&
2585                             mmap0->failed_disk_num == 0xff) {
2586                                 mmap0->failed_disk_num = sdi;
2587                                 if (mvol->migr_state)
2588                                         mmap1->failed_disk_num = sdi;
2589                         }
2590                 }
2591                 vi++;
2592         }
2593         meta->total_volumes = vi;
2594         if (vi > 1 || meta->attributes &
2595              (INTEL_ATTR_EXT_STRIP | INTEL_ATTR_2TB_DISK | INTEL_ATTR_2TB))
2596                 version = INTEL_VERSION_1300;
2597         if (strcmp(version, INTEL_VERSION_1300) < 0)
2598                 meta->attributes &= INTEL_ATTR_CHECKSUM;
2599         memcpy(&meta->version[0], version, sizeof(INTEL_VERSION_1000) - 1);
2600
2601         /* We are done. Print meta data and store them to disks. */
2602         g_raid_md_intel_print(meta);
2603         if (mdi->mdio_meta != NULL)
2604                 free(mdi->mdio_meta, M_MD_INTEL);
2605         mdi->mdio_meta = meta;
2606         TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
2607                 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2608                 if (disk->d_state != G_RAID_DISK_S_ACTIVE)
2609                         continue;
2610                 if (pd->pd_meta != NULL) {
2611                         free(pd->pd_meta, M_MD_INTEL);
2612                         pd->pd_meta = NULL;
2613                 }
2614                 pd->pd_meta = intel_meta_copy(meta);
2615                 intel_meta_write(disk->d_consumer, meta);
2616         }
2617         return (0);
2618 }
2619
2620 static int
2621 g_raid_md_fail_disk_intel(struct g_raid_md_object *md,
2622     struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
2623 {
2624         struct g_raid_softc *sc;
2625         struct g_raid_md_intel_object *mdi;
2626         struct g_raid_md_intel_perdisk *pd;
2627         struct g_raid_subdisk *sd;
2628
2629         sc = md->mdo_softc;
2630         mdi = (struct g_raid_md_intel_object *)md;
2631         pd = (struct g_raid_md_intel_perdisk *)tdisk->d_md_data;
2632
2633         /* We can't fail disk that is not a part of array now. */
2634         if (pd->pd_disk_pos < 0)
2635                 return (-1);
2636
2637         /*
2638          * Mark disk as failed in metadata and try to write that metadata
2639          * to the disk itself to prevent it's later resurrection as STALE.
2640          */
2641         mdi->mdio_meta->disk[pd->pd_disk_pos].flags = INTEL_F_FAILED;
2642         pd->pd_disk_meta.flags = INTEL_F_FAILED;
2643         g_raid_md_intel_print(mdi->mdio_meta);
2644         if (tdisk->d_consumer)
2645                 intel_meta_write(tdisk->d_consumer, mdi->mdio_meta);
2646
2647         /* Change states. */
2648         g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
2649         TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
2650                 g_raid_change_subdisk_state(sd,
2651                     G_RAID_SUBDISK_S_FAILED);
2652                 g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
2653                     G_RAID_EVENT_SUBDISK);
2654         }
2655
2656         /* Write updated metadata to remaining disks. */
2657         g_raid_md_write_intel(md, NULL, NULL, tdisk);
2658
2659         /* Check if anything left except placeholders. */
2660         if (g_raid_ndisks(sc, -1) ==
2661             g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE))
2662                 g_raid_destroy_node(sc, 0);
2663         else
2664                 g_raid_md_intel_refill(sc);
2665         return (0);
2666 }
2667
2668 static int
2669 g_raid_md_free_disk_intel(struct g_raid_md_object *md,
2670     struct g_raid_disk *disk)
2671 {
2672         struct g_raid_md_intel_perdisk *pd;
2673
2674         pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data;
2675         if (pd->pd_meta != NULL) {
2676                 free(pd->pd_meta, M_MD_INTEL);
2677                 pd->pd_meta = NULL;
2678         }
2679         free(pd, M_MD_INTEL);
2680         disk->d_md_data = NULL;
2681         return (0);
2682 }
2683
2684 static int
2685 g_raid_md_free_volume_intel(struct g_raid_md_object *md,
2686     struct g_raid_volume *vol)
2687 {
2688         struct g_raid_md_intel_pervolume *pv;
2689
2690         pv = (struct g_raid_md_intel_pervolume *)vol->v_md_data;
2691         free(pv, M_MD_INTEL);
2692         vol->v_md_data = NULL;
2693         return (0);
2694 }
2695
2696 static int
2697 g_raid_md_free_intel(struct g_raid_md_object *md)
2698 {
2699         struct g_raid_md_intel_object *mdi;
2700
2701         mdi = (struct g_raid_md_intel_object *)md;
2702         if (!mdi->mdio_started) {
2703                 mdi->mdio_started = 0;
2704                 callout_stop(&mdi->mdio_start_co);
2705                 G_RAID_DEBUG1(1, md->mdo_softc,
2706                     "root_mount_rel %p", mdi->mdio_rootmount);
2707                 root_mount_rel(mdi->mdio_rootmount);
2708                 mdi->mdio_rootmount = NULL;
2709         }
2710         if (mdi->mdio_meta != NULL) {
2711                 free(mdi->mdio_meta, M_MD_INTEL);
2712                 mdi->mdio_meta = NULL;
2713         }
2714         return (0);
2715 }
2716
/*
 * NOTE(review): G_RAID_MD_DECLARE is defined outside this file chunk;
 * presumably it registers this metadata class under the name "Intel" --
 * confirm against the macro definition.
 */
2717 G_RAID_MD_DECLARE(intel, "Intel");