]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - lib/libefi/rdwr_efi.c
Update the behavior of mountpoint property
[FreeBSD/FreeBSD.git] / lib / libefi / rdwr_efi.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
25  * Copyright (c) 2018 by Delphix. All rights reserved.
26  */
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <errno.h>
31 #include <string.h>
32 #include <unistd.h>
33 #include <uuid/uuid.h>
34 #include <zlib.h>
35 #include <libintl.h>
36 #include <sys/types.h>
37 #include <sys/dkio.h>
38 #include <sys/mhd.h>
39 #include <sys/param.h>
40 #include <sys/dktp/fdisk.h>
41 #include <sys/efi_partition.h>
42 #include <sys/byteorder.h>
43 #include <sys/vdev_disk.h>
44 #include <linux/fs.h>
45 #include <linux/blkpg.h>
46
/*
 * Table of known EFI partition-type UUIDs.
 *
 * NOTE(review): the struct tag (uuid_to_ptag) and the "never used"
 * placeholders suggest that an entry's index is the partition tag it
 * corresponds to, so the order of entries is presumably significant --
 * confirm against the code that walks this array before reordering.
 */
static struct uuid_to_ptag {
        struct uuid     uuid;
} conversion_array[] = {
        { EFI_UNUSED },
        { EFI_BOOT },
        { EFI_ROOT },
        { EFI_SWAP },
        { EFI_USR },
        { EFI_BACKUP },
        { EFI_UNUSED },         /* STAND is never used */
        { EFI_VAR },
        { EFI_HOME },
        { EFI_ALTSCTR },
        { EFI_UNUSED },         /* CACHE (cachefs) is never used */
        { EFI_RESERVED },
        { EFI_SYSTEM },
        { EFI_LEGACY_MBR },
        { EFI_SYMC_PUB },
        { EFI_SYMC_CDS },
        { EFI_MSFT_RESV },
        { EFI_DELL_BASIC },
        { EFI_DELL_RAID },
        { EFI_DELL_SWAP },
        { EFI_DELL_LVM },
        { EFI_DELL_RESV },
        { EFI_AAPL_HFS },
        { EFI_AAPL_UFS },
        { EFI_FREEBSD_BOOT },
        { EFI_FREEBSD_SWAP },
        { EFI_FREEBSD_UFS },
        { EFI_FREEBSD_VINUM },
        { EFI_FREEBSD_ZFS },
        { EFI_BIOS_BOOT },
        { EFI_INTC_RS },
        { EFI_SNE_BOOT },
        { EFI_LENOVO_BOOT },
        { EFI_MSFT_LDMM },
        { EFI_MSFT_LDMD },
        { EFI_MSFT_RE },
        { EFI_IBM_GPFS },
        { EFI_MSFT_STORAGESPACES },
        { EFI_HPQ_DATA },
        { EFI_HPQ_SVC },
        { EFI_RHT_DATA },
        { EFI_RHT_HOME },
        { EFI_RHT_SRV },
        { EFI_RHT_DMCRYPT },
        { EFI_RHT_LUKS },
        { EFI_FREEBSD_DISKLABEL },
        { EFI_AAPL_RAID },
        { EFI_AAPL_RAIDOFFLINE },
        { EFI_AAPL_BOOT },
        { EFI_AAPL_LABEL },
        { EFI_AAPL_TVRECOVERY },
        { EFI_AAPL_CORESTORAGE },
        { EFI_NETBSD_SWAP },
        { EFI_NETBSD_FFS },
        { EFI_NETBSD_LFS },
        { EFI_NETBSD_RAID },
        { EFI_NETBSD_CAT },
        { EFI_NETBSD_CRYPT },
        { EFI_GOOG_KERN },
        { EFI_GOOG_ROOT },
        { EFI_GOOG_RESV },
        { EFI_HAIKU_BFS },
        { EFI_MIDNIGHTBSD_BOOT },
        { EFI_MIDNIGHTBSD_DATA },
        { EFI_MIDNIGHTBSD_SWAP },
        { EFI_MIDNIGHTBSD_UFS },
        { EFI_MIDNIGHTBSD_VINUM },
        { EFI_MIDNIGHTBSD_ZFS },
        { EFI_CEPH_JOURNAL },
        { EFI_CEPH_DMCRYPTJOURNAL },
        { EFI_CEPH_OSD },
        { EFI_CEPH_DMCRYPTOSD },
        { EFI_CEPH_CREATE },
        { EFI_CEPH_DMCRYPTCREATE },
        { EFI_OPENBSD_DISKLABEL },
        { EFI_BBRY_QNX },
        { EFI_BELL_PLAN9 },
        { EFI_VMW_KCORE },
        { EFI_VMW_VMFS },
        { EFI_VMW_RESV },
        { EFI_RHT_ROOTX86 },
        { EFI_RHT_ROOTAMD64 },
        { EFI_RHT_ROOTARM },
        { EFI_RHT_ROOTARM64 },
        { EFI_ACRONIS_SECUREZONE },
        { EFI_ONIE_BOOT },
        { EFI_ONIE_CONFIG },
        { EFI_IBM_PPRPBOOT },
        { EFI_FREEDESKTOP_BOOT }
};
140
/* When non-zero, the routines in this file print diagnostics to stderr. */
int efi_debug = 0;

/* Forward declaration; efi_alloc_and_read() calls this before it is defined. */
static int efi_read(int, struct dk_gpt *);
144
145 /*
146  * Return a 32-bit CRC of the contents of the buffer.  Pre-and-post
147  * one's conditioning will be handled by crc32() internally.
148  */
149 static uint32_t
150 efi_crc32(const unsigned char *buf, unsigned int size)
151 {
152         uint32_t crc = crc32(0, Z_NULL, 0);
153
154         crc = crc32(crc, buf, size);
155
156         return (crc);
157 }
158
159 static int
160 read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
161 {
162         int sector_size;
163         unsigned long long capacity_size;
164
165         if (ioctl(fd, BLKSSZGET, &sector_size) < 0)
166                 return (-1);
167
168         if (ioctl(fd, BLKGETSIZE64, &capacity_size) < 0)
169                 return (-1);
170
171         *lbsize = (uint_t)sector_size;
172         *capacity = (diskaddr_t)(capacity_size / sector_size);
173
174         return (0);
175 }
176
/*
 * Resolve the device name that the open descriptor `fd' refers to.
 *
 * Returns a string allocated by realpath(3), which the caller must
 * free(), or NULL if the path could not be resolved.
 */
static char *
efi_get_devname(int fd)
{
        char proc_link[32];
        char *resolved;

        /*
         * libefi callers hand us only a descriptor, never a path, so the
         * name is recovered by resolving the /proc/self/fd/<fd> symlink.
         */
        (void) snprintf(proc_link, sizeof (proc_link),
            "/proc/self/fd/%d", fd);

        resolved = realpath(proc_link, NULL);
        return (resolved);
}
195
196 static int
197 efi_get_info(int fd, struct dk_cinfo *dki_info)
198 {
199         char *dev_path;
200         int rval = 0;
201
202         memset(dki_info, 0, sizeof (*dki_info));
203
204         /*
205          * The simplest way to get the partition number under linux is
206          * to parse it out of the /dev/<disk><partition> block device name.
207          * The kernel creates this using the partition number when it
208          * populates /dev/ so it may be trusted.  The tricky bit here is
209          * that the naming convention is based on the block device type.
210          * So we need to take this in to account when parsing out the
211          * partition information.  Aside from the partition number we collect
212          * some additional device info.
213          */
214         dev_path = efi_get_devname(fd);
215         if (dev_path == NULL)
216                 goto error;
217
218         if ((strncmp(dev_path, "/dev/sd", 7) == 0)) {
219                 strcpy(dki_info->dki_cname, "sd");
220                 dki_info->dki_ctype = DKC_SCSI_CCS;
221                 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
222                     dki_info->dki_dname,
223                     &dki_info->dki_partition);
224         } else if ((strncmp(dev_path, "/dev/hd", 7) == 0)) {
225                 strcpy(dki_info->dki_cname, "hd");
226                 dki_info->dki_ctype = DKC_DIRECT;
227                 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
228                     dki_info->dki_dname,
229                     &dki_info->dki_partition);
230         } else if ((strncmp(dev_path, "/dev/md", 7) == 0)) {
231                 strcpy(dki_info->dki_cname, "pseudo");
232                 dki_info->dki_ctype = DKC_MD;
233                 strcpy(dki_info->dki_dname, "md");
234                 rval = sscanf(dev_path, "/dev/md%[0-9]p%hu",
235                     dki_info->dki_dname + 2,
236                     &dki_info->dki_partition);
237         } else if ((strncmp(dev_path, "/dev/vd", 7) == 0)) {
238                 strcpy(dki_info->dki_cname, "vd");
239                 dki_info->dki_ctype = DKC_MD;
240                 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
241                     dki_info->dki_dname,
242                     &dki_info->dki_partition);
243         } else if ((strncmp(dev_path, "/dev/xvd", 8) == 0)) {
244                 strcpy(dki_info->dki_cname, "xvd");
245                 dki_info->dki_ctype = DKC_MD;
246                 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
247                     dki_info->dki_dname,
248                     &dki_info->dki_partition);
249         } else if ((strncmp(dev_path, "/dev/zd", 7) == 0)) {
250                 strcpy(dki_info->dki_cname, "zd");
251                 dki_info->dki_ctype = DKC_MD;
252                 strcpy(dki_info->dki_dname, "zd");
253                 rval = sscanf(dev_path, "/dev/zd%[0-9]p%hu",
254                     dki_info->dki_dname + 2,
255                     &dki_info->dki_partition);
256         } else if ((strncmp(dev_path, "/dev/dm-", 8) == 0)) {
257                 strcpy(dki_info->dki_cname, "pseudo");
258                 dki_info->dki_ctype = DKC_VBD;
259                 strcpy(dki_info->dki_dname, "dm-");
260                 rval = sscanf(dev_path, "/dev/dm-%[0-9]p%hu",
261                     dki_info->dki_dname + 3,
262                     &dki_info->dki_partition);
263         } else if ((strncmp(dev_path, "/dev/ram", 8) == 0)) {
264                 strcpy(dki_info->dki_cname, "pseudo");
265                 dki_info->dki_ctype = DKC_PCMCIA_MEM;
266                 strcpy(dki_info->dki_dname, "ram");
267                 rval = sscanf(dev_path, "/dev/ram%[0-9]p%hu",
268                     dki_info->dki_dname + 3,
269                     &dki_info->dki_partition);
270         } else if ((strncmp(dev_path, "/dev/loop", 9) == 0)) {
271                 strcpy(dki_info->dki_cname, "pseudo");
272                 dki_info->dki_ctype = DKC_VBD;
273                 strcpy(dki_info->dki_dname, "loop");
274                 rval = sscanf(dev_path, "/dev/loop%[0-9]p%hu",
275                     dki_info->dki_dname + 4,
276                     &dki_info->dki_partition);
277         } else if ((strncmp(dev_path, "/dev/nvme", 9) == 0)) {
278                 strcpy(dki_info->dki_cname, "nvme");
279                 dki_info->dki_ctype = DKC_SCSI_CCS;
280                 strcpy(dki_info->dki_dname, "nvme");
281                 (void) sscanf(dev_path, "/dev/nvme%[0-9]",
282                     dki_info->dki_dname + 4);
283                 size_t controller_length = strlen(
284                     dki_info->dki_dname);
285                 strcpy(dki_info->dki_dname + controller_length,
286                     "n");
287                 rval = sscanf(dev_path,
288                     "/dev/nvme%*[0-9]n%[0-9]p%hu",
289                     dki_info->dki_dname + controller_length + 1,
290                     &dki_info->dki_partition);
291         } else {
292                 strcpy(dki_info->dki_dname, "unknown");
293                 strcpy(dki_info->dki_cname, "unknown");
294                 dki_info->dki_ctype = DKC_UNKNOWN;
295         }
296
297         switch (rval) {
298         case 0:
299                 errno = EINVAL;
300                 goto error;
301         case 1:
302                 dki_info->dki_partition = 0;
303         }
304
305         free(dev_path);
306
307         return (0);
308 error:
309         if (efi_debug)
310                 (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
311
312         switch (errno) {
313         case EIO:
314                 return (VT_EIO);
315         case EINVAL:
316                 return (VT_EINVAL);
317         default:
318                 return (VT_ERROR);
319         }
320 }
321
/*
 * The number of blocks the EFI label takes up: one block plus the
 * (p)-entry partition array, rounded up to a whole number of (l)-byte
 * blocks.  (l) must be non-zero.
 */
#define NBLOCKS(p, l)   (1 + ((((p) * (int)sizeof (efi_gpe_t))  + \
                                ((l) - 1)) / (l)))
/*
 * Number of partitions -- limited by what we can malloc: the largest
 * count for which a dk_gpt followed by that many dk_part entries still
 * fits in 4294967295 bytes.
 */
#define MAX_PARTS       ((4294967295UL - sizeof (struct dk_gpt)) / \
                            sizeof (struct dk_part))
331
332 int
333 efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc)
334 {
335         diskaddr_t      capacity = 0;
336         uint_t          lbsize = 0;
337         uint_t          nblocks;
338         size_t          length;
339         struct dk_gpt   *vptr;
340         struct uuid     uuid;
341         struct dk_cinfo dki_info;
342
343         if (read_disk_info(fd, &capacity, &lbsize) != 0)
344                 return (-1);
345
346         if (efi_get_info(fd, &dki_info) != 0)
347                 return (-1);
348
349         if (dki_info.dki_partition != 0)
350                 return (-1);
351
352         if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) ||
353             (dki_info.dki_ctype == DKC_VBD) ||
354             (dki_info.dki_ctype == DKC_UNKNOWN))
355                 return (-1);
356
357         nblocks = NBLOCKS(nparts, lbsize);
358         if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) {
359                 /* 16K plus one block for the GPT */
360                 nblocks = EFI_MIN_ARRAY_SIZE / lbsize + 1;
361         }
362
363         if (nparts > MAX_PARTS) {
364                 if (efi_debug) {
365                         (void) fprintf(stderr,
366                         "the maximum number of partitions supported is %lu\n",
367                             MAX_PARTS);
368                 }
369                 return (-1);
370         }
371
372         length = sizeof (struct dk_gpt) +
373             sizeof (struct dk_part) * (nparts - 1);
374
375         vptr = calloc(1, length);
376         if (vptr == NULL)
377                 return (-1);
378
379         *vtoc = vptr;
380
381         vptr->efi_version = EFI_VERSION_CURRENT;
382         vptr->efi_lbasize = lbsize;
383         vptr->efi_nparts = nparts;
384         /*
385          * add one block here for the PMBR; on disks with a 512 byte
386          * block size and 128 or fewer partitions, efi_first_u_lba
387          * should work out to "34"
388          */
389         vptr->efi_first_u_lba = nblocks + 1;
390         vptr->efi_last_lba = capacity - 1;
391         vptr->efi_altern_lba = capacity -1;
392         vptr->efi_last_u_lba = vptr->efi_last_lba - nblocks;
393
394         (void) uuid_generate((uchar_t *)&uuid);
395         UUID_LE_CONVERT(vptr->efi_disk_uguid, uuid);
396         return (0);
397 }
398
399 /*
400  * Read EFI - return partition number upon success.
401  */
402 int
403 efi_alloc_and_read(int fd, struct dk_gpt **vtoc)
404 {
405         int                     rval;
406         uint32_t                nparts;
407         int                     length;
408         struct dk_gpt           *vptr;
409
410         /* figure out the number of entries that would fit into 16K */
411         nparts = EFI_MIN_ARRAY_SIZE / sizeof (efi_gpe_t);
412         length = (int) sizeof (struct dk_gpt) +
413             (int) sizeof (struct dk_part) * (nparts - 1);
414         vptr = calloc(1, length);
415
416         if (vptr == NULL)
417                 return (VT_ERROR);
418
419         vptr->efi_nparts = nparts;
420         rval = efi_read(fd, vptr);
421
422         if ((rval == VT_EINVAL) && vptr->efi_nparts > nparts) {
423                 void *tmp;
424                 length = (int) sizeof (struct dk_gpt) +
425                     (int) sizeof (struct dk_part) * (vptr->efi_nparts - 1);
426                 if ((tmp = realloc(vptr, length)) == NULL) {
427                         /* cppcheck-suppress doubleFree */
428                         free(vptr);
429                         *vtoc = NULL;
430                         return (VT_ERROR);
431                 } else {
432                         vptr = tmp;
433                         rval = efi_read(fd, vptr);
434                 }
435         }
436
437         if (rval < 0) {
438                 if (efi_debug) {
439                         (void) fprintf(stderr,
440                             "read of EFI table failed, rval=%d\n", rval);
441                 }
442                 free(vptr);
443                 *vtoc = NULL;
444         } else {
445                 *vtoc = vptr;
446         }
447
448         return (rval);
449 }
450
451 static int
452 efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc)
453 {
454         void *data = dk_ioc->dki_data;
455         int error;
456         diskaddr_t capacity;
457         uint_t lbsize;
458
459         /*
460          * When the IO is not being performed in kernel as an ioctl we need
461          * to know the sector size so we can seek to the proper byte offset.
462          */
463         if (read_disk_info(fd, &capacity, &lbsize) == -1) {
464                 if (efi_debug)
465                         fprintf(stderr, "unable to read disk info: %d", errno);
466
467                 errno = EIO;
468                 return (-1);
469         }
470
471         switch (cmd) {
472         case DKIOCGETEFI:
473                 if (lbsize == 0) {
474                         if (efi_debug)
475                                 (void) fprintf(stderr, "DKIOCGETEFI assuming "
476                                     "LBA %d bytes\n", DEV_BSIZE);
477
478                         lbsize = DEV_BSIZE;
479                 }
480
481                 error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET);
482                 if (error == -1) {
483                         if (efi_debug)
484                                 (void) fprintf(stderr, "DKIOCGETEFI lseek "
485                                     "error: %d\n", errno);
486                         return (error);
487                 }
488
489                 error = read(fd, data, dk_ioc->dki_length);
490                 if (error == -1) {
491                         if (efi_debug)
492                                 (void) fprintf(stderr, "DKIOCGETEFI read "
493                                     "error: %d\n", errno);
494                         return (error);
495                 }
496
497                 if (error != dk_ioc->dki_length) {
498                         if (efi_debug)
499                                 (void) fprintf(stderr, "DKIOCGETEFI short "
500                                     "read of %d bytes\n", error);
501                         errno = EIO;
502                         return (-1);
503                 }
504                 error = 0;
505                 break;
506
507         case DKIOCSETEFI:
508                 if (lbsize == 0) {
509                         if (efi_debug)
510                                 (void) fprintf(stderr, "DKIOCSETEFI unknown "
511                                     "LBA size\n");
512                         errno = EIO;
513                         return (-1);
514                 }
515
516                 error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET);
517                 if (error == -1) {
518                         if (efi_debug)
519                                 (void) fprintf(stderr, "DKIOCSETEFI lseek "
520                                     "error: %d\n", errno);
521                         return (error);
522                 }
523
524                 error = write(fd, data, dk_ioc->dki_length);
525                 if (error == -1) {
526                         if (efi_debug)
527                                 (void) fprintf(stderr, "DKIOCSETEFI write "
528                                     "error: %d\n", errno);
529                         return (error);
530                 }
531
532                 if (error != dk_ioc->dki_length) {
533                         if (efi_debug)
534                                 (void) fprintf(stderr, "DKIOCSETEFI short "
535                                     "write of %d bytes\n", error);
536                         errno = EIO;
537                         return (-1);
538                 }
539
540                 /* Sync the new EFI table to disk */
541                 error = fsync(fd);
542                 if (error == -1)
543                         return (error);
544
545                 /* Ensure any local disk cache is also flushed */
546                 if (ioctl(fd, BLKFLSBUF, 0) == -1)
547                         return (error);
548
549                 error = 0;
550                 break;
551
552         default:
553                 if (efi_debug)
554                         (void) fprintf(stderr, "unsupported ioctl()\n");
555
556                 errno = EIO;
557                 return (-1);
558         }
559
560         return (error);
561 }
562
563 int
564 efi_rescan(int fd)
565 {
566         int retry = 10;
567
568         /* Notify the kernel a devices partition table has been updated */
569         while (ioctl(fd, BLKRRPART) != 0) {
570                 if ((--retry == 0) || (errno != EBUSY)) {
571                         (void) fprintf(stderr, "the kernel failed to rescan "
572                             "the partition table: %d\n", errno);
573                         return (-1);
574                 }
575                 usleep(50000);
576         }
577
578         return (0);
579 }
580
581 static int
582 check_label(int fd, dk_efi_t *dk_ioc)
583 {
584         efi_gpt_t               *efi;
585         uint_t                  crc;
586
587         if (efi_ioctl(fd, DKIOCGETEFI, dk_ioc) == -1) {
588                 switch (errno) {
589                 case EIO:
590                         return (VT_EIO);
591                 default:
592                         return (VT_ERROR);
593                 }
594         }
595         efi = dk_ioc->dki_data;
596         if (efi->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) {
597                 if (efi_debug)
598                         (void) fprintf(stderr,
599                             "Bad EFI signature: 0x%llx != 0x%llx\n",
600                             (long long)efi->efi_gpt_Signature,
601                             (long long)LE_64(EFI_SIGNATURE));
602                 return (VT_EINVAL);
603         }
604
605         /*
606          * check CRC of the header; the size of the header should
607          * never be larger than one block
608          */
609         crc = efi->efi_gpt_HeaderCRC32;
610         efi->efi_gpt_HeaderCRC32 = 0;
611         len_t headerSize = (len_t)LE_32(efi->efi_gpt_HeaderSize);
612
613         if (headerSize < EFI_MIN_LABEL_SIZE || headerSize > EFI_LABEL_SIZE) {
614                 if (efi_debug)
615                         (void) fprintf(stderr,
616                             "Invalid EFI HeaderSize %llu.  Assuming %d.\n",
617                             headerSize, EFI_MIN_LABEL_SIZE);
618         }
619
620         if ((headerSize > dk_ioc->dki_length) ||
621             crc != LE_32(efi_crc32((unsigned char *)efi, headerSize))) {
622                 if (efi_debug)
623                         (void) fprintf(stderr,
624                             "Bad EFI CRC: 0x%x != 0x%x\n",
625                             crc, LE_32(efi_crc32((unsigned char *)efi,
626                             headerSize)));
627                 return (VT_EINVAL);
628         }
629
630         return (0);
631 }
632
633 static int
634 efi_read(int fd, struct dk_gpt *vtoc)
635 {
636         int                     i, j;
637         int                     label_len;
638         int                     rval = 0;
639         int                     md_flag = 0;
640         int                     vdc_flag = 0;
641         diskaddr_t              capacity = 0;
642         uint_t                  lbsize = 0;
643         struct dk_minfo         disk_info;
644         dk_efi_t                dk_ioc;
645         efi_gpt_t               *efi;
646         efi_gpe_t               *efi_parts;
647         struct dk_cinfo         dki_info;
648         uint32_t                user_length;
649         boolean_t               legacy_label = B_FALSE;
650
651         /*
652          * get the partition number for this file descriptor.
653          */
654         if ((rval = efi_get_info(fd, &dki_info)) != 0)
655                 return (rval);
656
657         if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
658             (strncmp(dki_info.dki_dname, "md", 3) == 0)) {
659                 md_flag++;
660         } else if ((strncmp(dki_info.dki_cname, "vdc", 4) == 0) &&
661             (strncmp(dki_info.dki_dname, "vdc", 4) == 0)) {
662                 /*
663                  * The controller and drive name "vdc" (virtual disk client)
664                  * indicates a LDoms virtual disk.
665                  */
666                 vdc_flag++;
667         }
668
669         /* get the LBA size */
670         if (read_disk_info(fd, &capacity, &lbsize) == -1) {
671                 if (efi_debug) {
672                         (void) fprintf(stderr,
673                             "unable to read disk info: %d",
674                             errno);
675                 }
676                 return (VT_EINVAL);
677         }
678
679         disk_info.dki_lbsize = lbsize;
680         disk_info.dki_capacity = capacity;
681
682         if (disk_info.dki_lbsize == 0) {
683                 if (efi_debug) {
684                         (void) fprintf(stderr,
685                             "efi_read: assuming LBA 512 bytes\n");
686                 }
687                 disk_info.dki_lbsize = DEV_BSIZE;
688         }
689         /*
690          * Read the EFI GPT to figure out how many partitions we need
691          * to deal with.
692          */
693         dk_ioc.dki_lba = 1;
694         if (NBLOCKS(vtoc->efi_nparts, disk_info.dki_lbsize) < 34) {
695                 label_len = EFI_MIN_ARRAY_SIZE + disk_info.dki_lbsize;
696         } else {
697                 label_len = vtoc->efi_nparts * (int) sizeof (efi_gpe_t) +
698                     disk_info.dki_lbsize;
699                 if (label_len % disk_info.dki_lbsize) {
700                         /* pad to physical sector size */
701                         label_len += disk_info.dki_lbsize;
702                         label_len &= ~(disk_info.dki_lbsize - 1);
703                 }
704         }
705
706         if (posix_memalign((void **)&dk_ioc.dki_data,
707             disk_info.dki_lbsize, label_len))
708                 return (VT_ERROR);
709
710         memset(dk_ioc.dki_data, 0, label_len);
711         dk_ioc.dki_length = disk_info.dki_lbsize;
712         user_length = vtoc->efi_nparts;
713         efi = dk_ioc.dki_data;
714         if (md_flag) {
715                 dk_ioc.dki_length = label_len;
716                 if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) {
717                         switch (errno) {
718                         case EIO:
719                                 return (VT_EIO);
720                         default:
721                                 return (VT_ERROR);
722                         }
723                 }
724         } else if ((rval = check_label(fd, &dk_ioc)) == VT_EINVAL) {
725                 /*
726                  * No valid label here; try the alternate. Note that here
727                  * we just read GPT header and save it into dk_ioc.data,
728                  * Later, we will read GUID partition entry array if we
729                  * can get valid GPT header.
730                  */
731
732                 /*
733                  * This is a workaround for legacy systems. In the past, the
734                  * last sector of SCSI disk was invisible on x86 platform. At
735                  * that time, backup label was saved on the next to the last
736                  * sector. It is possible for users to move a disk from previous
737                  * solaris system to present system. Here, we attempt to search
738                  * legacy backup EFI label first.
739                  */
740                 dk_ioc.dki_lba = disk_info.dki_capacity - 2;
741                 dk_ioc.dki_length = disk_info.dki_lbsize;
742                 rval = check_label(fd, &dk_ioc);
743                 if (rval == VT_EINVAL) {
744                         /*
745                          * we didn't find legacy backup EFI label, try to
746                          * search backup EFI label in the last block.
747                          */
748                         dk_ioc.dki_lba = disk_info.dki_capacity - 1;
749                         dk_ioc.dki_length = disk_info.dki_lbsize;
750                         rval = check_label(fd, &dk_ioc);
751                         if (rval == 0) {
752                                 legacy_label = B_TRUE;
753                                 if (efi_debug)
754                                         (void) fprintf(stderr,
755                                             "efi_read: primary label corrupt; "
756                                             "using EFI backup label located on"
757                                             " the last block\n");
758                         }
759                 } else {
760                         if ((efi_debug) && (rval == 0))
761                                 (void) fprintf(stderr, "efi_read: primary label"
762                                     " corrupt; using legacy EFI backup label "
763                                     " located on the next to last block\n");
764                 }
765
766                 if (rval == 0) {
767                         dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA);
768                         vtoc->efi_flags |= EFI_GPT_PRIMARY_CORRUPT;
769                         vtoc->efi_nparts =
770                             LE_32(efi->efi_gpt_NumberOfPartitionEntries);
771                         /*
772                          * Partition tables are between backup GPT header
773                          * table and ParitionEntryLBA (the starting LBA of
774                          * the GUID partition entries array). Now that we
775                          * already got valid GPT header and saved it in
776                          * dk_ioc.dki_data, we try to get GUID partition
777                          * entry array here.
778                          */
779                         /* LINTED */
780                         dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data
781                             + disk_info.dki_lbsize);
782                         if (legacy_label)
783                                 dk_ioc.dki_length = disk_info.dki_capacity - 1 -
784                                     dk_ioc.dki_lba;
785                         else
786                                 dk_ioc.dki_length = disk_info.dki_capacity - 2 -
787                                     dk_ioc.dki_lba;
788                         dk_ioc.dki_length *= disk_info.dki_lbsize;
789                         if (dk_ioc.dki_length >
790                             ((len_t)label_len - sizeof (*dk_ioc.dki_data))) {
791                                 rval = VT_EINVAL;
792                         } else {
793                                 /*
794                                  * read GUID partition entry array
795                                  */
796                                 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc);
797                         }
798                 }
799
800         } else if (rval == 0) {
801
802                 dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA);
803                 /* LINTED */
804                 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data
805                     + disk_info.dki_lbsize);
806                 dk_ioc.dki_length = label_len - disk_info.dki_lbsize;
807                 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc);
808
809         } else if (vdc_flag && rval == VT_ERROR && errno == EINVAL) {
810                 /*
811                  * When the device is a LDoms virtual disk, the DKIOCGETEFI
812                  * ioctl can fail with EINVAL if the virtual disk backend
813                  * is a ZFS volume serviced by a domain running an old version
814                  * of Solaris. This is because the DKIOCGETEFI ioctl was
815                  * initially incorrectly implemented for a ZFS volume and it
816                  * expected the GPT and GPE to be retrieved with a single ioctl.
817                  * So we try to read the GPT and the GPE using that old style
818                  * ioctl.
819                  */
820                 dk_ioc.dki_lba = 1;
821                 dk_ioc.dki_length = label_len;
822                 rval = check_label(fd, &dk_ioc);
823         }
824
825         if (rval < 0) {
826                 free(efi);
827                 return (rval);
828         }
829
830         /* LINTED -- always longlong aligned */
831         efi_parts = (efi_gpe_t *)(((char *)efi) + disk_info.dki_lbsize);
832
833         /*
834          * Assemble this into a "dk_gpt" struct for easier
835          * digestibility by applications.
836          */
837         vtoc->efi_version = LE_32(efi->efi_gpt_Revision);
838         vtoc->efi_nparts = LE_32(efi->efi_gpt_NumberOfPartitionEntries);
839         vtoc->efi_part_size = LE_32(efi->efi_gpt_SizeOfPartitionEntry);
840         vtoc->efi_lbasize = disk_info.dki_lbsize;
841         vtoc->efi_last_lba = disk_info.dki_capacity - 1;
842         vtoc->efi_first_u_lba = LE_64(efi->efi_gpt_FirstUsableLBA);
843         vtoc->efi_last_u_lba = LE_64(efi->efi_gpt_LastUsableLBA);
844         vtoc->efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA);
845         UUID_LE_CONVERT(vtoc->efi_disk_uguid, efi->efi_gpt_DiskGUID);
846
847         /*
848          * If the array the user passed in is too small, set the length
849          * to what it needs to be and return
850          */
851         if (user_length < vtoc->efi_nparts) {
852                 return (VT_EINVAL);
853         }
854
855         for (i = 0; i < vtoc->efi_nparts; i++) {
856                 UUID_LE_CONVERT(vtoc->efi_parts[i].p_guid,
857                     efi_parts[i].efi_gpe_PartitionTypeGUID);
858
859                 for (j = 0;
860                     j < sizeof (conversion_array)
861                     / sizeof (struct uuid_to_ptag); j++) {
862
863                         if (memcmp(&vtoc->efi_parts[i].p_guid,
864                             &conversion_array[j].uuid,
865                             sizeof (struct uuid)) == 0) {
866                                 vtoc->efi_parts[i].p_tag = j;
867                                 break;
868                         }
869                 }
870                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED)
871                         continue;
872                 vtoc->efi_parts[i].p_flag =
873                     LE_16(efi_parts[i].efi_gpe_Attributes.PartitionAttrs);
874                 vtoc->efi_parts[i].p_start =
875                     LE_64(efi_parts[i].efi_gpe_StartingLBA);
876                 vtoc->efi_parts[i].p_size =
877                     LE_64(efi_parts[i].efi_gpe_EndingLBA) -
878                     vtoc->efi_parts[i].p_start + 1;
879                 for (j = 0; j < EFI_PART_NAME_LEN; j++) {
880                         vtoc->efi_parts[i].p_name[j] =
881                             (uchar_t)LE_16(
882                             efi_parts[i].efi_gpe_PartitionName[j]);
883                 }
884
885                 UUID_LE_CONVERT(vtoc->efi_parts[i].p_uguid,
886                     efi_parts[i].efi_gpe_UniquePartitionGUID);
887         }
888         free(efi);
889
890         return (dki_info.dki_partition);
891 }
892
893 /* writes a "protective" MBR */
894 static int
895 write_pmbr(int fd, struct dk_gpt *vtoc)
896 {
897         dk_efi_t        dk_ioc;
898         struct mboot    mb;
899         uchar_t         *cp;
900         diskaddr_t      size_in_lba;
901         uchar_t         *buf;
902         int             len;
903
904         len = (vtoc->efi_lbasize == 0) ? sizeof (mb) : vtoc->efi_lbasize;
905         if (posix_memalign((void **)&buf, len, len))
906                 return (VT_ERROR);
907
908         /*
909          * Preserve any boot code and disk signature if the first block is
910          * already an MBR.
911          */
912         memset(buf, 0, len);
913         dk_ioc.dki_lba = 0;
914         dk_ioc.dki_length = len;
915         /* LINTED -- always longlong aligned */
916         dk_ioc.dki_data = (efi_gpt_t *)buf;
917         if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) {
918                 memset(&mb, 0, sizeof (mb));
919                 mb.signature = LE_16(MBB_MAGIC);
920         } else {
921                 (void) memcpy(&mb, buf, sizeof (mb));
922                 if (mb.signature != LE_16(MBB_MAGIC)) {
923                         memset(&mb, 0, sizeof (mb));
924                         mb.signature = LE_16(MBB_MAGIC);
925                 }
926         }
927
928         memset(&mb.parts, 0, sizeof (mb.parts));
929         cp = (uchar_t *)&mb.parts[0];
930         /* bootable or not */
931         *cp++ = 0;
932         /* beginning CHS; 0xffffff if not representable */
933         *cp++ = 0xff;
934         *cp++ = 0xff;
935         *cp++ = 0xff;
936         /* OS type */
937         *cp++ = EFI_PMBR;
938         /* ending CHS; 0xffffff if not representable */
939         *cp++ = 0xff;
940         *cp++ = 0xff;
941         *cp++ = 0xff;
942         /* starting LBA: 1 (little endian format) by EFI definition */
943         *cp++ = 0x01;
944         *cp++ = 0x00;
945         *cp++ = 0x00;
946         *cp++ = 0x00;
947         /* ending LBA: last block on the disk (little endian format) */
948         size_in_lba = vtoc->efi_last_lba;
949         if (size_in_lba < 0xffffffff) {
950                 *cp++ = (size_in_lba & 0x000000ff);
951                 *cp++ = (size_in_lba & 0x0000ff00) >> 8;
952                 *cp++ = (size_in_lba & 0x00ff0000) >> 16;
953                 *cp++ = (size_in_lba & 0xff000000) >> 24;
954         } else {
955                 *cp++ = 0xff;
956                 *cp++ = 0xff;
957                 *cp++ = 0xff;
958                 *cp++ = 0xff;
959         }
960
961         (void) memcpy(buf, &mb, sizeof (mb));
962         /* LINTED -- always longlong aligned */
963         dk_ioc.dki_data = (efi_gpt_t *)buf;
964         dk_ioc.dki_lba = 0;
965         dk_ioc.dki_length = len;
966         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
967                 free(buf);
968                 switch (errno) {
969                 case EIO:
970                         return (VT_EIO);
971                 case EINVAL:
972                         return (VT_EINVAL);
973                 default:
974                         return (VT_ERROR);
975                 }
976         }
977         free(buf);
978         return (0);
979 }
980
981 /* make sure the user specified something reasonable */
982 static int
983 check_input(struct dk_gpt *vtoc)
984 {
985         int                     resv_part = -1;
986         int                     i, j;
987         diskaddr_t              istart, jstart, isize, jsize, endsect;
988
989         /*
990          * Sanity-check the input (make sure no partitions overlap)
991          */
992         for (i = 0; i < vtoc->efi_nparts; i++) {
993                 /* It can't be unassigned and have an actual size */
994                 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) &&
995                     (vtoc->efi_parts[i].p_size != 0)) {
996                         if (efi_debug) {
997                                 (void) fprintf(stderr, "partition %d is "
998                                     "\"unassigned\" but has a size of %llu",
999                                     i, vtoc->efi_parts[i].p_size);
1000                         }
1001                         return (VT_EINVAL);
1002                 }
1003                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) {
1004                         if (uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
1005                                 continue;
1006                         /* we have encountered an unknown uuid */
1007                         vtoc->efi_parts[i].p_tag = 0xff;
1008                 }
1009                 if (vtoc->efi_parts[i].p_tag == V_RESERVED) {
1010                         if (resv_part != -1) {
1011                                 if (efi_debug) {
1012                                         (void) fprintf(stderr, "found "
1013                                             "duplicate reserved partition "
1014                                             "at %d\n", i);
1015                                 }
1016                                 return (VT_EINVAL);
1017                         }
1018                         resv_part = i;
1019                 }
1020                 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) ||
1021                     (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) {
1022                         if (efi_debug) {
1023                                 (void) fprintf(stderr,
1024                                     "Partition %d starts at %llu.  ",
1025                                     i,
1026                                     vtoc->efi_parts[i].p_start);
1027                                 (void) fprintf(stderr,
1028                                     "It must be between %llu and %llu.\n",
1029                                     vtoc->efi_first_u_lba,
1030                                     vtoc->efi_last_u_lba);
1031                         }
1032                         return (VT_EINVAL);
1033                 }
1034                 if ((vtoc->efi_parts[i].p_start +
1035                     vtoc->efi_parts[i].p_size <
1036                     vtoc->efi_first_u_lba) ||
1037                     (vtoc->efi_parts[i].p_start +
1038                     vtoc->efi_parts[i].p_size >
1039                     vtoc->efi_last_u_lba + 1)) {
1040                         if (efi_debug) {
1041                                 (void) fprintf(stderr,
1042                                     "Partition %d ends at %llu.  ",
1043                                     i,
1044                                     vtoc->efi_parts[i].p_start +
1045                                     vtoc->efi_parts[i].p_size);
1046                                 (void) fprintf(stderr,
1047                                     "It must be between %llu and %llu.\n",
1048                                     vtoc->efi_first_u_lba,
1049                                     vtoc->efi_last_u_lba);
1050                         }
1051                         return (VT_EINVAL);
1052                 }
1053
1054                 for (j = 0; j < vtoc->efi_nparts; j++) {
1055                         isize = vtoc->efi_parts[i].p_size;
1056                         jsize = vtoc->efi_parts[j].p_size;
1057                         istart = vtoc->efi_parts[i].p_start;
1058                         jstart = vtoc->efi_parts[j].p_start;
1059                         if ((i != j) && (isize != 0) && (jsize != 0)) {
1060                                 endsect = jstart + jsize -1;
1061                                 if ((jstart <= istart) &&
1062                                     (istart <= endsect)) {
1063                                         if (efi_debug) {
1064                                                 (void) fprintf(stderr,
1065                                                     "Partition %d overlaps "
1066                                                     "partition %d.", i, j);
1067                                         }
1068                                         return (VT_EINVAL);
1069                                 }
1070                         }
1071                 }
1072         }
1073         /* just a warning for now */
1074         if ((resv_part == -1) && efi_debug) {
1075                 (void) fprintf(stderr,
1076                     "no reserved partition found\n");
1077         }
1078         return (0);
1079 }
1080
1081 static int
1082 call_blkpg_ioctl(int fd, int command, diskaddr_t start,
1083     diskaddr_t size, uint_t pno)
1084 {
1085         struct blkpg_ioctl_arg ioctl_arg;
1086         struct blkpg_partition  linux_part;
1087         memset(&linux_part, 0, sizeof (linux_part));
1088
1089         char *path = efi_get_devname(fd);
1090         if (path == NULL) {
1091                 (void) fprintf(stderr, "failed to retrieve device name\n");
1092                 return (VT_EINVAL);
1093         }
1094
1095         linux_part.start = start;
1096         linux_part.length = size;
1097         linux_part.pno = pno;
1098         snprintf(linux_part.devname, BLKPG_DEVNAMELTH - 1, "%s%u", path, pno);
1099         linux_part.devname[BLKPG_DEVNAMELTH - 1] = '\0';
1100         free(path);
1101
1102         ioctl_arg.op = command;
1103         ioctl_arg.flags = 0;
1104         ioctl_arg.datalen = sizeof (struct blkpg_partition);
1105         ioctl_arg.data = &linux_part;
1106
1107         return (ioctl(fd, BLKPG, &ioctl_arg));
1108 }
1109
1110 /*
1111  * add all the unallocated space to the current label
1112  */
1113 int
1114 efi_use_whole_disk(int fd)
1115 {
1116         struct dk_gpt *efi_label = NULL;
1117         int rval;
1118         int i;
1119         uint_t resv_index = 0, data_index = 0;
1120         diskaddr_t resv_start = 0, data_start = 0;
1121         diskaddr_t data_size, limit, difference;
1122         boolean_t sync_needed = B_FALSE;
1123         uint_t nblocks;
1124
1125         rval = efi_alloc_and_read(fd, &efi_label);
1126         if (rval < 0) {
1127                 if (efi_label != NULL)
1128                         efi_free(efi_label);
1129                 return (rval);
1130         }
1131
1132         /*
1133          * Find the last physically non-zero partition.
1134          * This should be the reserved partition.
1135          */
1136         for (i = 0; i < efi_label->efi_nparts; i ++) {
1137                 if (resv_start < efi_label->efi_parts[i].p_start) {
1138                         resv_start = efi_label->efi_parts[i].p_start;
1139                         resv_index = i;
1140                 }
1141         }
1142
1143         /*
1144          * Find the last physically non-zero partition before that.
1145          * This is the data partition.
1146          */
1147         for (i = 0; i < resv_index; i ++) {
1148                 if (data_start < efi_label->efi_parts[i].p_start) {
1149                         data_start = efi_label->efi_parts[i].p_start;
1150                         data_index = i;
1151                 }
1152         }
1153         data_size = efi_label->efi_parts[data_index].p_size;
1154
1155         /*
1156          * See the "efi_alloc_and_init" function for more information
1157          * about where this "nblocks" value comes from.
1158          */
1159         nblocks = efi_label->efi_first_u_lba - 1;
1160
1161         /*
1162          * Determine if the EFI label is out of sync. We check that:
1163          *
1164          * 1. the data partition ends at the limit we set, and
1165          * 2. the reserved partition starts at the limit we set.
1166          *
1167          * If either of these conditions is not met, then we need to
1168          * resync the EFI label.
1169          *
1170          * The limit is the last usable LBA, determined by the last LBA
1171          * and the first usable LBA fields on the EFI label of the disk
1172          * (see the lines directly above). Additionally, we factor in
1173          * EFI_MIN_RESV_SIZE (per its use in "zpool_label_disk") and
1174          * P2ALIGN it to ensure the partition boundaries are aligned
1175          * (for performance reasons). The alignment should match the
1176          * alignment used by the "zpool_label_disk" function.
1177          */
1178         limit = P2ALIGN(efi_label->efi_last_lba - nblocks - EFI_MIN_RESV_SIZE,
1179             PARTITION_END_ALIGNMENT);
1180         if (data_start + data_size != limit || resv_start != limit)
1181                 sync_needed = B_TRUE;
1182
1183         if (efi_debug && sync_needed)
1184                 (void) fprintf(stderr, "efi_use_whole_disk: sync needed\n");
1185
1186         /*
1187          * If alter_lba is 1, we are using the backup label.
1188          * Since we can locate the backup label by disk capacity,
1189          * there must be no unallocated space.
1190          */
1191         if ((efi_label->efi_altern_lba == 1) || (efi_label->efi_altern_lba
1192             >= efi_label->efi_last_lba && !sync_needed)) {
1193                 if (efi_debug) {
1194                         (void) fprintf(stderr,
1195                             "efi_use_whole_disk: requested space not found\n");
1196                 }
1197                 efi_free(efi_label);
1198                 return (VT_ENOSPC);
1199         }
1200
1201         /*
1202          * Verify that we've found the reserved partition by checking
1203          * that it looks the way it did when we created it in zpool_label_disk.
1204          * If we've found the incorrect partition, then we know that this
1205          * device was reformatted and no longer is solely used by ZFS.
1206          */
1207         if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) ||
1208             (efi_label->efi_parts[resv_index].p_tag != V_RESERVED) ||
1209             (resv_index != 8)) {
1210                 if (efi_debug) {
1211                         (void) fprintf(stderr,
1212                             "efi_use_whole_disk: wholedisk not available\n");
1213                 }
1214                 efi_free(efi_label);
1215                 return (VT_ENOSPC);
1216         }
1217
1218         if (data_start + data_size != resv_start) {
1219                 if (efi_debug) {
1220                         (void) fprintf(stderr,
1221                             "efi_use_whole_disk: "
1222                             "data_start (%lli) + "
1223                             "data_size (%lli) != "
1224                             "resv_start (%lli)\n",
1225                             data_start, data_size, resv_start);
1226                 }
1227
1228                 return (VT_EINVAL);
1229         }
1230
1231         if (limit < resv_start) {
1232                 if (efi_debug) {
1233                         (void) fprintf(stderr,
1234                             "efi_use_whole_disk: "
1235                             "limit (%lli) < resv_start (%lli)\n",
1236                             limit, resv_start);
1237                 }
1238
1239                 return (VT_EINVAL);
1240         }
1241
1242         difference = limit - resv_start;
1243
1244         if (efi_debug)
1245                 (void) fprintf(stderr,
1246                     "efi_use_whole_disk: difference is %lli\n", difference);
1247
1248         /*
1249          * Move the reserved partition. There is currently no data in
1250          * here except fabricated devids (which get generated via
1251          * efi_write()). So there is no need to copy data.
1252          */
1253         efi_label->efi_parts[data_index].p_size += difference;
1254         efi_label->efi_parts[resv_index].p_start += difference;
1255         efi_label->efi_last_u_lba = efi_label->efi_last_lba - nblocks;
1256
1257         /*
1258          * Rescanning the partition table in the kernel can result
1259          * in the device links to be removed (see comment in vdev_disk_open).
1260          * If BLKPG_RESIZE_PARTITION is available, then we can resize
1261          * the partition table online and avoid having to remove the device
1262          * links used by the pool. This provides a very deterministic
1263          * approach to resizing devices and does not require any
1264          * loops waiting for devices to reappear.
1265          */
1266 #ifdef BLKPG_RESIZE_PARTITION
1267         /*
1268          * Delete the reserved partition since we're about to expand
1269          * the data partition and it would overlap with the reserved
1270          * partition.
1271          * NOTE: The starting index for the ioctl is 1 while for the
1272          * EFI partitions it's 0. For that reason we have to add one
1273          * whenever we make an ioctl call.
1274          */
1275         rval = call_blkpg_ioctl(fd, BLKPG_DEL_PARTITION, 0, 0, resv_index + 1);
1276         if (rval != 0)
1277                 goto out;
1278
1279         /*
1280          * Expand the data partition
1281          */
1282         rval = call_blkpg_ioctl(fd, BLKPG_RESIZE_PARTITION,
1283             efi_label->efi_parts[data_index].p_start * efi_label->efi_lbasize,
1284             efi_label->efi_parts[data_index].p_size * efi_label->efi_lbasize,
1285             data_index + 1);
1286         if (rval != 0) {
1287                 (void) fprintf(stderr, "Unable to resize data "
1288                     "partition:  %d\n", rval);
1289                 /*
1290                  * Since we failed to resize, we need to reset the start
1291                  * of the reserve partition and re-create it.
1292                  */
1293                 efi_label->efi_parts[resv_index].p_start -= difference;
1294         }
1295
1296         /*
1297          * Re-add the reserved partition. If we've expanded the data partition
1298          * then we'll move the reserve partition to the end of the data
1299          * partition. Otherwise, we'll recreate the partition in its original
1300          * location. Note that we do this as best-effort and ignore any
1301          * errors that may arise here. This will ensure that we finish writing
1302          * the EFI label.
1303          */
1304         (void) call_blkpg_ioctl(fd, BLKPG_ADD_PARTITION,
1305             efi_label->efi_parts[resv_index].p_start * efi_label->efi_lbasize,
1306             efi_label->efi_parts[resv_index].p_size * efi_label->efi_lbasize,
1307             resv_index + 1);
1308 #endif
1309
1310         /*
1311          * We're now ready to write the EFI label.
1312          */
1313         if (rval == 0) {
1314                 rval = efi_write(fd, efi_label);
1315                 if (rval < 0 && efi_debug) {
1316                         (void) fprintf(stderr, "efi_use_whole_disk:fail "
1317                             "to write label, rval=%d\n", rval);
1318                 }
1319         }
1320
1321 out:
1322         efi_free(efi_label);
1323         return (rval);
1324 }
1325
1326 /*
1327  * write EFI label and backup label
1328  */
1329 int
1330 efi_write(int fd, struct dk_gpt *vtoc)
1331 {
1332         dk_efi_t                dk_ioc;
1333         efi_gpt_t               *efi;
1334         efi_gpe_t               *efi_parts;
1335         int                     i, j;
1336         struct dk_cinfo         dki_info;
1337         int                     rval;
1338         int                     md_flag = 0;
1339         int                     nblocks;
1340         diskaddr_t              lba_backup_gpt_hdr;
1341
1342         if ((rval = efi_get_info(fd, &dki_info)) != 0)
1343                 return (rval);
1344
1345         /* check if we are dealing with a metadevice */
1346         if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
1347             (strncmp(dki_info.dki_dname, "md", 3) == 0)) {
1348                 md_flag = 1;
1349         }
1350
1351         if (check_input(vtoc)) {
1352                 /*
1353                  * not valid; if it's a metadevice just pass it down
1354                  * because SVM will do its own checking
1355                  */
1356                 if (md_flag == 0) {
1357                         return (VT_EINVAL);
1358                 }
1359         }
1360
1361         dk_ioc.dki_lba = 1;
1362         if (NBLOCKS(vtoc->efi_nparts, vtoc->efi_lbasize) < 34) {
1363                 dk_ioc.dki_length = EFI_MIN_ARRAY_SIZE + vtoc->efi_lbasize;
1364         } else {
1365                 dk_ioc.dki_length = (len_t)NBLOCKS(vtoc->efi_nparts,
1366                     vtoc->efi_lbasize) *
1367                     vtoc->efi_lbasize;
1368         }
1369
1370         /*
1371          * the number of blocks occupied by GUID partition entry array
1372          */
1373         nblocks = dk_ioc.dki_length / vtoc->efi_lbasize - 1;
1374
1375         /*
1376          * Backup GPT header is located on the block after GUID
1377          * partition entry array. Here, we calculate the address
1378          * for backup GPT header.
1379          */
1380         lba_backup_gpt_hdr = vtoc->efi_last_u_lba + 1 + nblocks;
1381         if (posix_memalign((void **)&dk_ioc.dki_data,
1382             vtoc->efi_lbasize, dk_ioc.dki_length))
1383                 return (VT_ERROR);
1384
1385         memset(dk_ioc.dki_data, 0, dk_ioc.dki_length);
1386         efi = dk_ioc.dki_data;
1387
1388         /* stuff user's input into EFI struct */
1389         efi->efi_gpt_Signature = LE_64(EFI_SIGNATURE);
1390         efi->efi_gpt_Revision = LE_32(vtoc->efi_version); /* 0x02000100 */
1391         efi->efi_gpt_HeaderSize = LE_32(sizeof (struct efi_gpt) - LEN_EFI_PAD);
1392         efi->efi_gpt_Reserved1 = 0;
1393         efi->efi_gpt_MyLBA = LE_64(1ULL);
1394         efi->efi_gpt_AlternateLBA = LE_64(lba_backup_gpt_hdr);
1395         efi->efi_gpt_FirstUsableLBA = LE_64(vtoc->efi_first_u_lba);
1396         efi->efi_gpt_LastUsableLBA = LE_64(vtoc->efi_last_u_lba);
1397         efi->efi_gpt_PartitionEntryLBA = LE_64(2ULL);
1398         efi->efi_gpt_NumberOfPartitionEntries = LE_32(vtoc->efi_nparts);
1399         efi->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (struct efi_gpe));
1400         UUID_LE_CONVERT(efi->efi_gpt_DiskGUID, vtoc->efi_disk_uguid);
1401
1402         /* LINTED -- always longlong aligned */
1403         efi_parts = (efi_gpe_t *)((char *)dk_ioc.dki_data + vtoc->efi_lbasize);
1404
1405         for (i = 0; i < vtoc->efi_nparts; i++) {
1406                 for (j = 0;
1407                     j < sizeof (conversion_array) /
1408                     sizeof (struct uuid_to_ptag); j++) {
1409
1410                         if (vtoc->efi_parts[i].p_tag == j) {
1411                                 UUID_LE_CONVERT(
1412                                     efi_parts[i].efi_gpe_PartitionTypeGUID,
1413                                     conversion_array[j].uuid);
1414                                 break;
1415                         }
1416                 }
1417
1418                 if (j == sizeof (conversion_array) /
1419                     sizeof (struct uuid_to_ptag)) {
1420                         /*
1421                          * If we didn't have a matching uuid match, bail here.
1422                          * Don't write a label with unknown uuid.
1423                          */
1424                         if (efi_debug) {
1425                                 (void) fprintf(stderr,
1426                                     "Unknown uuid for p_tag %d\n",
1427                                     vtoc->efi_parts[i].p_tag);
1428                         }
1429                         return (VT_EINVAL);
1430                 }
1431
1432                 /* Zero's should be written for empty partitions */
1433                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED)
1434                         continue;
1435
1436                 efi_parts[i].efi_gpe_StartingLBA =
1437                     LE_64(vtoc->efi_parts[i].p_start);
1438                 efi_parts[i].efi_gpe_EndingLBA =
1439                     LE_64(vtoc->efi_parts[i].p_start +
1440                     vtoc->efi_parts[i].p_size - 1);
1441                 efi_parts[i].efi_gpe_Attributes.PartitionAttrs =
1442                     LE_16(vtoc->efi_parts[i].p_flag);
1443                 for (j = 0; j < EFI_PART_NAME_LEN; j++) {
1444                         efi_parts[i].efi_gpe_PartitionName[j] =
1445                             LE_16((ushort_t)vtoc->efi_parts[i].p_name[j]);
1446                 }
1447                 if ((vtoc->efi_parts[i].p_tag != V_UNASSIGNED) &&
1448                     uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_uguid)) {
1449                         (void) uuid_generate((uchar_t *)
1450                             &vtoc->efi_parts[i].p_uguid);
1451                 }
1452                 memcpy(&efi_parts[i].efi_gpe_UniquePartitionGUID,
1453                     &vtoc->efi_parts[i].p_uguid,
1454                     sizeof (uuid_t));
1455         }
1456         efi->efi_gpt_PartitionEntryArrayCRC32 =
1457             LE_32(efi_crc32((unsigned char *)efi_parts,
1458             vtoc->efi_nparts * (int)sizeof (struct efi_gpe)));
1459         efi->efi_gpt_HeaderCRC32 =
1460             LE_32(efi_crc32((unsigned char *)efi,
1461             LE_32(efi->efi_gpt_HeaderSize)));
1462
1463         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
1464                 free(dk_ioc.dki_data);
1465                 switch (errno) {
1466                 case EIO:
1467                         return (VT_EIO);
1468                 case EINVAL:
1469                         return (VT_EINVAL);
1470                 default:
1471                         return (VT_ERROR);
1472                 }
1473         }
1474         /* if it's a metadevice we're done */
1475         if (md_flag) {
1476                 free(dk_ioc.dki_data);
1477                 return (0);
1478         }
1479
1480         /* write backup partition array */
1481         dk_ioc.dki_lba = vtoc->efi_last_u_lba + 1;
1482         dk_ioc.dki_length -= vtoc->efi_lbasize;
1483         /* LINTED */
1484         dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data +
1485             vtoc->efi_lbasize);
1486
1487         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
1488                 /*
1489                  * we wrote the primary label okay, so don't fail
1490                  */
1491                 if (efi_debug) {
1492                         (void) fprintf(stderr,
1493                             "write of backup partitions to block %llu "
1494                             "failed, errno %d\n",
1495                             vtoc->efi_last_u_lba + 1,
1496                             errno);
1497                 }
1498         }
1499         /*
1500          * now swap MyLBA and AlternateLBA fields and write backup
1501          * partition table header
1502          */
1503         dk_ioc.dki_lba = lba_backup_gpt_hdr;
1504         dk_ioc.dki_length = vtoc->efi_lbasize;
1505         /* LINTED */
1506         dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data -
1507             vtoc->efi_lbasize);
1508         efi->efi_gpt_AlternateLBA = LE_64(1ULL);
1509         efi->efi_gpt_MyLBA = LE_64(lba_backup_gpt_hdr);
1510         efi->efi_gpt_PartitionEntryLBA = LE_64(vtoc->efi_last_u_lba + 1);
1511         efi->efi_gpt_HeaderCRC32 = 0;
1512         efi->efi_gpt_HeaderCRC32 =
1513             LE_32(efi_crc32((unsigned char *)dk_ioc.dki_data,
1514             LE_32(efi->efi_gpt_HeaderSize)));
1515
1516         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
1517                 if (efi_debug) {
1518                         (void) fprintf(stderr,
1519                             "write of backup header to block %llu failed, "
1520                             "errno %d\n",
1521                             lba_backup_gpt_hdr,
1522                             errno);
1523                 }
1524         }
1525         /* write the PMBR */
1526         (void) write_pmbr(fd, vtoc);
1527         free(dk_ioc.dki_data);
1528
1529         return (0);
1530 }
1531
/*
 * Release a label structure.  The pointer is handed straight to free(),
 * so passing NULL is harmless.
 */
void
efi_free(struct dk_gpt *ptr)
{
	free(ptr);
}
1537
1538 void
1539 efi_err_check(struct dk_gpt *vtoc)
1540 {
1541         int                     resv_part = -1;
1542         int                     i, j;
1543         diskaddr_t              istart, jstart, isize, jsize, endsect;
1544         int                     overlap = 0;
1545
1546         /*
1547          * make sure no partitions overlap
1548          */
1549         for (i = 0; i < vtoc->efi_nparts; i++) {
1550                 /* It can't be unassigned and have an actual size */
1551                 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) &&
1552                     (vtoc->efi_parts[i].p_size != 0)) {
1553                         (void) fprintf(stderr,
1554                             "partition %d is \"unassigned\" but has a size "
1555                             "of %llu\n", i, vtoc->efi_parts[i].p_size);
1556                 }
1557                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) {
1558                         continue;
1559                 }
1560                 if (vtoc->efi_parts[i].p_tag == V_RESERVED) {
1561                         if (resv_part != -1) {
1562                                 (void) fprintf(stderr,
1563                                     "found duplicate reserved partition at "
1564                                     "%d\n", i);
1565                         }
1566                         resv_part = i;
1567                         if (vtoc->efi_parts[i].p_size != EFI_MIN_RESV_SIZE)
1568                                 (void) fprintf(stderr,
1569                                     "Warning: reserved partition size must "
1570                                     "be %d sectors\n", EFI_MIN_RESV_SIZE);
1571                 }
1572                 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) ||
1573                     (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) {
1574                         (void) fprintf(stderr,
1575                             "Partition %d starts at %llu\n",
1576                             i,
1577                             vtoc->efi_parts[i].p_start);
1578                         (void) fprintf(stderr,
1579                             "It must be between %llu and %llu.\n",
1580                             vtoc->efi_first_u_lba,
1581                             vtoc->efi_last_u_lba);
1582                 }
1583                 if ((vtoc->efi_parts[i].p_start +
1584                     vtoc->efi_parts[i].p_size <
1585                     vtoc->efi_first_u_lba) ||
1586                     (vtoc->efi_parts[i].p_start +
1587                     vtoc->efi_parts[i].p_size >
1588                     vtoc->efi_last_u_lba + 1)) {
1589                         (void) fprintf(stderr,
1590                             "Partition %d ends at %llu\n",
1591                             i,
1592                             vtoc->efi_parts[i].p_start +
1593                             vtoc->efi_parts[i].p_size);
1594                         (void) fprintf(stderr,
1595                             "It must be between %llu and %llu.\n",
1596                             vtoc->efi_first_u_lba,
1597                             vtoc->efi_last_u_lba);
1598                 }
1599
1600                 for (j = 0; j < vtoc->efi_nparts; j++) {
1601                         isize = vtoc->efi_parts[i].p_size;
1602                         jsize = vtoc->efi_parts[j].p_size;
1603                         istart = vtoc->efi_parts[i].p_start;
1604                         jstart = vtoc->efi_parts[j].p_start;
1605                         if ((i != j) && (isize != 0) && (jsize != 0)) {
1606                                 endsect = jstart + jsize -1;
1607                                 if ((jstart <= istart) &&
1608                                     (istart <= endsect)) {
1609                                         if (!overlap) {
1610                                         (void) fprintf(stderr,
1611                                             "label error: EFI Labels do not "
1612                                             "support overlapping partitions\n");
1613                                         }
1614                                         (void) fprintf(stderr,
1615                                             "Partition %d overlaps partition "
1616                                             "%d.\n", i, j);
1617                                         overlap = 1;
1618                                 }
1619                         }
1620                 }
1621         }
1622         /* make sure there is a reserved partition */
1623         if (resv_part == -1) {
1624                 (void) fprintf(stderr,
1625                     "no reserved partition found\n");
1626         }
1627 }