]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - stand/common/part.c
MFV r336960: 9256 zfs send space estimation off by > 10% on some datasets
[FreeBSD/FreeBSD.git] / stand / common / part.c
1 /*-
2  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include <stand.h>
31 #include <sys/param.h>
32 #include <sys/diskmbr.h>
33 #include <sys/disklabel.h>
34 #include <sys/endian.h>
35 #include <sys/gpt.h>
36 #include <sys/stddef.h>
37 #include <sys/queue.h>
38 #include <sys/vtoc.h>
39
40 #include <fs/cd9660/iso.h>
41
42 #include <crc32.h>
43 #include <part.h>
44 #include <uuid.h>
45
/*
 * Diagnostic tracing.  With PART_DEBUG defined, DEBUG() prints the name
 * of the calling function, the formatted message and a newline;
 * otherwise it expands to nothing.
 */
#if defined(PART_DEBUG)
#define	DEBUG(fmt, ...)	printf("%s: " fmt "\n", __func__, ##__VA_ARGS__)
#else
#define	DEBUG(fmt, ...)
#endif
51
#ifdef LOADER_GPT_SUPPORT
/* Upper bound, in sectors, on how much of a GPT entry array is read. */
#define	MAXTBLSZ	64

/* Marks an unused slot in the GPT entry array. */
static const uuid_t gpt_uuid_unused = GPT_ENT_TYPE_UNUSED;

/* Partition type UUIDs that gpt_parttype() knows how to classify. */
static const uuid_t gpt_uuid_efi = GPT_ENT_TYPE_EFI;
static const uuid_t gpt_uuid_ms_basic_data = GPT_ENT_TYPE_MS_BASIC_DATA;
static const uuid_t gpt_uuid_freebsd = GPT_ENT_TYPE_FREEBSD;
static const uuid_t gpt_uuid_freebsd_boot = GPT_ENT_TYPE_FREEBSD_BOOT;
static const uuid_t gpt_uuid_freebsd_nandfs = GPT_ENT_TYPE_FREEBSD_NANDFS;
static const uuid_t gpt_uuid_freebsd_swap = GPT_ENT_TYPE_FREEBSD_SWAP;
static const uuid_t gpt_uuid_freebsd_ufs = GPT_ENT_TYPE_FREEBSD_UFS;
static const uuid_t gpt_uuid_freebsd_vinum = GPT_ENT_TYPE_FREEBSD_VINUM;
static const uuid_t gpt_uuid_freebsd_zfs = GPT_ENT_TYPE_FREEBSD_ZFS;
#endif
65
66 struct pentry {
67         struct ptable_entry     part;
68         uint64_t                flags;
69         union {
70                 uint8_t bsd;
71                 uint8_t mbr;
72                 uuid_t  gpt;
73                 uint16_t vtoc8;
74         } type;
75         STAILQ_ENTRY(pentry)    entry;
76 };
77
78 struct ptable {
79         enum ptable_type        type;
80         uint16_t                sectorsize;
81         uint64_t                sectors;
82
83         STAILQ_HEAD(, pentry)   entries;
84 };
85
86 static struct parttypes {
87         enum partition_type     type;
88         const char              *desc;
89 } ptypes[] = {
90         { PART_UNKNOWN,         "Unknown" },
91         { PART_EFI,             "EFI" },
92         { PART_FREEBSD,         "FreeBSD" },
93         { PART_FREEBSD_BOOT,    "FreeBSD boot" },
94         { PART_FREEBSD_NANDFS,  "FreeBSD nandfs" },
95         { PART_FREEBSD_UFS,     "FreeBSD UFS" },
96         { PART_FREEBSD_ZFS,     "FreeBSD ZFS" },
97         { PART_FREEBSD_SWAP,    "FreeBSD swap" },
98         { PART_FREEBSD_VINUM,   "FreeBSD vinum" },
99         { PART_LINUX,           "Linux" },
100         { PART_LINUX_SWAP,      "Linux swap" },
101         { PART_DOS,             "DOS/Windows" },
102         { PART_ISO9660,         "ISO9660" },
103 };
104
105 const char *
106 parttype2str(enum partition_type type)
107 {
108         size_t i;
109
110         for (i = 0; i < nitems(ptypes); i++)
111                 if (ptypes[i].type == type)
112                         return (ptypes[i].desc);
113         return (ptypes[0].desc);
114 }
115
116 #ifdef LOADER_GPT_SUPPORT
117 static void
118 uuid_letoh(uuid_t *uuid)
119 {
120
121         uuid->time_low = le32toh(uuid->time_low);
122         uuid->time_mid = le16toh(uuid->time_mid);
123         uuid->time_hi_and_version = le16toh(uuid->time_hi_and_version);
124 }
125
126 static enum partition_type
127 gpt_parttype(uuid_t type)
128 {
129
130         if (uuid_equal(&type, &gpt_uuid_efi, NULL))
131                 return (PART_EFI);
132         else if (uuid_equal(&type, &gpt_uuid_ms_basic_data, NULL))
133                 return (PART_DOS);
134         else if (uuid_equal(&type, &gpt_uuid_freebsd_boot, NULL))
135                 return (PART_FREEBSD_BOOT);
136         else if (uuid_equal(&type, &gpt_uuid_freebsd_ufs, NULL))
137                 return (PART_FREEBSD_UFS);
138         else if (uuid_equal(&type, &gpt_uuid_freebsd_zfs, NULL))
139                 return (PART_FREEBSD_ZFS);
140         else if (uuid_equal(&type, &gpt_uuid_freebsd_swap, NULL))
141                 return (PART_FREEBSD_SWAP);
142         else if (uuid_equal(&type, &gpt_uuid_freebsd_vinum, NULL))
143                 return (PART_FREEBSD_VINUM);
144         else if (uuid_equal(&type, &gpt_uuid_freebsd_nandfs, NULL))
145                 return (PART_FREEBSD_NANDFS);
146         else if (uuid_equal(&type, &gpt_uuid_freebsd, NULL))
147                 return (PART_FREEBSD);
148         return (PART_UNKNOWN);
149 }
150
151 static struct gpt_hdr *
152 gpt_checkhdr(struct gpt_hdr *hdr, uint64_t lba_self, uint64_t lba_last,
153     uint16_t sectorsize)
154 {
155         uint32_t sz, crc;
156
157         if (memcmp(hdr->hdr_sig, GPT_HDR_SIG, sizeof(hdr->hdr_sig)) != 0) {
158                 DEBUG("no GPT signature");
159                 return (NULL);
160         }
161         sz = le32toh(hdr->hdr_size);
162         if (sz < 92 || sz > sectorsize) {
163                 DEBUG("invalid GPT header size: %d", sz);
164                 return (NULL);
165         }
166         crc = le32toh(hdr->hdr_crc_self);
167         hdr->hdr_crc_self = 0;
168         if (crc32(hdr, sz) != crc) {
169                 DEBUG("GPT header's CRC doesn't match");
170                 return (NULL);
171         }
172         hdr->hdr_crc_self = crc;
173         hdr->hdr_revision = le32toh(hdr->hdr_revision);
174         if (hdr->hdr_revision < GPT_HDR_REVISION) {
175                 DEBUG("unsupported GPT revision %d", hdr->hdr_revision);
176                 return (NULL);
177         }
178         hdr->hdr_lba_self = le64toh(hdr->hdr_lba_self);
179         if (hdr->hdr_lba_self != lba_self) {
180                 DEBUG("self LBA doesn't match");
181                 return (NULL);
182         }
183         hdr->hdr_lba_alt = le64toh(hdr->hdr_lba_alt);
184         if (hdr->hdr_lba_alt == hdr->hdr_lba_self) {
185                 DEBUG("invalid alternate LBA");
186                 return (NULL);
187         }
188         hdr->hdr_entries = le32toh(hdr->hdr_entries);
189         hdr->hdr_entsz = le32toh(hdr->hdr_entsz);
190         if (hdr->hdr_entries == 0 ||
191             hdr->hdr_entsz < sizeof(struct gpt_ent) ||
192             sectorsize % hdr->hdr_entsz != 0) {
193                 DEBUG("invalid entry size or number of entries");
194                 return (NULL);
195         }
196         hdr->hdr_lba_start = le64toh(hdr->hdr_lba_start);
197         hdr->hdr_lba_end = le64toh(hdr->hdr_lba_end);
198         hdr->hdr_lba_table = le64toh(hdr->hdr_lba_table);
199         hdr->hdr_crc_table = le32toh(hdr->hdr_crc_table);
200         uuid_letoh(&hdr->hdr_uuid);
201         return (hdr);
202 }
203
204 static int
205 gpt_checktbl(const struct gpt_hdr *hdr, uint8_t *tbl, size_t size,
206     uint64_t lba_last)
207 {
208         struct gpt_ent *ent;
209         uint32_t i, cnt;
210
211         cnt = size / hdr->hdr_entsz;
212         if (hdr->hdr_entries <= cnt) {
213                 cnt = hdr->hdr_entries;
214                 /* Check CRC only when buffer size is enough for table. */
215                 if (hdr->hdr_crc_table !=
216                     crc32(tbl, hdr->hdr_entries * hdr->hdr_entsz)) {
217                         DEBUG("GPT table's CRC doesn't match");
218                         return (-1);
219                 }
220         }
221         for (i = 0; i < cnt; i++) {
222                 ent = (struct gpt_ent *)(tbl + i * hdr->hdr_entsz);
223                 uuid_letoh(&ent->ent_type);
224                 if (uuid_equal(&ent->ent_type, &gpt_uuid_unused, NULL))
225                         continue;
226                 ent->ent_lba_start = le64toh(ent->ent_lba_start);
227                 ent->ent_lba_end = le64toh(ent->ent_lba_end);
228         }
229         return (0);
230 }
231
232 static struct ptable *
233 ptable_gptread(struct ptable *table, void *dev, diskread_t dread)
234 {
235         struct pentry *entry;
236         struct gpt_hdr *phdr, hdr;
237         struct gpt_ent *ent;
238         uint8_t *buf, *tbl;
239         uint64_t offset;
240         int pri, sec;
241         size_t size, i;
242
243         buf = malloc(table->sectorsize);
244         if (buf == NULL)
245                 return (NULL);
246         tbl = malloc(table->sectorsize * MAXTBLSZ);
247         if (tbl == NULL) {
248                 free(buf);
249                 return (NULL);
250         }
251         /* Read the primary GPT header. */
252         if (dread(dev, buf, 1, 1) != 0) {
253                 ptable_close(table);
254                 table = NULL;
255                 goto out;
256         }
257         pri = sec = 0;
258         /* Check the primary GPT header. */
259         phdr = gpt_checkhdr((struct gpt_hdr *)buf, 1, table->sectors - 1,
260             table->sectorsize);
261         if (phdr != NULL) {
262                 /* Read the primary GPT table. */
263                 size = MIN(MAXTBLSZ,
264                     howmany(phdr->hdr_entries * phdr->hdr_entsz,
265                         table->sectorsize));
266                 if (dread(dev, tbl, size, phdr->hdr_lba_table) == 0 &&
267                     gpt_checktbl(phdr, tbl, size * table->sectorsize,
268                     table->sectors - 1) == 0) {
269                         memcpy(&hdr, phdr, sizeof(hdr));
270                         pri = 1;
271                 }
272         }
273         offset = pri ? hdr.hdr_lba_alt: table->sectors - 1;
274         /* Read the backup GPT header. */
275         if (dread(dev, buf, 1, offset) != 0)
276                 phdr = NULL;
277         else
278                 phdr = gpt_checkhdr((struct gpt_hdr *)buf, offset,
279                     table->sectors - 1, table->sectorsize);
280         if (phdr != NULL) {
281                 /*
282                  * Compare primary and backup headers.
283                  * If they are equal, then we do not need to read backup
284                  * table. If they are different, then prefer backup header
285                  * and try to read backup table.
286                  */
287                 if (pri == 0 ||
288                     uuid_equal(&hdr.hdr_uuid, &phdr->hdr_uuid, NULL) == 0 ||
289                     hdr.hdr_revision != phdr->hdr_revision ||
290                     hdr.hdr_size != phdr->hdr_size ||
291                     hdr.hdr_lba_start != phdr->hdr_lba_start ||
292                     hdr.hdr_lba_end != phdr->hdr_lba_end ||
293                     hdr.hdr_entries != phdr->hdr_entries ||
294                     hdr.hdr_entsz != phdr->hdr_entsz ||
295                     hdr.hdr_crc_table != phdr->hdr_crc_table) {
296                         /* Read the backup GPT table. */
297                         size = MIN(MAXTBLSZ,
298                                    howmany(phdr->hdr_entries * phdr->hdr_entsz,
299                                        table->sectorsize));
300                         if (dread(dev, tbl, size, phdr->hdr_lba_table) == 0 &&
301                             gpt_checktbl(phdr, tbl, size * table->sectorsize,
302                             table->sectors - 1) == 0) {
303                                 memcpy(&hdr, phdr, sizeof(hdr));
304                                 sec = 1;
305                         }
306                 }
307         }
308         if (pri == 0 && sec == 0) {
309                 /* Both primary and backup tables are invalid. */
310                 table->type = PTABLE_NONE;
311                 goto out;
312         }
313         DEBUG("GPT detected");
314         size = MIN(hdr.hdr_entries * hdr.hdr_entsz,
315             MAXTBLSZ * table->sectorsize);
316
317         /*
318          * If the disk's sector count is smaller than the sector count recorded
319          * in the disk's GPT table header, set the table->sectors to the value
320          * recorded in GPT tables. This is done to work around buggy firmware
321          * that returns truncated disk sizes.
322          *
323          * Note, this is still not a foolproof way to get disk's size. For
324          * example, an image file can be truncated when copied to smaller media.
325          */
326         if (hdr.hdr_lba_alt + 1 > table->sectors)
327                 table->sectors = hdr.hdr_lba_alt + 1;
328
329         for (i = 0; i < size / hdr.hdr_entsz; i++) {
330                 ent = (struct gpt_ent *)(tbl + i * hdr.hdr_entsz);
331                 if (uuid_equal(&ent->ent_type, &gpt_uuid_unused, NULL))
332                         continue;
333
334                 /* Simple sanity checks. */
335                 if (ent->ent_lba_start < hdr.hdr_lba_start ||
336                     ent->ent_lba_end > hdr.hdr_lba_end ||
337                     ent->ent_lba_start > ent->ent_lba_end)
338                         continue;
339
340                 entry = malloc(sizeof(*entry));
341                 if (entry == NULL)
342                         break;
343                 entry->part.start = ent->ent_lba_start;
344                 entry->part.end = ent->ent_lba_end;
345                 entry->part.index = i + 1;
346                 entry->part.type = gpt_parttype(ent->ent_type);
347                 entry->flags = le64toh(ent->ent_attr);
348                 memcpy(&entry->type.gpt, &ent->ent_type, sizeof(uuid_t));
349                 STAILQ_INSERT_TAIL(&table->entries, entry, entry);
350                 DEBUG("new GPT partition added");
351         }
352 out:
353         free(buf);
354         free(tbl);
355         return (table);
356 }
357 #endif /* LOADER_GPT_SUPPORT */
358
359 #ifdef LOADER_MBR_SUPPORT
360 /* We do not need to support too many EBR partitions in the loader */
361 #define MAXEBRENTRIES           8
362 static enum partition_type
363 mbr_parttype(uint8_t type)
364 {
365
366         switch (type) {
367         case DOSPTYP_386BSD:
368                 return (PART_FREEBSD);
369         case DOSPTYP_LINSWP:
370                 return (PART_LINUX_SWAP);
371         case DOSPTYP_LINUX:
372                 return (PART_LINUX);
373         case 0x01:
374         case 0x04:
375         case 0x06:
376         case 0x07:
377         case 0x0b:
378         case 0x0c:
379         case 0x0e:
380                 return (PART_DOS);
381         }
382         return (PART_UNKNOWN);
383 }
384
385 static struct ptable *
386 ptable_ebrread(struct ptable *table, void *dev, diskread_t dread)
387 {
388         struct dos_partition *dp;
389         struct pentry *e1, *entry;
390         uint32_t start, end, offset;
391         u_char *buf;
392         int i, index;
393
394         STAILQ_FOREACH(e1, &table->entries, entry) {
395                 if (e1->type.mbr == DOSPTYP_EXT ||
396                     e1->type.mbr == DOSPTYP_EXTLBA)
397                         break;
398         }
399         if (e1 == NULL)
400                 return (table);
401         index = 5;
402         offset = e1->part.start;
403         buf = malloc(table->sectorsize);
404         if (buf == NULL)
405                 return (table);
406         DEBUG("EBR detected");
407         for (i = 0; i < MAXEBRENTRIES; i++) {
408 #if 0   /* Some BIOSes return an incorrect number of sectors */
409                 if (offset >= table->sectors)
410                         break;
411 #endif
412                 if (dread(dev, buf, 1, offset) != 0)
413                         break;
414                 dp = (struct dos_partition *)(buf + DOSPARTOFF);
415                 if (dp[0].dp_typ == 0)
416                         break;
417                 start = le32toh(dp[0].dp_start);
418                 if (dp[0].dp_typ == DOSPTYP_EXT &&
419                     dp[1].dp_typ == 0) {
420                         offset = e1->part.start + start;
421                         continue;
422                 }
423                 end = le32toh(dp[0].dp_size);
424                 entry = malloc(sizeof(*entry));
425                 if (entry == NULL)
426                         break;
427                 entry->part.start = offset + start;
428                 entry->part.end = entry->part.start + end - 1;
429                 entry->part.index = index++;
430                 entry->part.type = mbr_parttype(dp[0].dp_typ);
431                 entry->flags = dp[0].dp_flag;
432                 entry->type.mbr = dp[0].dp_typ;
433                 STAILQ_INSERT_TAIL(&table->entries, entry, entry);
434                 DEBUG("new EBR partition added");
435                 if (dp[1].dp_typ == 0)
436                         break;
437                 offset = e1->part.start + le32toh(dp[1].dp_start);
438         }
439         free(buf);
440         return (table);
441 }
442 #endif /* LOADER_MBR_SUPPORT */
443
444 static enum partition_type
445 bsd_parttype(uint8_t type)
446 {
447
448         switch (type) {
449         case FS_NANDFS:
450                 return (PART_FREEBSD_NANDFS);
451         case FS_SWAP:
452                 return (PART_FREEBSD_SWAP);
453         case FS_BSDFFS:
454                 return (PART_FREEBSD_UFS);
455         case FS_VINUM:
456                 return (PART_FREEBSD_VINUM);
457         case FS_ZFS:
458                 return (PART_FREEBSD_ZFS);
459         }
460         return (PART_UNKNOWN);
461 }
462
463 static struct ptable *
464 ptable_bsdread(struct ptable *table, void *dev, diskread_t dread)
465 {
466         struct disklabel *dl;
467         struct partition *part;
468         struct pentry *entry;
469         uint8_t *buf;
470         uint32_t raw_offset;
471         int i;
472
473         if (table->sectorsize < sizeof(struct disklabel)) {
474                 DEBUG("Too small sectorsize");
475                 return (table);
476         }
477         buf = malloc(table->sectorsize);
478         if (buf == NULL)
479                 return (table);
480         if (dread(dev, buf, 1, 1) != 0) {
481                 DEBUG("read failed");
482                 ptable_close(table);
483                 table = NULL;
484                 goto out;
485         }
486         dl = (struct disklabel *)buf;
487         if (le32toh(dl->d_magic) != DISKMAGIC &&
488             le32toh(dl->d_magic2) != DISKMAGIC)
489                 goto out;
490         if (le32toh(dl->d_secsize) != table->sectorsize) {
491                 DEBUG("unsupported sector size");
492                 goto out;
493         }
494         dl->d_npartitions = le16toh(dl->d_npartitions);
495         if (dl->d_npartitions > 20 || dl->d_npartitions < 8) {
496                 DEBUG("invalid number of partitions");
497                 goto out;
498         }
499         DEBUG("BSD detected");
500         part = &dl->d_partitions[0];
501         raw_offset = le32toh(part[RAW_PART].p_offset);
502         for (i = 0; i < dl->d_npartitions; i++, part++) {
503                 if (i == RAW_PART)
504                         continue;
505                 if (part->p_size == 0)
506                         continue;
507                 entry = malloc(sizeof(*entry));
508                 if (entry == NULL)
509                         break;
510                 entry->part.start = le32toh(part->p_offset) - raw_offset;
511                 entry->part.end = entry->part.start +
512                     le32toh(part->p_size) - 1;
513                 entry->part.type = bsd_parttype(part->p_fstype);
514                 entry->part.index = i; /* starts from zero */
515                 entry->type.bsd = part->p_fstype;
516                 STAILQ_INSERT_TAIL(&table->entries, entry, entry);
517                 DEBUG("new BSD partition added");
518         }
519         table->type = PTABLE_BSD;
520 out:
521         free(buf);
522         return (table);
523 }
524
525 #ifdef LOADER_VTOC8_SUPPORT
526 static enum partition_type
527 vtoc8_parttype(uint16_t type)
528 {
529
530         switch (type) {
531         case VTOC_TAG_FREEBSD_NANDFS:
532                 return (PART_FREEBSD_NANDFS);
533         case VTOC_TAG_FREEBSD_SWAP:
534                 return (PART_FREEBSD_SWAP);
535         case VTOC_TAG_FREEBSD_UFS:
536                 return (PART_FREEBSD_UFS);
537         case VTOC_TAG_FREEBSD_VINUM:
538                 return (PART_FREEBSD_VINUM);
539         case VTOC_TAG_FREEBSD_ZFS:
540                 return (PART_FREEBSD_ZFS);
541         }
542         return (PART_UNKNOWN);
543 }
544
545 static struct ptable *
546 ptable_vtoc8read(struct ptable *table, void *dev, diskread_t dread)
547 {
548         struct pentry *entry;
549         struct vtoc8 *dl;
550         uint8_t *buf;
551         uint16_t sum, heads, sectors;
552         int i;
553
554         if (table->sectorsize != sizeof(struct vtoc8))
555                 return (table);
556         buf = malloc(table->sectorsize);
557         if (buf == NULL)
558                 return (table);
559         if (dread(dev, buf, 1, 0) != 0) {
560                 DEBUG("read failed");
561                 ptable_close(table);
562                 table = NULL;
563                 goto out;
564         }
565         dl = (struct vtoc8 *)buf;
566         /* Check the sum */
567         for (i = sum = 0; i < sizeof(struct vtoc8); i += sizeof(sum))
568                 sum ^= be16dec(buf + i);
569         if (sum != 0) {
570                 DEBUG("incorrect checksum");
571                 goto out;
572         }
573         if (be16toh(dl->nparts) != VTOC8_NPARTS) {
574                 DEBUG("invalid number of entries");
575                 goto out;
576         }
577         sectors = be16toh(dl->nsecs);
578         heads = be16toh(dl->nheads);
579         if (sectors * heads == 0) {
580                 DEBUG("invalid geometry");
581                 goto out;
582         }
583         DEBUG("VTOC8 detected");
584         for (i = 0; i < VTOC8_NPARTS; i++) {
585                 dl->part[i].tag = be16toh(dl->part[i].tag);
586                 if (i == VTOC_RAW_PART ||
587                     dl->part[i].tag == VTOC_TAG_UNASSIGNED)
588                         continue;
589                 entry = malloc(sizeof(*entry));
590                 if (entry == NULL)
591                         break;
592                 entry->part.start = be32toh(dl->map[i].cyl) * heads * sectors;
593                 entry->part.end = be32toh(dl->map[i].nblks) +
594                     entry->part.start - 1;
595                 entry->part.type = vtoc8_parttype(dl->part[i].tag);
596                 entry->part.index = i; /* starts from zero */
597                 entry->type.vtoc8 = dl->part[i].tag;
598                 STAILQ_INSERT_TAIL(&table->entries, entry, entry);
599                 DEBUG("new VTOC8 partition added");
600         }
601         table->type = PTABLE_VTOC8;
602 out:
603         free(buf);
604         return (table);
605
606 }
607 #endif /* LOADER_VTOC8_SUPPORT */
608
609 #define cdb2devb(bno)   ((bno) * ISO_DEFAULT_BLOCK_SIZE / table->sectorsize)
610
611 static struct ptable *
612 ptable_iso9660read(struct ptable *table, void *dev, diskread_t dread)
613 {
614         uint8_t *buf;
615         struct iso_primary_descriptor *vd;
616         struct pentry *entry;
617
618         buf = malloc(table->sectorsize);
619         if (buf == NULL)
620                 return (table);
621                 
622         if (dread(dev, buf, 1, cdb2devb(16)) != 0) {
623                 DEBUG("read failed");
624                 ptable_close(table);
625                 table = NULL;
626                 goto out;
627         }
628         vd = (struct iso_primary_descriptor *)buf;
629         if (bcmp(vd->id, ISO_STANDARD_ID, sizeof vd->id) != 0)
630                 goto out;
631
632         entry = malloc(sizeof(*entry));
633         if (entry == NULL)
634                 goto out;
635         entry->part.start = 0;
636         entry->part.end = table->sectors;
637         entry->part.type = PART_ISO9660;
638         entry->part.index = 0;
639         STAILQ_INSERT_TAIL(&table->entries, entry, entry);
640
641         table->type = PTABLE_ISO9660;
642
643 out:
644         free(buf);
645         return (table);
646 }
647
648 struct ptable *
649 ptable_open(void *dev, uint64_t sectors, uint16_t sectorsize,
650     diskread_t *dread)
651 {
652         struct dos_partition *dp;
653         struct ptable *table;
654         uint8_t *buf;
655         int i, count;
656 #ifdef LOADER_MBR_SUPPORT
657         struct pentry *entry;
658         uint32_t start, end;
659         int has_ext;
660 #endif
661         table = NULL;
662         buf = malloc(sectorsize);
663         if (buf == NULL)
664                 return (NULL);
665         /* First, read the MBR. */
666         if (dread(dev, buf, 1, DOSBBSECTOR) != 0) {
667                 DEBUG("read failed");
668                 goto out;
669         }
670
671         table = malloc(sizeof(*table));
672         if (table == NULL)
673                 goto out;
674         table->sectors = sectors;
675         table->sectorsize = sectorsize;
676         table->type = PTABLE_NONE;
677         STAILQ_INIT(&table->entries);
678
679         if (ptable_iso9660read(table, dev, dread) != NULL) {
680                 if (table->type == PTABLE_ISO9660)
681                         goto out;
682         }
683
684 #ifdef LOADER_VTOC8_SUPPORT
685         if (be16dec(buf + offsetof(struct vtoc8, magic)) == VTOC_MAGIC) {
686                 if (ptable_vtoc8read(table, dev, dread) == NULL) {
687                         /* Read error. */
688                         table = NULL;
689                         goto out;
690                 } else if (table->type == PTABLE_VTOC8)
691                         goto out;
692         }
693 #endif
694         /* Check the BSD label. */
695         if (ptable_bsdread(table, dev, dread) == NULL) { /* Read error. */
696                 table = NULL;
697                 goto out;
698         } else if (table->type == PTABLE_BSD)
699                 goto out;
700
701 #if defined(LOADER_GPT_SUPPORT) || defined(LOADER_MBR_SUPPORT)
702         /* Check the MBR magic. */
703         if (buf[DOSMAGICOFFSET] != 0x55 ||
704             buf[DOSMAGICOFFSET + 1] != 0xaa) {
705                 DEBUG("magic sequence not found");
706 #if defined(LOADER_GPT_SUPPORT)
707                 /* There is no PMBR, check that we have backup GPT */
708                 table->type = PTABLE_GPT;
709                 table = ptable_gptread(table, dev, dread);
710 #endif
711                 goto out;
712         }
713         /* Check that we have PMBR. Also do some validation. */
714         dp = (struct dos_partition *)(buf + DOSPARTOFF);
715         for (i = 0, count = 0; i < NDOSPART; i++) {
716                 if (dp[i].dp_flag != 0 && dp[i].dp_flag != 0x80) {
717                         DEBUG("invalid partition flag %x", dp[i].dp_flag);
718                         goto out;
719                 }
720 #ifdef LOADER_GPT_SUPPORT
721                 if (dp[i].dp_typ == DOSPTYP_PMBR) {
722                         table->type = PTABLE_GPT;
723                         DEBUG("PMBR detected");
724                 }
725 #endif
726                 if (dp[i].dp_typ != 0)
727                         count++;
728         }
729         /* Do we have some invalid values? */
730         if (table->type == PTABLE_GPT && count > 1) {
731                 if (dp[1].dp_typ != DOSPTYP_HFS) {
732                         table->type = PTABLE_NONE;
733                         DEBUG("Incorrect PMBR, ignore it");
734                 } else {
735                         DEBUG("Bootcamp detected");
736                 }
737         }
738 #ifdef LOADER_GPT_SUPPORT
739         if (table->type == PTABLE_GPT) {
740                 table = ptable_gptread(table, dev, dread);
741                 goto out;
742         }
743 #endif
744 #ifdef LOADER_MBR_SUPPORT
745         /* Read MBR. */
746         DEBUG("MBR detected");
747         table->type = PTABLE_MBR;
748         for (i = has_ext = 0; i < NDOSPART; i++) {
749                 if (dp[i].dp_typ == 0)
750                         continue;
751                 start = le32dec(&(dp[i].dp_start));
752                 end = le32dec(&(dp[i].dp_size));
753                 if (start == 0 || end == 0)
754                         continue;
755 #if 0   /* Some BIOSes return an incorrect number of sectors */
756                 if (start + end - 1 >= sectors)
757                         continue;       /* XXX: ignore */
758 #endif
759                 if (dp[i].dp_typ == DOSPTYP_EXT ||
760                     dp[i].dp_typ == DOSPTYP_EXTLBA)
761                         has_ext = 1;
762                 entry = malloc(sizeof(*entry));
763                 if (entry == NULL)
764                         break;
765                 entry->part.start = start;
766                 entry->part.end = start + end - 1;
767                 entry->part.index = i + 1;
768                 entry->part.type = mbr_parttype(dp[i].dp_typ);
769                 entry->flags = dp[i].dp_flag;
770                 entry->type.mbr = dp[i].dp_typ;
771                 STAILQ_INSERT_TAIL(&table->entries, entry, entry);
772                 DEBUG("new MBR partition added");
773         }
774         if (has_ext) {
775                 table = ptable_ebrread(table, dev, dread);
776                 /* FALLTHROUGH */
777         }
778 #endif /* LOADER_MBR_SUPPORT */
779 #endif /* LOADER_MBR_SUPPORT || LOADER_GPT_SUPPORT */
780 out:
781         free(buf);
782         return (table);
783 }
784
785 void
786 ptable_close(struct ptable *table)
787 {
788         struct pentry *entry;
789
790         while (!STAILQ_EMPTY(&table->entries)) {
791                 entry = STAILQ_FIRST(&table->entries);
792                 STAILQ_REMOVE_HEAD(&table->entries, entry);
793                 free(entry);
794         }
795         free(table);
796 }
797
798 enum ptable_type
799 ptable_gettype(const struct ptable *table)
800 {
801
802         return (table->type);
803 }
804
805 int
806 ptable_getsize(const struct ptable *table, uint64_t *sizep)
807 {
808         uint64_t tmp = table->sectors * table->sectorsize;
809
810         if (tmp < table->sectors)
811                 return (EOVERFLOW);
812
813         if (sizep != NULL)
814                 *sizep = tmp;
815         return (0);
816 }
817
818 int
819 ptable_getpart(const struct ptable *table, struct ptable_entry *part, int index)
820 {
821         struct pentry *entry;
822
823         if (part == NULL || table == NULL)
824                 return (EINVAL);
825
826         STAILQ_FOREACH(entry, &table->entries, entry) {
827                 if (entry->part.index != index)
828                         continue;
829                 memcpy(part, &entry->part, sizeof(*part));
830                 return (0);
831         }
832         return (ENOENT);
833 }
834
835 /*
836  * Search for a slice with the following preferences:
837  *
838  * 1: Active FreeBSD slice
839  * 2: Non-active FreeBSD slice
840  * 3: Active Linux slice
841  * 4: non-active Linux slice
842  * 5: Active FAT/FAT32 slice
843  * 6: non-active FAT/FAT32 slice
844  */
845 #define PREF_RAWDISK    0
846 #define PREF_FBSD_ACT   1
847 #define PREF_FBSD       2
848 #define PREF_LINUX_ACT  3
849 #define PREF_LINUX      4
850 #define PREF_DOS_ACT    5
851 #define PREF_DOS        6
852 #define PREF_NONE       7
853 int
854 ptable_getbestpart(const struct ptable *table, struct ptable_entry *part)
855 {
856         struct pentry *entry, *best;
857         int pref, preflevel;
858
859         if (part == NULL || table == NULL)
860                 return (EINVAL);
861
862         best = NULL;
863         preflevel = pref = PREF_NONE;
864         STAILQ_FOREACH(entry, &table->entries, entry) {
865 #ifdef LOADER_MBR_SUPPORT
866                 if (table->type == PTABLE_MBR) {
867                         switch (entry->type.mbr) {
868                         case DOSPTYP_386BSD:
869                                 pref = entry->flags & 0x80 ? PREF_FBSD_ACT:
870                                     PREF_FBSD;
871                                 break;
872                         case DOSPTYP_LINUX:
873                                 pref = entry->flags & 0x80 ? PREF_LINUX_ACT:
874                                     PREF_LINUX;
875                                 break;
876                         case 0x01:              /* DOS/Windows */
877                         case 0x04:
878                         case 0x06:
879                         case 0x0c:
880                         case 0x0e:
881                         case DOSPTYP_FAT32:
882                                 pref = entry->flags & 0x80 ? PREF_DOS_ACT:
883                                     PREF_DOS;
884                                 break;
885                         default:
886                                 pref = PREF_NONE;
887                         }
888                 }
889 #endif /* LOADER_MBR_SUPPORT */
890 #ifdef LOADER_GPT_SUPPORT
891                 if (table->type == PTABLE_GPT) {
892                         if (entry->part.type == PART_DOS)
893                                 pref = PREF_DOS;
894                         else if (entry->part.type == PART_FREEBSD_UFS ||
895                             entry->part.type == PART_FREEBSD_ZFS)
896                                 pref = PREF_FBSD;
897                         else
898                                 pref = PREF_NONE;
899                 }
900 #endif /* LOADER_GPT_SUPPORT */
901                 if (pref < preflevel) {
902                         preflevel = pref;
903                         best = entry;
904                 }
905         }
906         if (best != NULL) {
907                 memcpy(part, &best->part, sizeof(*part));
908                 return (0);
909         }
910         return (ENOENT);
911 }
912
913 int
914 ptable_iterate(const struct ptable *table, void *arg, ptable_iterate_t *iter)
915 {
916         struct pentry *entry;
917         char name[32];
918         int ret = 0;
919
920         name[0] = '\0';
921         STAILQ_FOREACH(entry, &table->entries, entry) {
922 #ifdef LOADER_MBR_SUPPORT
923                 if (table->type == PTABLE_MBR)
924                         sprintf(name, "s%d", entry->part.index);
925                 else
926 #endif
927 #ifdef LOADER_GPT_SUPPORT
928                 if (table->type == PTABLE_GPT)
929                         sprintf(name, "p%d", entry->part.index);
930                 else
931 #endif
932 #ifdef LOADER_VTOC8_SUPPORT
933                 if (table->type == PTABLE_VTOC8)
934                         sprintf(name, "%c", (uint8_t) 'a' +
935                             entry->part.index);
936                 else
937 #endif
938                 if (table->type == PTABLE_BSD)
939                         sprintf(name, "%c", (uint8_t) 'a' +
940                             entry->part.index);
941                 if ((ret = iter(arg, name, &entry->part)) != 0)
942                         return (ret);
943         }
944         return (ret);
945 }