]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - stand/common/part.c
Optionally bind ktls threads to NUMA domains
[FreeBSD/FreeBSD.git] / stand / common / part.c
1 /*-
2  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include <stand.h>
31 #include <sys/param.h>
32 #include <sys/diskmbr.h>
33 #include <sys/disklabel.h>
34 #include <sys/endian.h>
35 #include <sys/gpt.h>
36 #include <sys/stddef.h>
37 #include <sys/queue.h>
38 #include <sys/vtoc.h>
39
40 #include <fs/cd9660/iso.h>
41
42 #include <zlib.h>
43 #include <part.h>
44 #include <uuid.h>
45
/*
 * Debug tracing.  With PART_DEBUG defined, DPRINTF() prints the name of the
 * calling function, the formatted message and a newline; otherwise it
 * expands to a no-op expression so call sites cost nothing.
 */
#ifdef PART_DEBUG
#define DPRINTF(fmt, args...) printf("%s: " fmt "\n", __func__, ## args)
#else
#define DPRINTF(fmt, args...)   ((void)0)
#endif
51
#ifdef LOADER_GPT_SUPPORT
/* Upper bound, in sectors, on how much of a GPT entry table is buffered. */
#define MAXTBLSZ        64
/*
 * GPT partition type UUIDs the loader recognizes.  gpt_uuid_unused marks an
 * empty table slot; the others are mapped to partition types by
 * gpt_parttype().
 */
static const uuid_t gpt_uuid_unused = GPT_ENT_TYPE_UNUSED;
static const uuid_t gpt_uuid_ms_basic_data = GPT_ENT_TYPE_MS_BASIC_DATA;
static const uuid_t gpt_uuid_freebsd_ufs = GPT_ENT_TYPE_FREEBSD_UFS;
static const uuid_t gpt_uuid_efi = GPT_ENT_TYPE_EFI;
static const uuid_t gpt_uuid_freebsd = GPT_ENT_TYPE_FREEBSD;
static const uuid_t gpt_uuid_freebsd_boot = GPT_ENT_TYPE_FREEBSD_BOOT;
static const uuid_t gpt_uuid_freebsd_swap = GPT_ENT_TYPE_FREEBSD_SWAP;
static const uuid_t gpt_uuid_freebsd_zfs = GPT_ENT_TYPE_FREEBSD_ZFS;
static const uuid_t gpt_uuid_freebsd_vinum = GPT_ENT_TYPE_FREEBSD_VINUM;
static const uuid_t gpt_uuid_apple_apfs = GPT_ENT_TYPE_APPLE_APFS;
#endif
65
/*
 * One partition found on the device, kept on the owning table's list.
 */
struct pentry {
        /* Scheme-independent description; copied out by ptable_getpart(). */
        struct ptable_entry     part;
        /* Raw scheme flags: the MBR dp_flag byte or the GPT ent_attr word. */
        uint64_t                flags;
        /* Raw, scheme-specific partition type as stored on disk. */
        union {
                uint8_t bsd;
                uint8_t mbr;
                uuid_t  gpt;
                uint16_t vtoc8;
        } type;
        /* Linkage for ptable.entries. */
        STAILQ_ENTRY(pentry)    entry;
};
77
/*
 * A parsed partition table: the detected scheme, the device geometry it was
 * opened with, and the list of partitions found.
 */
struct ptable {
        enum ptable_type        type;           /* detected scheme */
        uint16_t                sectorsize;     /* bytes per sector */
        uint64_t                sectors;        /* device size in sectors */

        STAILQ_HEAD(, pentry)   entries;        /* partitions, in table order */
};
85
/*
 * Human-readable names for partition types, searched by parttype2str().
 * The first entry doubles as the fallback for types that are not listed.
 */
static struct parttypes {
        enum partition_type     type;
        const char              *desc;
} ptypes[] = {
        { PART_UNKNOWN,         "Unknown" },
        { PART_EFI,             "EFI" },
        { PART_FREEBSD,         "FreeBSD" },
        { PART_FREEBSD_BOOT,    "FreeBSD boot" },
        { PART_FREEBSD_UFS,     "FreeBSD UFS" },
        { PART_FREEBSD_ZFS,     "FreeBSD ZFS" },
        { PART_FREEBSD_SWAP,    "FreeBSD swap" },
        { PART_FREEBSD_VINUM,   "FreeBSD vinum" },
        { PART_LINUX,           "Linux" },
        { PART_LINUX_SWAP,      "Linux swap" },
        { PART_DOS,             "DOS/Windows" },
        { PART_ISO9660,         "ISO9660" },
        { PART_APFS,            "APFS" },
};
104
105 const char *
106 parttype2str(enum partition_type type)
107 {
108         size_t i;
109
110         for (i = 0; i < nitems(ptypes); i++)
111                 if (ptypes[i].type == type)
112                         return (ptypes[i].desc);
113         return (ptypes[0].desc);
114 }
115
116 #ifdef LOADER_GPT_SUPPORT
117 static void
118 uuid_letoh(uuid_t *uuid)
119 {
120
121         uuid->time_low = le32toh(uuid->time_low);
122         uuid->time_mid = le16toh(uuid->time_mid);
123         uuid->time_hi_and_version = le16toh(uuid->time_hi_and_version);
124 }
125
126 static enum partition_type
127 gpt_parttype(uuid_t type)
128 {
129
130         if (uuid_equal(&type, &gpt_uuid_efi, NULL))
131                 return (PART_EFI);
132         else if (uuid_equal(&type, &gpt_uuid_ms_basic_data, NULL))
133                 return (PART_DOS);
134         else if (uuid_equal(&type, &gpt_uuid_freebsd_boot, NULL))
135                 return (PART_FREEBSD_BOOT);
136         else if (uuid_equal(&type, &gpt_uuid_freebsd_ufs, NULL))
137                 return (PART_FREEBSD_UFS);
138         else if (uuid_equal(&type, &gpt_uuid_freebsd_zfs, NULL))
139                 return (PART_FREEBSD_ZFS);
140         else if (uuid_equal(&type, &gpt_uuid_freebsd_swap, NULL))
141                 return (PART_FREEBSD_SWAP);
142         else if (uuid_equal(&type, &gpt_uuid_freebsd_vinum, NULL))
143                 return (PART_FREEBSD_VINUM);
144         else if (uuid_equal(&type, &gpt_uuid_freebsd, NULL))
145                 return (PART_FREEBSD);
146         else if (uuid_equal(&type, &gpt_uuid_apple_apfs, NULL))
147                 return (PART_APFS);
148         return (PART_UNKNOWN);
149 }
150
/*
 * Validate a GPT header read from disk and convert it to host byte order.
 *
 * hdr        - raw header as read from the device; modified in place.
 * lba_self   - LBA the header was read from; must equal hdr_lba_self.
 * lba_last   - last LBA of the device; not consulted by the checks below.
 * sectorsize - sector size in bytes; bounds the header size and must be a
 *              multiple of the entry size.
 *
 * Returns hdr on success and NULL as soon as any check fails.  On failure
 * the header may be partially converted and must not be used.
 */
static struct gpt_hdr *
gpt_checkhdr(struct gpt_hdr *hdr, uint64_t lba_self, uint64_t lba_last,
    uint16_t sectorsize)
{
        uint32_t sz, crc;

        if (memcmp(hdr->hdr_sig, GPT_HDR_SIG, sizeof(hdr->hdr_sig)) != 0) {
                DPRINTF("no GPT signature");
                return (NULL);
        }
        /* The header must be at least 92 bytes and fit in one sector. */
        sz = le32toh(hdr->hdr_size);
        if (sz < 92 || sz > sectorsize) {
                DPRINTF("invalid GPT header size: %d", sz);
                return (NULL);
        }
        /*
         * The stored CRC is checked against a CRC of the header computed
         * with the hdr_crc_self field replaced by zlib's initial CRC value.
         * This must happen before any other field is byte-swapped.
         */
        crc = le32toh(hdr->hdr_crc_self);
        hdr->hdr_crc_self = crc32(0, Z_NULL, 0);
        if (crc32(hdr->hdr_crc_self, (const Bytef *)hdr, sz) != crc) {
                DPRINTF("GPT header's CRC doesn't match");
                return (NULL);
        }
        /* Store the CRC back, now in host byte order. */
        hdr->hdr_crc_self = crc;
        hdr->hdr_revision = le32toh(hdr->hdr_revision);
        if (hdr->hdr_revision < GPT_HDR_REVISION) {
                DPRINTF("unsupported GPT revision %d", hdr->hdr_revision);
                return (NULL);
        }
        hdr->hdr_lba_self = le64toh(hdr->hdr_lba_self);
        if (hdr->hdr_lba_self != lba_self) {
                DPRINTF("self LBA doesn't match");
                return (NULL);
        }
        /* The alternate header cannot live in the same sector as this one. */
        hdr->hdr_lba_alt = le64toh(hdr->hdr_lba_alt);
        if (hdr->hdr_lba_alt == hdr->hdr_lba_self) {
                DPRINTF("invalid alternate LBA");
                return (NULL);
        }
        /*
         * Entries must be at least as large as struct gpt_ent and must pack
         * evenly into a sector.  The size check runs first, so the modulo
         * below never divides by zero.
         */
        hdr->hdr_entries = le32toh(hdr->hdr_entries);
        hdr->hdr_entsz = le32toh(hdr->hdr_entsz);
        if (hdr->hdr_entries == 0 ||
            hdr->hdr_entsz < sizeof(struct gpt_ent) ||
            sectorsize % hdr->hdr_entsz != 0) {
                DPRINTF("invalid entry size or number of entries");
                return (NULL);
        }
        /* Remaining fields are converted without further validation. */
        hdr->hdr_lba_start = le64toh(hdr->hdr_lba_start);
        hdr->hdr_lba_end = le64toh(hdr->hdr_lba_end);
        hdr->hdr_lba_table = le64toh(hdr->hdr_lba_table);
        hdr->hdr_crc_table = le32toh(hdr->hdr_crc_table);
        uuid_letoh(&hdr->hdr_uuid);
        return (hdr);
}
203
204 static int
205 gpt_checktbl(const struct gpt_hdr *hdr, uint8_t *tbl, size_t size,
206     uint64_t lba_last)
207 {
208         struct gpt_ent *ent;
209         uint32_t i, cnt;
210
211         cnt = size / hdr->hdr_entsz;
212         if (hdr->hdr_entries <= cnt) {
213                 cnt = hdr->hdr_entries;
214                 /* Check CRC only when buffer size is enough for table. */
215                 if (hdr->hdr_crc_table !=
216                     crc32(0, tbl, hdr->hdr_entries * hdr->hdr_entsz)) {
217                         DPRINTF("GPT table's CRC doesn't match");
218                         return (-1);
219                 }
220         }
221         for (i = 0; i < cnt; i++) {
222                 ent = (struct gpt_ent *)(tbl + i * hdr->hdr_entsz);
223                 uuid_letoh(&ent->ent_type);
224                 if (uuid_equal(&ent->ent_type, &gpt_uuid_unused, NULL))
225                         continue;
226                 ent->ent_lba_start = le64toh(ent->ent_lba_start);
227                 ent->ent_lba_end = le64toh(ent->ent_lba_end);
228         }
229         return (0);
230 }
231
232 static struct ptable *
233 ptable_gptread(struct ptable *table, void *dev, diskread_t dread)
234 {
235         struct pentry *entry;
236         struct gpt_hdr *phdr, hdr;
237         struct gpt_ent *ent;
238         uint8_t *buf, *tbl;
239         uint64_t offset;
240         int pri, sec;
241         size_t size, i;
242
243         buf = malloc(table->sectorsize);
244         if (buf == NULL)
245                 return (NULL);
246         tbl = malloc(table->sectorsize * MAXTBLSZ);
247         if (tbl == NULL) {
248                 free(buf);
249                 return (NULL);
250         }
251         /* Read the primary GPT header. */
252         if (dread(dev, buf, 1, 1) != 0) {
253                 ptable_close(table);
254                 table = NULL;
255                 goto out;
256         }
257         pri = sec = 0;
258         /* Check the primary GPT header. */
259         phdr = gpt_checkhdr((struct gpt_hdr *)buf, 1, table->sectors - 1,
260             table->sectorsize);
261         if (phdr != NULL) {
262                 /* Read the primary GPT table. */
263                 size = MIN(MAXTBLSZ,
264                     howmany(phdr->hdr_entries * phdr->hdr_entsz,
265                         table->sectorsize));
266                 if (dread(dev, tbl, size, phdr->hdr_lba_table) == 0 &&
267                     gpt_checktbl(phdr, tbl, size * table->sectorsize,
268                     table->sectors - 1) == 0) {
269                         memcpy(&hdr, phdr, sizeof(hdr));
270                         pri = 1;
271                 }
272         }
273         offset = pri ? hdr.hdr_lba_alt: table->sectors - 1;
274         /* Read the backup GPT header. */
275         if (dread(dev, buf, 1, offset) != 0)
276                 phdr = NULL;
277         else
278                 phdr = gpt_checkhdr((struct gpt_hdr *)buf, offset,
279                     table->sectors - 1, table->sectorsize);
280         if (phdr != NULL) {
281                 /*
282                  * Compare primary and backup headers.
283                  * If they are equal, then we do not need to read backup
284                  * table. If they are different, then prefer backup header
285                  * and try to read backup table.
286                  */
287                 if (pri == 0 ||
288                     uuid_equal(&hdr.hdr_uuid, &phdr->hdr_uuid, NULL) == 0 ||
289                     hdr.hdr_revision != phdr->hdr_revision ||
290                     hdr.hdr_size != phdr->hdr_size ||
291                     hdr.hdr_lba_start != phdr->hdr_lba_start ||
292                     hdr.hdr_lba_end != phdr->hdr_lba_end ||
293                     hdr.hdr_entries != phdr->hdr_entries ||
294                     hdr.hdr_entsz != phdr->hdr_entsz ||
295                     hdr.hdr_crc_table != phdr->hdr_crc_table) {
296                         /* Read the backup GPT table. */
297                         size = MIN(MAXTBLSZ,
298                                    howmany(phdr->hdr_entries * phdr->hdr_entsz,
299                                        table->sectorsize));
300                         if (dread(dev, tbl, size, phdr->hdr_lba_table) == 0 &&
301                             gpt_checktbl(phdr, tbl, size * table->sectorsize,
302                             table->sectors - 1) == 0) {
303                                 memcpy(&hdr, phdr, sizeof(hdr));
304                                 sec = 1;
305                         }
306                 }
307         }
308         if (pri == 0 && sec == 0) {
309                 /* Both primary and backup tables are invalid. */
310                 table->type = PTABLE_NONE;
311                 goto out;
312         }
313         DPRINTF("GPT detected");
314         size = MIN(hdr.hdr_entries * hdr.hdr_entsz,
315             MAXTBLSZ * table->sectorsize);
316
317         /*
318          * If the disk's sector count is smaller than the sector count recorded
319          * in the disk's GPT table header, set the table->sectors to the value
320          * recorded in GPT tables. This is done to work around buggy firmware
321          * that returns truncated disk sizes.
322          *
323          * Note, this is still not a foolproof way to get disk's size. For
324          * example, an image file can be truncated when copied to smaller media.
325          */
326         table->sectors = hdr.hdr_lba_alt + 1;
327
328         for (i = 0; i < size / hdr.hdr_entsz; i++) {
329                 ent = (struct gpt_ent *)(tbl + i * hdr.hdr_entsz);
330                 if (uuid_equal(&ent->ent_type, &gpt_uuid_unused, NULL))
331                         continue;
332
333                 /* Simple sanity checks. */
334                 if (ent->ent_lba_start < hdr.hdr_lba_start ||
335                     ent->ent_lba_end > hdr.hdr_lba_end ||
336                     ent->ent_lba_start > ent->ent_lba_end)
337                         continue;
338
339                 entry = malloc(sizeof(*entry));
340                 if (entry == NULL)
341                         break;
342                 entry->part.start = ent->ent_lba_start;
343                 entry->part.end = ent->ent_lba_end;
344                 entry->part.index = i + 1;
345                 entry->part.type = gpt_parttype(ent->ent_type);
346                 entry->flags = le64toh(ent->ent_attr);
347                 memcpy(&entry->type.gpt, &ent->ent_type, sizeof(uuid_t));
348                 STAILQ_INSERT_TAIL(&table->entries, entry, entry);
349                 DPRINTF("new GPT partition added");
350         }
351 out:
352         free(buf);
353         free(tbl);
354         return (table);
355 }
356 #endif /* LOADER_GPT_SUPPORT */
357
358 #ifdef LOADER_MBR_SUPPORT
/*
 * We do not need to support too many EBR partitions in the loader.
 * This bounds the number of EBR sectors ptable_ebrread() will read.
 */
#define MAXEBRENTRIES           8
361 static enum partition_type
362 mbr_parttype(uint8_t type)
363 {
364
365         switch (type) {
366         case DOSPTYP_386BSD:
367                 return (PART_FREEBSD);
368         case DOSPTYP_LINSWP:
369                 return (PART_LINUX_SWAP);
370         case DOSPTYP_LINUX:
371                 return (PART_LINUX);
372         case 0x01:
373         case 0x04:
374         case 0x06:
375         case 0x07:
376         case 0x0b:
377         case 0x0c:
378         case 0x0e:
379                 return (PART_DOS);
380         }
381         return (PART_UNKNOWN);
382 }
383
/*
 * Walk the chain of extended boot records (EBRs) inside the first extended
 * MBR partition and append the logical partitions found to the table.
 *
 * table - table already holding the primary MBR entries.
 * dev   - opaque device handle passed through to dread.
 * dread - sector read callback; returns 0 on success.
 *
 * Always returns the table.  Read or allocation failures simply end the
 * walk, leaving whatever was added so far.  At most MAXEBRENTRIES EBR
 * sectors are read.
 */
static struct ptable *
ptable_ebrread(struct ptable *table, void *dev, diskread_t dread)
{
        struct dos_partition *dp;
        struct pentry *e1, *entry;
        uint32_t start, end, offset;
        u_char *buf;
        int i, index;

        /* Locate the first extended partition among the existing entries. */
        STAILQ_FOREACH(e1, &table->entries, entry) {
                if (e1->type.mbr == DOSPTYP_EXT ||
                    e1->type.mbr == DOSPTYP_EXTLBA)
                        break;
        }
        if (e1 == NULL)
                return (table);
        /* Logical partitions are numbered from 5 onwards. */
        index = 5;
        offset = e1->part.start;
        buf = malloc(table->sectorsize);
        if (buf == NULL)
                return (table);
        DPRINTF("EBR detected");
        for (i = 0; i < MAXEBRENTRIES; i++) {
#if 0   /* Some BIOSes return an incorrect number of sectors */
                if (offset >= table->sectors)
                        break;
#endif
                if (dread(dev, buf, 1, offset) != 0)
                        break;
                dp = (struct dos_partition *)(buf + DOSPARTOFF);
                /* An empty first slot terminates the chain. */
                if (dp[0].dp_typ == 0)
                        break;
                start = le32toh(dp[0].dp_start);
                /*
                 * A lone extended entry in the first slot is treated as a
                 * link: follow it, relative to the start of the extended
                 * partition, without recording a partition.
                 */
                if (dp[0].dp_typ == DOSPTYP_EXT &&
                    dp[1].dp_typ == 0) {
                        offset = e1->part.start + start;
                        continue;
                }
                /* Despite the name, `end' holds the partition size here. */
                end = le32toh(dp[0].dp_size);
                entry = malloc(sizeof(*entry));
                if (entry == NULL)
                        break;
                /* The first slot's start is relative to this EBR sector. */
                entry->part.start = offset + start;
                entry->part.end = entry->part.start + end - 1;
                entry->part.index = index++;
                entry->part.type = mbr_parttype(dp[0].dp_typ);
                entry->flags = dp[0].dp_flag;
                entry->type.mbr = dp[0].dp_typ;
                STAILQ_INSERT_TAIL(&table->entries, entry, entry);
                DPRINTF("new EBR partition added");
                /* No second slot means there is no further EBR. */
                if (dp[1].dp_typ == 0)
                        break;
                /*
                 * The second slot's start is relative to the start of the
                 * extended partition, not to this EBR.
                 */
                offset = e1->part.start + le32toh(dp[1].dp_start);
        }
        free(buf);
        return (table);
}
441 #endif /* LOADER_MBR_SUPPORT */
442
443 static enum partition_type
444 bsd_parttype(uint8_t type)
445 {
446
447         switch (type) {
448         case FS_SWAP:
449                 return (PART_FREEBSD_SWAP);
450         case FS_BSDFFS:
451                 return (PART_FREEBSD_UFS);
452         case FS_VINUM:
453                 return (PART_FREEBSD_VINUM);
454         case FS_ZFS:
455                 return (PART_FREEBSD_ZFS);
456         }
457         return (PART_UNKNOWN);
458 }
459
460 static struct ptable *
461 ptable_bsdread(struct ptable *table, void *dev, diskread_t dread)
462 {
463         struct disklabel *dl;
464         struct partition *part;
465         struct pentry *entry;
466         uint8_t *buf;
467         uint32_t raw_offset;
468         int i;
469
470         if (table->sectorsize < sizeof(struct disklabel)) {
471                 DPRINTF("Too small sectorsize");
472                 return (table);
473         }
474         buf = malloc(table->sectorsize);
475         if (buf == NULL)
476                 return (table);
477         if (dread(dev, buf, 1, 1) != 0) {
478                 DPRINTF("read failed");
479                 ptable_close(table);
480                 table = NULL;
481                 goto out;
482         }
483         dl = (struct disklabel *)buf;
484         if (le32toh(dl->d_magic) != DISKMAGIC &&
485             le32toh(dl->d_magic2) != DISKMAGIC)
486                 goto out;
487         if (le32toh(dl->d_secsize) != table->sectorsize) {
488                 DPRINTF("unsupported sector size");
489                 goto out;
490         }
491         dl->d_npartitions = le16toh(dl->d_npartitions);
492         if (dl->d_npartitions > 20 || dl->d_npartitions < 8) {
493                 DPRINTF("invalid number of partitions");
494                 goto out;
495         }
496         DPRINTF("BSD detected");
497         part = &dl->d_partitions[0];
498         raw_offset = le32toh(part[RAW_PART].p_offset);
499         for (i = 0; i < dl->d_npartitions; i++, part++) {
500                 if (i == RAW_PART)
501                         continue;
502                 if (part->p_size == 0)
503                         continue;
504                 entry = malloc(sizeof(*entry));
505                 if (entry == NULL)
506                         break;
507                 entry->part.start = le32toh(part->p_offset) - raw_offset;
508                 entry->part.end = entry->part.start +
509                     le32toh(part->p_size) - 1;
510                 entry->part.type = bsd_parttype(part->p_fstype);
511                 entry->part.index = i; /* starts from zero */
512                 entry->type.bsd = part->p_fstype;
513                 STAILQ_INSERT_TAIL(&table->entries, entry, entry);
514                 DPRINTF("new BSD partition added");
515         }
516         table->type = PTABLE_BSD;
517 out:
518         free(buf);
519         return (table);
520 }
521
522 #ifdef LOADER_VTOC8_SUPPORT
523 static enum partition_type
524 vtoc8_parttype(uint16_t type)
525 {
526
527         switch (type) {
528         case VTOC_TAG_FREEBSD_SWAP:
529                 return (PART_FREEBSD_SWAP);
530         case VTOC_TAG_FREEBSD_UFS:
531                 return (PART_FREEBSD_UFS);
532         case VTOC_TAG_FREEBSD_VINUM:
533                 return (PART_FREEBSD_VINUM);
534         case VTOC_TAG_FREEBSD_ZFS:
535                 return (PART_FREEBSD_ZFS);
536         }
537         return (PART_UNKNOWN);
538 }
539
540 static struct ptable *
541 ptable_vtoc8read(struct ptable *table, void *dev, diskread_t dread)
542 {
543         struct pentry *entry;
544         struct vtoc8 *dl;
545         uint8_t *buf;
546         uint16_t sum, heads, sectors;
547         int i;
548
549         if (table->sectorsize != sizeof(struct vtoc8))
550                 return (table);
551         buf = malloc(table->sectorsize);
552         if (buf == NULL)
553                 return (table);
554         if (dread(dev, buf, 1, 0) != 0) {
555                 DPRINTF("read failed");
556                 ptable_close(table);
557                 table = NULL;
558                 goto out;
559         }
560         dl = (struct vtoc8 *)buf;
561         /* Check the sum */
562         for (i = sum = 0; i < sizeof(struct vtoc8); i += sizeof(sum))
563                 sum ^= be16dec(buf + i);
564         if (sum != 0) {
565                 DPRINTF("incorrect checksum");
566                 goto out;
567         }
568         if (be16toh(dl->nparts) != VTOC8_NPARTS) {
569                 DPRINTF("invalid number of entries");
570                 goto out;
571         }
572         sectors = be16toh(dl->nsecs);
573         heads = be16toh(dl->nheads);
574         if (sectors * heads == 0) {
575                 DPRINTF("invalid geometry");
576                 goto out;
577         }
578         DPRINTF("VTOC8 detected");
579         for (i = 0; i < VTOC8_NPARTS; i++) {
580                 dl->part[i].tag = be16toh(dl->part[i].tag);
581                 if (i == VTOC_RAW_PART ||
582                     dl->part[i].tag == VTOC_TAG_UNASSIGNED)
583                         continue;
584                 entry = malloc(sizeof(*entry));
585                 if (entry == NULL)
586                         break;
587                 entry->part.start = be32toh(dl->map[i].cyl) * heads * sectors;
588                 entry->part.end = be32toh(dl->map[i].nblks) +
589                     entry->part.start - 1;
590                 entry->part.type = vtoc8_parttype(dl->part[i].tag);
591                 entry->part.index = i; /* starts from zero */
592                 entry->type.vtoc8 = dl->part[i].tag;
593                 STAILQ_INSERT_TAIL(&table->entries, entry, entry);
594                 DPRINTF("new VTOC8 partition added");
595         }
596         table->type = PTABLE_VTOC8;
597 out:
598         free(buf);
599         return (table);
600
601 }
602 #endif /* LOADER_VTOC8_SUPPORT */
603
/*
 * Convert an ISO9660 block number into a device sector number.  Relies on
 * a variable named `table' being in scope at the point of use.
 */
#define cdb2devb(bno)   ((bno) * ISO_DEFAULT_BLOCK_SIZE / table->sectorsize)
605
606 static struct ptable *
607 ptable_iso9660read(struct ptable *table, void *dev, diskread_t dread)
608 {
609         uint8_t *buf;
610         struct iso_primary_descriptor *vd;
611         struct pentry *entry;
612
613         buf = malloc(table->sectorsize);
614         if (buf == NULL)
615                 return (table);
616                 
617         if (dread(dev, buf, 1, cdb2devb(16)) != 0) {
618                 DPRINTF("read failed");
619                 ptable_close(table);
620                 table = NULL;
621                 goto out;
622         }
623         vd = (struct iso_primary_descriptor *)buf;
624         if (bcmp(vd->id, ISO_STANDARD_ID, sizeof vd->id) != 0)
625                 goto out;
626
627         entry = malloc(sizeof(*entry));
628         if (entry == NULL)
629                 goto out;
630         entry->part.start = 0;
631         entry->part.end = table->sectors;
632         entry->part.type = PART_ISO9660;
633         entry->part.index = 0;
634         STAILQ_INSERT_TAIL(&table->entries, entry, entry);
635
636         table->type = PTABLE_ISO9660;
637
638 out:
639         free(buf);
640         return (table);
641 }
642
643 struct ptable *
644 ptable_open(void *dev, uint64_t sectors, uint16_t sectorsize,
645     diskread_t *dread)
646 {
647         struct dos_partition *dp;
648         struct ptable *table;
649         uint8_t *buf;
650 #ifdef LOADER_MBR_SUPPORT
651         struct pentry *entry;
652         uint32_t start, end;
653         int has_ext;
654 #endif
655         table = NULL;
656         dp = NULL;
657         buf = malloc(sectorsize);
658         if (buf == NULL)
659                 return (NULL);
660         /* First, read the MBR. */
661         if (dread(dev, buf, 1, DOSBBSECTOR) != 0) {
662                 DPRINTF("read failed");
663                 goto out;
664         }
665
666         table = malloc(sizeof(*table));
667         if (table == NULL)
668                 goto out;
669         table->sectors = sectors;
670         table->sectorsize = sectorsize;
671         table->type = PTABLE_NONE;
672         STAILQ_INIT(&table->entries);
673
674         if (ptable_iso9660read(table, dev, dread) == NULL) {
675                 /* Read error. */
676                 table = NULL;
677                 goto out;
678         } else if (table->type == PTABLE_ISO9660)
679                 goto out;
680
681 #ifdef LOADER_VTOC8_SUPPORT
682         if (be16dec(buf + offsetof(struct vtoc8, magic)) == VTOC_MAGIC) {
683                 if (ptable_vtoc8read(table, dev, dread) == NULL) {
684                         /* Read error. */
685                         table = NULL;
686                         goto out;
687                 } else if (table->type == PTABLE_VTOC8)
688                         goto out;
689         }
690 #endif
691         /* Check the BSD label. */
692         if (ptable_bsdread(table, dev, dread) == NULL) { /* Read error. */
693                 table = NULL;
694                 goto out;
695         } else if (table->type == PTABLE_BSD)
696                 goto out;
697
698 #if defined(LOADER_GPT_SUPPORT) || defined(LOADER_MBR_SUPPORT)
699         /* Check the MBR magic. */
700         if (buf[DOSMAGICOFFSET] != 0x55 ||
701             buf[DOSMAGICOFFSET + 1] != 0xaa) {
702                 DPRINTF("magic sequence not found");
703 #if defined(LOADER_GPT_SUPPORT)
704                 /* There is no PMBR, check that we have backup GPT */
705                 table->type = PTABLE_GPT;
706                 table = ptable_gptread(table, dev, dread);
707 #endif
708                 goto out;
709         }
710         /* Check that we have PMBR. Also do some validation. */
711         dp = malloc(NDOSPART * sizeof(struct dos_partition));
712         if (dp == NULL)
713                 goto out;
714         bcopy(buf + DOSPARTOFF, dp, NDOSPART * sizeof(struct dos_partition));
715
716         /*
717          * In mac we can have PMBR partition in hybrid MBR;
718          * that is, MBR partition which has DOSPTYP_PMBR entry defined as
719          * start sector 1. After DOSPTYP_PMBR, there may be other partitions.
720          * UEFI compliant PMBR has no other partitions.
721          */
722         for (int i = 0; i < NDOSPART; i++) {
723                 if (dp[i].dp_flag != 0 && dp[i].dp_flag != 0x80) {
724                         DPRINTF("invalid partition flag %x", dp[i].dp_flag);
725                         goto out;
726                 }
727 #ifdef LOADER_GPT_SUPPORT
728                 if (dp[i].dp_typ == DOSPTYP_PMBR && dp[i].dp_start == 1) {
729                         table->type = PTABLE_GPT;
730                         DPRINTF("PMBR detected");
731                 }
732 #endif
733         }
734 #ifdef LOADER_GPT_SUPPORT
735         if (table->type == PTABLE_GPT) {
736                 table = ptable_gptread(table, dev, dread);
737                 goto out;
738         }
739 #endif
740 #ifdef LOADER_MBR_SUPPORT
741         /* Read MBR. */
742         DPRINTF("MBR detected");
743         table->type = PTABLE_MBR;
744         for (int i = has_ext = 0; i < NDOSPART; i++) {
745                 if (dp[i].dp_typ == 0)
746                         continue;
747                 start = le32dec(&(dp[i].dp_start));
748                 end = le32dec(&(dp[i].dp_size));
749                 if (start == 0 || end == 0)
750                         continue;
751 #if 0   /* Some BIOSes return an incorrect number of sectors */
752                 if (start + end - 1 >= sectors)
753                         continue;       /* XXX: ignore */
754 #endif
755                 if (dp[i].dp_typ == DOSPTYP_EXT ||
756                     dp[i].dp_typ == DOSPTYP_EXTLBA)
757                         has_ext = 1;
758                 entry = malloc(sizeof(*entry));
759                 if (entry == NULL)
760                         break;
761                 entry->part.start = start;
762                 entry->part.end = start + end - 1;
763                 entry->part.index = i + 1;
764                 entry->part.type = mbr_parttype(dp[i].dp_typ);
765                 entry->flags = dp[i].dp_flag;
766                 entry->type.mbr = dp[i].dp_typ;
767                 STAILQ_INSERT_TAIL(&table->entries, entry, entry);
768                 DPRINTF("new MBR partition added");
769         }
770         if (has_ext) {
771                 table = ptable_ebrread(table, dev, dread);
772                 /* FALLTHROUGH */
773         }
774 #endif /* LOADER_MBR_SUPPORT */
775 #endif /* LOADER_MBR_SUPPORT || LOADER_GPT_SUPPORT */
776 out:
777         free(dp);
778         free(buf);
779         return (table);
780 }
781
782 void
783 ptable_close(struct ptable *table)
784 {
785         struct pentry *entry;
786
787         if (table == NULL)
788                 return;
789
790         while (!STAILQ_EMPTY(&table->entries)) {
791                 entry = STAILQ_FIRST(&table->entries);
792                 STAILQ_REMOVE_HEAD(&table->entries, entry);
793                 free(entry);
794         }
795         free(table);
796 }
797
798 enum ptable_type
799 ptable_gettype(const struct ptable *table)
800 {
801
802         return (table->type);
803 }
804
805 int
806 ptable_getsize(const struct ptable *table, uint64_t *sizep)
807 {
808         uint64_t tmp = table->sectors * table->sectorsize;
809
810         if (tmp < table->sectors)
811                 return (EOVERFLOW);
812
813         if (sizep != NULL)
814                 *sizep = tmp;
815         return (0);
816 }
817
818 int
819 ptable_getpart(const struct ptable *table, struct ptable_entry *part, int index)
820 {
821         struct pentry *entry;
822
823         if (part == NULL || table == NULL)
824                 return (EINVAL);
825
826         STAILQ_FOREACH(entry, &table->entries, entry) {
827                 if (entry->part.index != index)
828                         continue;
829                 memcpy(part, &entry->part, sizeof(*part));
830                 return (0);
831         }
832         return (ENOENT);
833 }
834
835 /*
836  * Search for a slice with the following preferences:
837  *
838  * 1: Active FreeBSD slice
839  * 2: Non-active FreeBSD slice
840  * 3: Active Linux slice
841  * 4: non-active Linux slice
842  * 5: Active FAT/FAT32 slice
843  * 6: non-active FAT/FAT32 slice
844  */
845 #define PREF_RAWDISK    0
846 #define PREF_FBSD_ACT   1
847 #define PREF_FBSD       2
848 #define PREF_LINUX_ACT  3
849 #define PREF_LINUX      4
850 #define PREF_DOS_ACT    5
851 #define PREF_DOS        6
852 #define PREF_NONE       7
853 int
854 ptable_getbestpart(const struct ptable *table, struct ptable_entry *part)
855 {
856         struct pentry *entry, *best;
857         int pref, preflevel;
858
859         if (part == NULL || table == NULL)
860                 return (EINVAL);
861
862         best = NULL;
863         preflevel = pref = PREF_NONE;
864         STAILQ_FOREACH(entry, &table->entries, entry) {
865 #ifdef LOADER_MBR_SUPPORT
866                 if (table->type == PTABLE_MBR) {
867                         switch (entry->type.mbr) {
868                         case DOSPTYP_386BSD:
869                                 pref = entry->flags & 0x80 ? PREF_FBSD_ACT:
870                                     PREF_FBSD;
871                                 break;
872                         case DOSPTYP_LINUX:
873                                 pref = entry->flags & 0x80 ? PREF_LINUX_ACT:
874                                     PREF_LINUX;
875                                 break;
876                         case 0x01:              /* DOS/Windows */
877                         case 0x04:
878                         case 0x06:
879                         case 0x0c:
880                         case 0x0e:
881                         case DOSPTYP_FAT32:
882                                 pref = entry->flags & 0x80 ? PREF_DOS_ACT:
883                                     PREF_DOS;
884                                 break;
885                         default:
886                                 pref = PREF_NONE;
887                         }
888                 }
889 #endif /* LOADER_MBR_SUPPORT */
890 #ifdef LOADER_GPT_SUPPORT
891                 if (table->type == PTABLE_GPT) {
892                         if (entry->part.type == PART_DOS)
893                                 pref = PREF_DOS;
894                         else if (entry->part.type == PART_FREEBSD_UFS ||
895                             entry->part.type == PART_FREEBSD_ZFS)
896                                 pref = PREF_FBSD;
897                         else
898                                 pref = PREF_NONE;
899                 }
900 #endif /* LOADER_GPT_SUPPORT */
901                 if (pref < preflevel) {
902                         preflevel = pref;
903                         best = entry;
904                 }
905         }
906         if (best != NULL) {
907                 memcpy(part, &best->part, sizeof(*part));
908                 return (0);
909         }
910         return (ENOENT);
911 }
912
913 int
914 ptable_iterate(const struct ptable *table, void *arg, ptable_iterate_t *iter)
915 {
916         struct pentry *entry;
917         char name[32];
918         int ret = 0;
919
920         name[0] = '\0';
921         STAILQ_FOREACH(entry, &table->entries, entry) {
922 #ifdef LOADER_MBR_SUPPORT
923                 if (table->type == PTABLE_MBR)
924                         sprintf(name, "s%d", entry->part.index);
925                 else
926 #endif
927 #ifdef LOADER_GPT_SUPPORT
928                 if (table->type == PTABLE_GPT)
929                         sprintf(name, "p%d", entry->part.index);
930                 else
931 #endif
932 #ifdef LOADER_VTOC8_SUPPORT
933                 if (table->type == PTABLE_VTOC8)
934                         sprintf(name, "%c", (uint8_t) 'a' +
935                             entry->part.index);
936                 else
937 #endif
938                 if (table->type == PTABLE_BSD)
939                         sprintf(name, "%c", (uint8_t) 'a' +
940                             entry->part.index);
941                 if ((ret = iter(arg, name, &entry->part)) != 0)
942                         return (ret);
943         }
944         return (ret);
945 }