/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident   "%Z%%M% %I%     %E% SMI"

/*
 * The 512-byte leaf is broken into 32 16-byte chunks.
 * Chunk number n means l_chunk[n], even though the header precedes it.
 * The names are stored null-terminated.
 */

#include <sys/zfs_context.h>
#include <sys/zap.h>
#include <sys/zap_impl.h>
#include <sys/zap_leaf.h>
#include <sys/spa.h>
#include <sys/dmu.h>

static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);

#define CHAIN_END 0xffff /* end of the chunk chain */

/* half the (current) minimum block size */
#define MAX_ARRAY_BYTES (8<<10)

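/*
 * LEAF_HASH() selects the hash-table bucket for an entry within this
 * leaf: it takes the ZAP_LEAF_HASH_SHIFT(l) bits of the 64-bit hash
 * that sit immediately below the lh_prefix_len bits which route the
 * hash to this leaf.  For example, if ZAP_LEAF_HASH_SHIFT(l) were 12
 * and lh_prefix_len were 3, the bucket index would be bits 49..60 of
 * the hash.  LEAF_HASH_ENTPTR() returns a pointer to that bucket's
 * head-of-chain slot in l_phys->l_hash[].
 */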
#define LEAF_HASH(l, h) \
        ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
        ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(l)-(l)->l_phys->l_hdr.lh_prefix_len)))

#define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)])

static void
zap_memset(void *a, int c, size_t n)
{
        char *cp = a;
        char *cpend = cp + n;

        while (cp < cpend)
                *cp++ = c;
}

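/*
 * stv() and ldv() store and load a native-endian unsigned integer of
 * the given width (1, 2, 4, or 8 bytes) at the given address, so the
 * array code below can handle every supported integer size with a
 * single loop.
 */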
static void
stv(int len, void *addr, uint64_t value)
{
        switch (len) {
        case 1:
                *(uint8_t *)addr = value;
                return;
        case 2:
                *(uint16_t *)addr = value;
                return;
        case 4:
                *(uint32_t *)addr = value;
                return;
        case 8:
                *(uint64_t *)addr = value;
                return;
        }
        ASSERT(!"bad int len");
}

static uint64_t
ldv(int len, const void *addr)
{
        switch (len) {
        case 1:
                return (*(uint8_t *)addr);
        case 2:
                return (*(uint16_t *)addr);
        case 4:
                return (*(uint32_t *)addr);
        case 8:
                return (*(uint64_t *)addr);
        }
        ASSERT(!"bad int len");
        return (0xFEEDFACEDEADBEEFULL);
}

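/*
 * Byteswap an entire leaf block in place: the header, the hash table,
 * and then each chunk according to its type (entry, free, or array;
 * la_array itself is byte data and needs no swapping).
 */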
void
zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
{
        int i;
        zap_leaf_t l;
        l.l_bs = highbit(size)-1;
        l.l_phys = buf;

        buf->l_hdr.lh_block_type =      BSWAP_64(buf->l_hdr.lh_block_type);
        buf->l_hdr.lh_prefix =          BSWAP_64(buf->l_hdr.lh_prefix);
        buf->l_hdr.lh_magic =           BSWAP_32(buf->l_hdr.lh_magic);
        buf->l_hdr.lh_nfree =           BSWAP_16(buf->l_hdr.lh_nfree);
        buf->l_hdr.lh_nentries =        BSWAP_16(buf->l_hdr.lh_nentries);
        buf->l_hdr.lh_prefix_len =      BSWAP_16(buf->l_hdr.lh_prefix_len);
        buf->l_hdr.lh_freelist =        BSWAP_16(buf->l_hdr.lh_freelist);

        for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
                buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);

        for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
                zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
                struct zap_leaf_entry *le;

                switch (lc->l_free.lf_type) {
                case ZAP_CHUNK_ENTRY:
                        le = &lc->l_entry;

                        le->le_type =           BSWAP_8(le->le_type);
                        le->le_int_size =       BSWAP_8(le->le_int_size);
                        le->le_next =           BSWAP_16(le->le_next);
                        le->le_name_chunk =     BSWAP_16(le->le_name_chunk);
                        le->le_name_length =    BSWAP_16(le->le_name_length);
                        le->le_value_chunk =    BSWAP_16(le->le_value_chunk);
                        le->le_value_length =   BSWAP_16(le->le_value_length);
                        le->le_cd =             BSWAP_32(le->le_cd);
                        le->le_hash =           BSWAP_64(le->le_hash);
                        break;
                case ZAP_CHUNK_FREE:
                        lc->l_free.lf_type =    BSWAP_8(lc->l_free.lf_type);
                        lc->l_free.lf_next =    BSWAP_16(lc->l_free.lf_next);
                        break;
                case ZAP_CHUNK_ARRAY:
                        lc->l_array.la_type =   BSWAP_8(lc->l_array.la_type);
                        lc->l_array.la_next =   BSWAP_16(lc->l_array.la_next);
                        /* la_array doesn't need swapping */
                        break;
                default:
                        ASSERT(!"bad leaf type");
                }
        }
}

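/*
 * Initialize a freshly allocated leaf block: clear the header, point
 * every hash bucket at CHAIN_END, and string all chunks onto the
 * freelist.
 */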
void
zap_leaf_init(zap_leaf_t *l, boolean_t sort)
{
        int i;

        l->l_bs = highbit(l->l_dbuf->db_size)-1;
        zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header));
        zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
        for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
                ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
                ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
        }
        ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
        l->l_phys->l_hdr.lh_block_type = ZBT_LEAF;
        l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
        l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
        if (sort)
                l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
}

/*
 * Routines which manipulate leaf chunks (l_chunk[]).
 */

static uint16_t
zap_leaf_chunk_alloc(zap_leaf_t *l)
{
        int chunk;

        ASSERT(l->l_phys->l_hdr.lh_nfree > 0);

        chunk = l->l_phys->l_hdr.lh_freelist;
        ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
        ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE);

        l->l_phys->l_hdr.lh_freelist = ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next;

        l->l_phys->l_hdr.lh_nfree--;

        return (chunk);
}

static void
zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk)
{
        struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free;
        ASSERT3U(l->l_phys->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
        ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
        ASSERT(zlf->lf_type != ZAP_CHUNK_FREE);

        zlf->lf_type = ZAP_CHUNK_FREE;
        zlf->lf_next = l->l_phys->l_hdr.lh_freelist;
        bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */
        l->l_phys->l_hdr.lh_freelist = chunk;

        l->l_phys->l_hdr.lh_nfree++;
}

/*
 * Routines which manipulate leaf arrays (zap_leaf_array type chunks).
 */

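/*
 * Pack num_integers integers of integer_size bytes each from buf into
 * a chain of ZAP_CHUNK_ARRAY chunks, storing the bytes big-endian, and
 * return the index of the first chunk in the chain.
 */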
static uint16_t
zap_leaf_array_create(zap_leaf_t *l, const char *buf,
        int integer_size, int num_integers)
{
        uint16_t chunk_head;
        uint16_t *chunkp = &chunk_head;
        int byten = 0;
        uint64_t value;
        int shift = (integer_size-1)*8;
        int len = num_integers;

        ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES);

        while (len > 0) {
                uint16_t chunk = zap_leaf_chunk_alloc(l);
                struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
                int i;

                la->la_type = ZAP_CHUNK_ARRAY;
                for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) {
                        if (byten == 0)
                                value = ldv(integer_size, buf);
                        la->la_array[i] = value >> shift;
                        value <<= 8;
                        if (++byten == integer_size) {
                                byten = 0;
                                buf += integer_size;
                                if (--len == 0)
                                        break;
                        }
                }

                *chunkp = chunk;
                chunkp = &la->la_next;
        }
        *chunkp = CHAIN_END;

        return (chunk_head);
}

static void
zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp)
{
        uint16_t chunk = *chunkp;

        *chunkp = CHAIN_END;

        while (chunk != CHAIN_END) {
                int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next;
                ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==,
                    ZAP_CHUNK_ARRAY);
                zap_leaf_chunk_free(l, chunk);
                chunk = nextchunk;
        }
}

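/*
 * Copy an array back out of a chunk chain, converting from
 * array_int_len-byte big-endian integers on disk to buf_int_len-byte
 * native integers in buf.  Fast paths cover a single 8-byte value and
 * arrays of 1-byte integers such as entry names.
 */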
/* array_len and buf_len are in integers, not bytes */
static void
zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk,
    int array_int_len, int array_len, int buf_int_len, uint64_t buf_len,
    char *buf)
{
        int len = MIN(array_len, buf_len);
        int byten = 0;
        uint64_t value = 0;

        ASSERT3U(array_int_len, <=, buf_int_len);

        /* Fast path for one 8-byte integer */
        if (array_int_len == 8 && buf_int_len == 8 && len == 1) {
                struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
                uint8_t *ip = la->la_array;
                uint64_t *buf64 = (uint64_t *)buf;

                *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
                    (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
                    (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
                    (uint64_t)ip[6] << 8 | (uint64_t)ip[7];
                return;
        }

        /* Fast path for an array of 1-byte integers (eg. the entry name) */
        if (array_int_len == 1 && buf_int_len == 1 &&
            buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) {
                while (chunk != CHAIN_END) {
                        struct zap_leaf_array *la =
                            &ZAP_LEAF_CHUNK(l, chunk).l_array;
                        bcopy(la->la_array, buf, ZAP_LEAF_ARRAY_BYTES);
                        buf += ZAP_LEAF_ARRAY_BYTES;
                        chunk = la->la_next;
                }
                return;
        }

        while (len > 0) {
                struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
                int i;

                ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
                for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) {
                        value = (value << 8) | la->la_array[i];
                        byten++;
                        if (byten == array_int_len) {
                                stv(buf_int_len, buf, value);
                                byten = 0;
                                len--;
                                if (len == 0)
                                        return;
                                buf += buf_int_len;
                        }
                }
                chunk = la->la_next;
        }
}

/*
 * Only to be used on 8-bit arrays.
 * array_len is actual len in bytes (not encoded le_value_length).
 * namenorm is null-terminated.
 */
static boolean_t
zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, int chunk, int array_len)
{
        int bseen = 0;

        if (zn->zn_matchtype == MT_FIRST) {
                char *thisname = kmem_alloc(array_len, KM_SLEEP);
                boolean_t match;

                zap_leaf_array_read(l, chunk, 1, array_len, 1,
                    array_len, thisname);
                match = zap_match(zn, thisname);
                kmem_free(thisname, array_len);
                return (match);
        }

        /* Fast path for exact matching */
        while (bseen < array_len) {
                struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
                int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
                ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
                if (bcmp(la->la_array, zn->zn_name_orij + bseen, toread))
                        break;
                chunk = la->la_next;
                bseen += toread;
        }
        return (bseen == array_len);
}

/*
 * Routines which manipulate leaf entries.
 */

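/*
 * Look up the entry named by zn in this leaf.  Walk the hash chain for
 * zn->zn_hash, compare names, and on a match fill in *zeh and return 0;
 * otherwise return ENOENT.  An MT_BEST lookup falls back to MT_FIRST
 * when no exact match is found.
 */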
int
zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
{
        uint16_t *chunkp;
        struct zap_leaf_entry *le;

        ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);

again:
        for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash);
            *chunkp != CHAIN_END; chunkp = &le->le_next) {
                uint16_t chunk = *chunkp;
                le = ZAP_LEAF_ENTRY(l, chunk);

                ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
                ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

                if (le->le_hash != zn->zn_hash)
                        continue;

                /*
                 * NB: the entry chain is always sorted by cd on
                 * normalized zap objects, so this will find the
                 * lowest-cd match for MT_FIRST.
                 */
                ASSERT(zn->zn_matchtype == MT_EXACT ||
                    (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED));
                if (zap_leaf_array_match(l, zn, le->le_name_chunk,
                    le->le_name_length)) {
                        zeh->zeh_num_integers = le->le_value_length;
                        zeh->zeh_integer_size = le->le_int_size;
                        zeh->zeh_cd = le->le_cd;
                        zeh->zeh_hash = le->le_hash;
                        zeh->zeh_chunkp = chunkp;
                        zeh->zeh_leaf = l;
                        return (0);
                }
        }

        /*
         * NB: we could of course do this in one pass, but that would be
         * a pain.  We'll see if MT_BEST is even used much.
         */
        if (zn->zn_matchtype == MT_BEST) {
                zn->zn_matchtype = MT_FIRST;
                goto again;
        }

        return (ENOENT);
}

/* Return (h1,cd1 >= h2,cd2) */
#define HCD_GTEQ(h1, cd1, h2, cd2) \
        ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE))

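/*
 * Find the entry in this leaf with the smallest (hash, cd) that is
 * greater than or equal to (h, cd), filling in *zeh; return ENOENT if
 * there is no such entry.
 */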
int
zap_leaf_lookup_closest(zap_leaf_t *l,
    uint64_t h, uint32_t cd, zap_entry_handle_t *zeh)
{
        uint16_t chunk;
        uint64_t besth = -1ULL;
        uint32_t bestcd = ZAP_MAXCD;
        uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1;
        uint16_t lh;
        struct zap_leaf_entry *le;

        ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);

        for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) {
                for (chunk = l->l_phys->l_hash[lh];
                    chunk != CHAIN_END; chunk = le->le_next) {
                        le = ZAP_LEAF_ENTRY(l, chunk);

                        ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
                        ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

                        if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) &&
                            HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) {
                                ASSERT3U(bestlh, >=, lh);
                                bestlh = lh;
                                besth = le->le_hash;
                                bestcd = le->le_cd;

                                zeh->zeh_num_integers = le->le_value_length;
                                zeh->zeh_integer_size = le->le_int_size;
                                zeh->zeh_cd = le->le_cd;
                                zeh->zeh_hash = le->le_hash;
                                zeh->zeh_fakechunk = chunk;
                                zeh->zeh_chunkp = &zeh->zeh_fakechunk;
                                zeh->zeh_leaf = l;
                        }
                }
        }

        return (bestcd == ZAP_MAXCD ? ENOENT : 0);
}

int
zap_entry_read(const zap_entry_handle_t *zeh,
    uint8_t integer_size, uint64_t num_integers, void *buf)
{
        struct zap_leaf_entry *le =
            ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
        ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

        if (le->le_int_size > integer_size)
                return (EINVAL);

        zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk, le->le_int_size,
            le->le_value_length, integer_size, num_integers, buf);

        if (zeh->zeh_num_integers > num_integers)
                return (EOVERFLOW);
        return (0);
}

int
zap_entry_read_name(const zap_entry_handle_t *zeh, uint16_t buflen, char *buf)
{
        struct zap_leaf_entry *le =
            ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
        ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

        zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1,
            le->le_name_length, 1, buflen, buf);
        if (le->le_name_length > buflen)
                return (EOVERFLOW);
        return (0);
}

int
zap_entry_update(zap_entry_handle_t *zeh,
        uint8_t integer_size, uint64_t num_integers, const void *buf)
{
        int delta_chunks;
        zap_leaf_t *l = zeh->zeh_leaf;
        struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp);

        delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) -
            ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_length * le->le_int_size);

        if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks)
                return (EAGAIN);

        /*
         * We should search other chained leaves (via
         * zap_entry_remove,create?) otherwise returning EAGAIN will
         * just send us into an infinite loop if we have to chain
         * another leaf block, rather than being able to split this
         * block.
         */

        zap_leaf_array_free(l, &le->le_value_chunk);
        le->le_value_chunk =
            zap_leaf_array_create(l, buf, integer_size, num_integers);
        le->le_value_length = num_integers;
        le->le_int_size = integer_size;
        return (0);
}

void
zap_entry_remove(zap_entry_handle_t *zeh)
{
        uint16_t entry_chunk;
        struct zap_leaf_entry *le;
        zap_leaf_t *l = zeh->zeh_leaf;

        ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk);

        entry_chunk = *zeh->zeh_chunkp;
        le = ZAP_LEAF_ENTRY(l, entry_chunk);
        ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

        zap_leaf_array_free(l, &le->le_name_chunk);
        zap_leaf_array_free(l, &le->le_value_chunk);

        *zeh->zeh_chunkp = le->le_next;
        zap_leaf_chunk_free(l, entry_chunk);

        l->l_phys->l_hdr.lh_nentries--;
}

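/*
 * Create a new entry in this leaf.  If cd is ZAP_MAXCD, pick the lowest
 * collision differentiator not already used by an entry with the same
 * hash; return EAGAIN if the leaf does not have enough free chunks for
 * the entry, its name, and its value.
 */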
int
zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd,
    uint8_t integer_size, uint64_t num_integers, const void *buf,
    zap_entry_handle_t *zeh)
{
        uint16_t chunk;
        uint16_t *chunkp;
        struct zap_leaf_entry *le;
        uint64_t namelen, valuelen;
        int numchunks;

        valuelen = integer_size * num_integers;
        namelen = strlen(name) + 1;
        ASSERT(namelen >= 2);

        numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(namelen) +
            ZAP_LEAF_ARRAY_NCHUNKS(valuelen);
        if (numchunks > ZAP_LEAF_NUMCHUNKS(l))
                return (E2BIG);

        if (cd == ZAP_MAXCD) {
                /* find the lowest unused cd */
                if (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) {
                        cd = 0;

                        for (chunk = *LEAF_HASH_ENTPTR(l, h);
                            chunk != CHAIN_END; chunk = le->le_next) {
                                le = ZAP_LEAF_ENTRY(l, chunk);
                                if (le->le_cd > cd)
                                        break;
                                if (le->le_hash == h) {
                                        ASSERT3U(cd, ==, le->le_cd);
                                        cd++;
                                }
                        }
                } else {
                        /* old unsorted format; do it the O(n^2) way */
                        for (cd = 0; cd < ZAP_MAXCD; cd++) {
                                for (chunk = *LEAF_HASH_ENTPTR(l, h);
                                    chunk != CHAIN_END; chunk = le->le_next) {
                                        le = ZAP_LEAF_ENTRY(l, chunk);
                                        if (le->le_hash == h &&
                                            le->le_cd == cd) {
                                                break;
                                        }
                                }
                                /* If this cd is not in use, we are good. */
                                if (chunk == CHAIN_END)
                                        break;
                        }
                }
                /*
                 * we would run out of space in a block before we could
                 * have ZAP_MAXCD entries
                 */
                ASSERT3U(cd, <, ZAP_MAXCD);
        }

        if (l->l_phys->l_hdr.lh_nfree < numchunks)
                return (EAGAIN);

        /* make the entry */
        chunk = zap_leaf_chunk_alloc(l);
        le = ZAP_LEAF_ENTRY(l, chunk);
        le->le_type = ZAP_CHUNK_ENTRY;
        le->le_name_chunk = zap_leaf_array_create(l, name, 1, namelen);
        le->le_name_length = namelen;
        le->le_value_chunk =
            zap_leaf_array_create(l, buf, integer_size, num_integers);
        le->le_value_length = num_integers;
        le->le_int_size = integer_size;
        le->le_hash = h;
        le->le_cd = cd;

        /* link it into the hash chain */
        /* XXX if we did the search above, we could just use that */
        chunkp = zap_leaf_rehash_entry(l, chunk);

        l->l_phys->l_hdr.lh_nentries++;

        zeh->zeh_leaf = l;
        zeh->zeh_num_integers = num_integers;
        zeh->zeh_integer_size = le->le_int_size;
        zeh->zeh_cd = le->le_cd;
        zeh->zeh_hash = le->le_hash;
        zeh->zeh_chunkp = chunkp;

        return (0);
}

/*
 * Determine if there is another entry with the same normalized form.
 * For performance purposes, either zn or name must be provided (the
 * other can be NULL).  Note, there usually won't be any hash
 * conflicts, in which case we don't need the concatenated/normalized
 * form of the name.  But all callers have one of these on hand anyway,
 * so might as well take advantage.  A cleaner but slower interface
 * would accept neither argument, and compute the normalized name as
 * needed (using zap_name_alloc(zap_entry_read_name(zeh))).
 */
boolean_t
zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
    const char *name, zap_t *zap)
{
        uint64_t chunk;
        struct zap_leaf_entry *le;
        boolean_t allocdzn = B_FALSE;

        if (zap->zap_normflags == 0)
                return (B_FALSE);

        for (chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash);
            chunk != CHAIN_END; chunk = le->le_next) {
                le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk);
                if (le->le_hash != zeh->zeh_hash)
                        continue;
                if (le->le_cd == zeh->zeh_cd)
                        continue;

                if (zn == NULL) {
                        zn = zap_name_alloc(zap, name, MT_FIRST);
                        allocdzn = B_TRUE;
                }
                if (zap_leaf_array_match(zeh->zeh_leaf, zn,
                    le->le_name_chunk, le->le_name_length)) {
                        if (allocdzn)
                                zap_name_free(zn);
                        return (B_TRUE);
                }
        }
        if (allocdzn)
                zap_name_free(zn);
        return (B_FALSE);
}

/*
 * Routines for transferring entries between leafs.
 */

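/*
 * Link 'entry' into the hash chain for its hash value, keeping the
 * chain sorted by cd, and return a pointer to the chain link that now
 * refers to it.
 */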
static uint16_t *
zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
{
        struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
        struct zap_leaf_entry *le2;
        uint16_t *chunkp;

        /*
         * keep the entry chain sorted by cd
         * NB: this will not cause problems for unsorted leafs, though
         * it is unnecessary there.
         */
        for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash);
            *chunkp != CHAIN_END; chunkp = &le2->le_next) {
                le2 = ZAP_LEAF_ENTRY(l, *chunkp);
                if (le2->le_cd > le->le_cd)
                        break;
        }

        le->le_next = *chunkp;
        *chunkp = entry;
        return (chunkp);
}

static uint16_t
zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
{
        uint16_t new_chunk;
        uint16_t *nchunkp = &new_chunk;

        while (chunk != CHAIN_END) {
                uint16_t nchunk = zap_leaf_chunk_alloc(nl);
                struct zap_leaf_array *nla =
                    &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
                struct zap_leaf_array *la =
                    &ZAP_LEAF_CHUNK(l, chunk).l_array;
                int nextchunk = la->la_next;

                ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
                ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));

                *nla = *la; /* structure assignment */

                zap_leaf_chunk_free(l, chunk);
                chunk = nextchunk;
                *nchunkp = nchunk;
                nchunkp = &nla->la_next;
        }
        *nchunkp = CHAIN_END;
        return (new_chunk);
}

static void
zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl)
{
        struct zap_leaf_entry *le, *nle;
        uint16_t chunk;

        le = ZAP_LEAF_ENTRY(l, entry);
        ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

        chunk = zap_leaf_chunk_alloc(nl);
        nle = ZAP_LEAF_ENTRY(nl, chunk);
        *nle = *le; /* structure assignment */

        (void) zap_leaf_rehash_entry(nl, chunk);

        nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
        nle->le_value_chunk =
            zap_leaf_transfer_array(l, le->le_value_chunk, nl);

        zap_leaf_chunk_free(l, entry);

        l->l_phys->l_hdr.lh_nentries--;
        nl->l_phys->l_hdr.lh_nentries++;
}

/*
 * Transfer the entries whose hash prefix ends in 1 to the new leaf.
 */
void
zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
{
        int i;
        int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len;

        /* set new prefix and prefix_len */
        l->l_phys->l_hdr.lh_prefix <<= 1;
        l->l_phys->l_hdr.lh_prefix_len++;
        nl->l_phys->l_hdr.lh_prefix = l->l_phys->l_hdr.lh_prefix | 1;
        nl->l_phys->l_hdr.lh_prefix_len = l->l_phys->l_hdr.lh_prefix_len;

        /* break existing hash chains */
        zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));

        if (sort)
                l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;

        /*
         * Transfer entries whose hash bit 'bit' is set to nl; rehash
         * the remaining entries
         *
         * NB: We could find entries via the hashtable instead. That
         * would be O(hashents+numents) rather than O(numblks+numents),
         * but this accesses memory more sequentially, and when we're
         * called, the block is usually pretty full.
         */
        for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
                struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i);
                if (le->le_type != ZAP_CHUNK_ENTRY)
                        continue;

                if (le->le_hash & (1ULL << bit))
                        zap_leaf_transfer_entry(l, i, nl);
                else
                        (void) zap_leaf_rehash_entry(l, i);
        }
}

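/*
 * Accumulate statistics about this leaf into *zs: its prefix-length
 * spread, entry count, how full the block is, chunks used per entry,
 * and the length of each hash-bucket chain.
 */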
void
zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
{
        int i, n;

        n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift -
            l->l_phys->l_hdr.lh_prefix_len;
        n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
        zs->zs_leafs_with_2n_pointers[n]++;

        n = l->l_phys->l_hdr.lh_nentries/5;
        n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
        zs->zs_blocks_with_n5_entries[n]++;

        n = ((1<<FZAP_BLOCK_SHIFT(zap)) -
            l->l_phys->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 /
            (1<<FZAP_BLOCK_SHIFT(zap));
        n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
        zs->zs_blocks_n_tenths_full[n]++;

        for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) {
                int nentries = 0;
                int chunk = l->l_phys->l_hash[i];

                while (chunk != CHAIN_END) {
                        struct zap_leaf_entry *le =
                            ZAP_LEAF_ENTRY(l, chunk);

                        n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_length) +
                            ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_length *
                            le->le_int_size);
                        n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
                        zs->zs_entries_using_n_chunks[n]++;

                        chunk = le->le_next;
                        nentries++;
                }

                n = nentries;
                n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
                zs->zs_buckets_with_n_entries[n]++;
        }
}