]> CyberLeo.Net >> Repos - FreeBSD/releng/9.0.git/blob - sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c
Copy stable/9 to releng/9.0 as part of the FreeBSD 9.0-RELEASE release
[FreeBSD/releng/9.0.git] / sys / cddl / contrib / opensolaris / uts / common / fs / zfs / zap_leaf.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24
25 /*
26  * The 512-byte leaf is broken into 32 16-byte chunks.
27  * chunk number n means l_chunk[n], even though the header precedes it.
28  * the names are stored null-terminated.
29  */
30
31 #include <sys/zio.h>
32 #include <sys/spa.h>
33 #include <sys/dmu.h>
34 #include <sys/zfs_context.h>
35 #include <sys/fs/zfs.h>
36 #include <sys/zap.h>
37 #include <sys/zap_impl.h>
38 #include <sys/zap_leaf.h>
39 #include <sys/arc.h>
40
41 static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
42
43 #define CHAIN_END 0xffff /* end of the chunk chain */
44
45 /* half the (current) minimum block size */
46 #define MAX_ARRAY_BYTES (8<<10)
47
48 #define LEAF_HASH(l, h) \
49         ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
50         ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(l)-(l)->l_phys->l_hdr.lh_prefix_len)))
51
52 #define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)])
53
54
55 static void
56 zap_memset(void *a, int c, size_t n)
57 {
58         char *cp = a;
59         char *cpend = cp + n;
60
61         while (cp < cpend)
62                 *cp++ = c;
63 }
64
65 static void
66 stv(int len, void *addr, uint64_t value)
67 {
68         switch (len) {
69         case 1:
70                 *(uint8_t *)addr = value;
71                 return;
72         case 2:
73                 *(uint16_t *)addr = value;
74                 return;
75         case 4:
76                 *(uint32_t *)addr = value;
77                 return;
78         case 8:
79                 *(uint64_t *)addr = value;
80                 return;
81         }
82         ASSERT(!"bad int len");
83 }
84
85 static uint64_t
86 ldv(int len, const void *addr)
87 {
88         switch (len) {
89         case 1:
90                 return (*(uint8_t *)addr);
91         case 2:
92                 return (*(uint16_t *)addr);
93         case 4:
94                 return (*(uint32_t *)addr);
95         case 8:
96                 return (*(uint64_t *)addr);
97         }
98         ASSERT(!"bad int len");
99         return (0xFEEDFACEDEADBEEFULL);
100 }
101
102 void
103 zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
104 {
105         int i;
106         zap_leaf_t l;
107         l.l_bs = highbit(size)-1;
108         l.l_phys = buf;
109
110         buf->l_hdr.lh_block_type =      BSWAP_64(buf->l_hdr.lh_block_type);
111         buf->l_hdr.lh_prefix =          BSWAP_64(buf->l_hdr.lh_prefix);
112         buf->l_hdr.lh_magic =           BSWAP_32(buf->l_hdr.lh_magic);
113         buf->l_hdr.lh_nfree =           BSWAP_16(buf->l_hdr.lh_nfree);
114         buf->l_hdr.lh_nentries =        BSWAP_16(buf->l_hdr.lh_nentries);
115         buf->l_hdr.lh_prefix_len =      BSWAP_16(buf->l_hdr.lh_prefix_len);
116         buf->l_hdr.lh_freelist =        BSWAP_16(buf->l_hdr.lh_freelist);
117
118         for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
119                 buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);
120
121         for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
122                 zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
123                 struct zap_leaf_entry *le;
124
125                 switch (lc->l_free.lf_type) {
126                 case ZAP_CHUNK_ENTRY:
127                         le = &lc->l_entry;
128
129                         le->le_type =           BSWAP_8(le->le_type);
130                         le->le_value_intlen =   BSWAP_8(le->le_value_intlen);
131                         le->le_next =           BSWAP_16(le->le_next);
132                         le->le_name_chunk =     BSWAP_16(le->le_name_chunk);
133                         le->le_name_numints =   BSWAP_16(le->le_name_numints);
134                         le->le_value_chunk =    BSWAP_16(le->le_value_chunk);
135                         le->le_value_numints =  BSWAP_16(le->le_value_numints);
136                         le->le_cd =             BSWAP_32(le->le_cd);
137                         le->le_hash =           BSWAP_64(le->le_hash);
138                         break;
139                 case ZAP_CHUNK_FREE:
140                         lc->l_free.lf_type =    BSWAP_8(lc->l_free.lf_type);
141                         lc->l_free.lf_next =    BSWAP_16(lc->l_free.lf_next);
142                         break;
143                 case ZAP_CHUNK_ARRAY:
144                         lc->l_array.la_type =   BSWAP_8(lc->l_array.la_type);
145                         lc->l_array.la_next =   BSWAP_16(lc->l_array.la_next);
146                         /* la_array doesn't need swapping */
147                         break;
148                 default:
149                         ASSERT(!"bad leaf type");
150                 }
151         }
152 }
153
154 void
155 zap_leaf_init(zap_leaf_t *l, boolean_t sort)
156 {
157         int i;
158
159         l->l_bs = highbit(l->l_dbuf->db_size)-1;
160         zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header));
161         zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
162         for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
163                 ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
164                 ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
165         }
166         ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
167         l->l_phys->l_hdr.lh_block_type = ZBT_LEAF;
168         l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
169         l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
170         if (sort)
171                 l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
172 }
173
174 /*
175  * Routines which manipulate leaf chunks (l_chunk[]).
176  */
177
178 static uint16_t
179 zap_leaf_chunk_alloc(zap_leaf_t *l)
180 {
181         int chunk;
182
183         ASSERT(l->l_phys->l_hdr.lh_nfree > 0);
184
185         chunk = l->l_phys->l_hdr.lh_freelist;
186         ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
187         ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE);
188
189         l->l_phys->l_hdr.lh_freelist = ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next;
190
191         l->l_phys->l_hdr.lh_nfree--;
192
193         return (chunk);
194 }
195
196 static void
197 zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk)
198 {
199         struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free;
200         ASSERT3U(l->l_phys->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
201         ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
202         ASSERT(zlf->lf_type != ZAP_CHUNK_FREE);
203
204         zlf->lf_type = ZAP_CHUNK_FREE;
205         zlf->lf_next = l->l_phys->l_hdr.lh_freelist;
206         bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */
207         l->l_phys->l_hdr.lh_freelist = chunk;
208
209         l->l_phys->l_hdr.lh_nfree++;
210 }
211
212 /*
213  * Routines which manipulate leaf arrays (zap_leaf_array type chunks).
214  */
215
216 static uint16_t
217 zap_leaf_array_create(zap_leaf_t *l, const char *buf,
218     int integer_size, int num_integers)
219 {
220         uint16_t chunk_head;
221         uint16_t *chunkp = &chunk_head;
222         int byten = 0;
223         uint64_t value;
224         int shift = (integer_size-1)*8;
225         int len = num_integers;
226
227         ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES);
228
229         while (len > 0) {
230                 uint16_t chunk = zap_leaf_chunk_alloc(l);
231                 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
232                 int i;
233
234                 la->la_type = ZAP_CHUNK_ARRAY;
235                 for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) {
236                         if (byten == 0)
237                                 value = ldv(integer_size, buf);
238                         la->la_array[i] = value >> shift;
239                         value <<= 8;
240                         if (++byten == integer_size) {
241                                 byten = 0;
242                                 buf += integer_size;
243                                 if (--len == 0)
244                                         break;
245                         }
246                 }
247
248                 *chunkp = chunk;
249                 chunkp = &la->la_next;
250         }
251         *chunkp = CHAIN_END;
252
253         return (chunk_head);
254 }
255
256 static void
257 zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp)
258 {
259         uint16_t chunk = *chunkp;
260
261         *chunkp = CHAIN_END;
262
263         while (chunk != CHAIN_END) {
264                 int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next;
265                 ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==,
266                     ZAP_CHUNK_ARRAY);
267                 zap_leaf_chunk_free(l, chunk);
268                 chunk = nextchunk;
269         }
270 }
271
272 /* array_len and buf_len are in integers, not bytes */
273 static void
274 zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk,
275     int array_int_len, int array_len, int buf_int_len, uint64_t buf_len,
276     void *buf)
277 {
278         int len = MIN(array_len, buf_len);
279         int byten = 0;
280         uint64_t value = 0;
281         char *p = buf;
282
283         ASSERT3U(array_int_len, <=, buf_int_len);
284
285         /* Fast path for one 8-byte integer */
286         if (array_int_len == 8 && buf_int_len == 8 && len == 1) {
287                 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
288                 uint8_t *ip = la->la_array;
289                 uint64_t *buf64 = buf;
290
291                 *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
292                     (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
293                     (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
294                     (uint64_t)ip[6] << 8 | (uint64_t)ip[7];
295                 return;
296         }
297
298         /* Fast path for an array of 1-byte integers (eg. the entry name) */
299         if (array_int_len == 1 && buf_int_len == 1 &&
300             buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) {
301                 while (chunk != CHAIN_END) {
302                         struct zap_leaf_array *la =
303                             &ZAP_LEAF_CHUNK(l, chunk).l_array;
304                         bcopy(la->la_array, p, ZAP_LEAF_ARRAY_BYTES);
305                         p += ZAP_LEAF_ARRAY_BYTES;
306                         chunk = la->la_next;
307                 }
308                 return;
309         }
310
311         while (len > 0) {
312                 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
313                 int i;
314
315                 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
316                 for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) {
317                         value = (value << 8) | la->la_array[i];
318                         byten++;
319                         if (byten == array_int_len) {
320                                 stv(buf_int_len, p, value);
321                                 byten = 0;
322                                 len--;
323                                 if (len == 0)
324                                         return;
325                                 p += buf_int_len;
326                         }
327                 }
328                 chunk = la->la_next;
329         }
330 }
331
332 static boolean_t
333 zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn,
334     int chunk, int array_numints)
335 {
336         int bseen = 0;
337
338         if (zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY) {
339                 uint64_t *thiskey;
340                 boolean_t match;
341
342                 ASSERT(zn->zn_key_intlen == sizeof (*thiskey));
343                 thiskey = kmem_alloc(array_numints * sizeof (*thiskey),
344                     KM_SLEEP);
345
346                 zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints,
347                     sizeof (*thiskey), array_numints, thiskey);
348                 match = bcmp(thiskey, zn->zn_key_orig,
349                     array_numints * sizeof (*thiskey)) == 0;
350                 kmem_free(thiskey, array_numints * sizeof (*thiskey));
351                 return (match);
352         }
353
354         ASSERT(zn->zn_key_intlen == 1);
355         if (zn->zn_matchtype == MT_FIRST) {
356                 char *thisname = kmem_alloc(array_numints, KM_SLEEP);
357                 boolean_t match;
358
359                 zap_leaf_array_read(l, chunk, sizeof (char), array_numints,
360                     sizeof (char), array_numints, thisname);
361                 match = zap_match(zn, thisname);
362                 kmem_free(thisname, array_numints);
363                 return (match);
364         }
365
366         /*
367          * Fast path for exact matching.
368          * First check that the lengths match, so that we don't read
369          * past the end of the zn_key_orig array.
370          */
371         if (array_numints != zn->zn_key_orig_numints)
372                 return (B_FALSE);
373         while (bseen < array_numints) {
374                 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
375                 int toread = MIN(array_numints - bseen, ZAP_LEAF_ARRAY_BYTES);
376                 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
377                 if (bcmp(la->la_array, (char *)zn->zn_key_orig + bseen, toread))
378                         break;
379                 chunk = la->la_next;
380                 bseen += toread;
381         }
382         return (bseen == array_numints);
383 }
384
385 /*
386  * Routines which manipulate leaf entries.
387  */
388
389 int
390 zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
391 {
392         uint16_t *chunkp;
393         struct zap_leaf_entry *le;
394
395         ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
396
397 again:
398         for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash);
399             *chunkp != CHAIN_END; chunkp = &le->le_next) {
400                 uint16_t chunk = *chunkp;
401                 le = ZAP_LEAF_ENTRY(l, chunk);
402
403                 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
404                 ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
405
406                 if (le->le_hash != zn->zn_hash)
407                         continue;
408
409                 /*
410                  * NB: the entry chain is always sorted by cd on
411                  * normalized zap objects, so this will find the
412                  * lowest-cd match for MT_FIRST.
413                  */
414                 ASSERT(zn->zn_matchtype == MT_EXACT ||
415                     (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED));
416                 if (zap_leaf_array_match(l, zn, le->le_name_chunk,
417                     le->le_name_numints)) {
418                         zeh->zeh_num_integers = le->le_value_numints;
419                         zeh->zeh_integer_size = le->le_value_intlen;
420                         zeh->zeh_cd = le->le_cd;
421                         zeh->zeh_hash = le->le_hash;
422                         zeh->zeh_chunkp = chunkp;
423                         zeh->zeh_leaf = l;
424                         return (0);
425                 }
426         }
427
428         /*
429          * NB: we could of course do this in one pass, but that would be
430          * a pain.  We'll see if MT_BEST is even used much.
431          */
432         if (zn->zn_matchtype == MT_BEST) {
433                 zn->zn_matchtype = MT_FIRST;
434                 goto again;
435         }
436
437         return (ENOENT);
438 }
439
440 /* Return (h1,cd1 >= h2,cd2) */
441 #define HCD_GTEQ(h1, cd1, h2, cd2) \
442         ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE))
443
444 int
445 zap_leaf_lookup_closest(zap_leaf_t *l,
446     uint64_t h, uint32_t cd, zap_entry_handle_t *zeh)
447 {
448         uint16_t chunk;
449         uint64_t besth = -1ULL;
450         uint32_t bestcd = -1U;
451         uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1;
452         uint16_t lh;
453         struct zap_leaf_entry *le;
454
455         ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
456
457         for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) {
458                 for (chunk = l->l_phys->l_hash[lh];
459                     chunk != CHAIN_END; chunk = le->le_next) {
460                         le = ZAP_LEAF_ENTRY(l, chunk);
461
462                         ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
463                         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
464
465                         if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) &&
466                             HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) {
467                                 ASSERT3U(bestlh, >=, lh);
468                                 bestlh = lh;
469                                 besth = le->le_hash;
470                                 bestcd = le->le_cd;
471
472                                 zeh->zeh_num_integers = le->le_value_numints;
473                                 zeh->zeh_integer_size = le->le_value_intlen;
474                                 zeh->zeh_cd = le->le_cd;
475                                 zeh->zeh_hash = le->le_hash;
476                                 zeh->zeh_fakechunk = chunk;
477                                 zeh->zeh_chunkp = &zeh->zeh_fakechunk;
478                                 zeh->zeh_leaf = l;
479                         }
480                 }
481         }
482
483         return (bestcd == -1U ? ENOENT : 0);
484 }
485
486 int
487 zap_entry_read(const zap_entry_handle_t *zeh,
488     uint8_t integer_size, uint64_t num_integers, void *buf)
489 {
490         struct zap_leaf_entry *le =
491             ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
492         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
493
494         if (le->le_value_intlen > integer_size)
495                 return (EINVAL);
496
497         zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk,
498             le->le_value_intlen, le->le_value_numints,
499             integer_size, num_integers, buf);
500
501         if (zeh->zeh_num_integers > num_integers)
502                 return (EOVERFLOW);
503         return (0);
504
505 }
506
507 int
508 zap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen,
509     char *buf)
510 {
511         struct zap_leaf_entry *le =
512             ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
513         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
514
515         if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
516                 zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 8,
517                     le->le_name_numints, 8, buflen / 8, buf);
518         } else {
519                 zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1,
520                     le->le_name_numints, 1, buflen, buf);
521         }
522         if (le->le_name_numints > buflen)
523                 return (EOVERFLOW);
524         return (0);
525 }
526
527 int
528 zap_entry_update(zap_entry_handle_t *zeh,
529         uint8_t integer_size, uint64_t num_integers, const void *buf)
530 {
531         int delta_chunks;
532         zap_leaf_t *l = zeh->zeh_leaf;
533         struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp);
534
535         delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) -
536             ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen);
537
538         if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks)
539                 return (EAGAIN);
540
541         zap_leaf_array_free(l, &le->le_value_chunk);
542         le->le_value_chunk =
543             zap_leaf_array_create(l, buf, integer_size, num_integers);
544         le->le_value_numints = num_integers;
545         le->le_value_intlen = integer_size;
546         return (0);
547 }
548
549 void
550 zap_entry_remove(zap_entry_handle_t *zeh)
551 {
552         uint16_t entry_chunk;
553         struct zap_leaf_entry *le;
554         zap_leaf_t *l = zeh->zeh_leaf;
555
556         ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk);
557
558         entry_chunk = *zeh->zeh_chunkp;
559         le = ZAP_LEAF_ENTRY(l, entry_chunk);
560         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
561
562         zap_leaf_array_free(l, &le->le_name_chunk);
563         zap_leaf_array_free(l, &le->le_value_chunk);
564
565         *zeh->zeh_chunkp = le->le_next;
566         zap_leaf_chunk_free(l, entry_chunk);
567
568         l->l_phys->l_hdr.lh_nentries--;
569 }
570
571 int
572 zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd,
573     uint8_t integer_size, uint64_t num_integers, const void *buf,
574     zap_entry_handle_t *zeh)
575 {
576         uint16_t chunk;
577         uint16_t *chunkp;
578         struct zap_leaf_entry *le;
579         uint64_t valuelen;
580         int numchunks;
581         uint64_t h = zn->zn_hash;
582
583         valuelen = integer_size * num_integers;
584
585         numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(zn->zn_key_orig_numints *
586             zn->zn_key_intlen) + ZAP_LEAF_ARRAY_NCHUNKS(valuelen);
587         if (numchunks > ZAP_LEAF_NUMCHUNKS(l))
588                 return (E2BIG);
589
590         if (cd == ZAP_NEED_CD) {
591                 /* find the lowest unused cd */
592                 if (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) {
593                         cd = 0;
594
595                         for (chunk = *LEAF_HASH_ENTPTR(l, h);
596                             chunk != CHAIN_END; chunk = le->le_next) {
597                                 le = ZAP_LEAF_ENTRY(l, chunk);
598                                 if (le->le_cd > cd)
599                                         break;
600                                 if (le->le_hash == h) {
601                                         ASSERT3U(cd, ==, le->le_cd);
602                                         cd++;
603                                 }
604                         }
605                 } else {
606                         /* old unsorted format; do it the O(n^2) way */
607                         for (cd = 0; ; cd++) {
608                                 for (chunk = *LEAF_HASH_ENTPTR(l, h);
609                                     chunk != CHAIN_END; chunk = le->le_next) {
610                                         le = ZAP_LEAF_ENTRY(l, chunk);
611                                         if (le->le_hash == h &&
612                                             le->le_cd == cd) {
613                                                 break;
614                                         }
615                                 }
616                                 /* If this cd is not in use, we are good. */
617                                 if (chunk == CHAIN_END)
618                                         break;
619                         }
620                 }
621                 /*
622                  * We would run out of space in a block before we could
623                  * store enough entries to run out of CD values.
624                  */
625                 ASSERT3U(cd, <, zap_maxcd(zn->zn_zap));
626         }
627
628         if (l->l_phys->l_hdr.lh_nfree < numchunks)
629                 return (EAGAIN);
630
631         /* make the entry */
632         chunk = zap_leaf_chunk_alloc(l);
633         le = ZAP_LEAF_ENTRY(l, chunk);
634         le->le_type = ZAP_CHUNK_ENTRY;
635         le->le_name_chunk = zap_leaf_array_create(l, zn->zn_key_orig,
636             zn->zn_key_intlen, zn->zn_key_orig_numints);
637         le->le_name_numints = zn->zn_key_orig_numints;
638         le->le_value_chunk =
639             zap_leaf_array_create(l, buf, integer_size, num_integers);
640         le->le_value_numints = num_integers;
641         le->le_value_intlen = integer_size;
642         le->le_hash = h;
643         le->le_cd = cd;
644
645         /* link it into the hash chain */
646         /* XXX if we did the search above, we could just use that */
647         chunkp = zap_leaf_rehash_entry(l, chunk);
648
649         l->l_phys->l_hdr.lh_nentries++;
650
651         zeh->zeh_leaf = l;
652         zeh->zeh_num_integers = num_integers;
653         zeh->zeh_integer_size = le->le_value_intlen;
654         zeh->zeh_cd = le->le_cd;
655         zeh->zeh_hash = le->le_hash;
656         zeh->zeh_chunkp = chunkp;
657
658         return (0);
659 }
660
661 /*
662  * Determine if there is another entry with the same normalized form.
663  * For performance purposes, either zn or name must be provided (the
664  * other can be NULL).  Note, there usually won't be any hash
665  * conflicts, in which case we don't need the concatenated/normalized
666  * form of the name.  But all callers have one of these on hand anyway,
667  * so might as well take advantage.  A cleaner but slower interface
668  * would accept neither argument, and compute the normalized name as
669  * needed (using zap_name_alloc(zap_entry_read_name(zeh))).
670  */
671 boolean_t
672 zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
673     const char *name, zap_t *zap)
674 {
675         uint64_t chunk;
676         struct zap_leaf_entry *le;
677         boolean_t allocdzn = B_FALSE;
678
679         if (zap->zap_normflags == 0)
680                 return (B_FALSE);
681
682         for (chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash);
683             chunk != CHAIN_END; chunk = le->le_next) {
684                 le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk);
685                 if (le->le_hash != zeh->zeh_hash)
686                         continue;
687                 if (le->le_cd == zeh->zeh_cd)
688                         continue;
689
690                 if (zn == NULL) {
691                         zn = zap_name_alloc(zap, name, MT_FIRST);
692                         allocdzn = B_TRUE;
693                 }
694                 if (zap_leaf_array_match(zeh->zeh_leaf, zn,
695                     le->le_name_chunk, le->le_name_numints)) {
696                         if (allocdzn)
697                                 zap_name_free(zn);
698                         return (B_TRUE);
699                 }
700         }
701         if (allocdzn)
702                 zap_name_free(zn);
703         return (B_FALSE);
704 }
705
706 /*
707  * Routines for transferring entries between leafs.
708  */
709
710 static uint16_t *
711 zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
712 {
713         struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
714         struct zap_leaf_entry *le2;
715         uint16_t *chunkp;
716
717         /*
718          * keep the entry chain sorted by cd
719          * NB: this will not cause problems for unsorted leafs, though
720          * it is unnecessary there.
721          */
722         for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash);
723             *chunkp != CHAIN_END; chunkp = &le2->le_next) {
724                 le2 = ZAP_LEAF_ENTRY(l, *chunkp);
725                 if (le2->le_cd > le->le_cd)
726                         break;
727         }
728
729         le->le_next = *chunkp;
730         *chunkp = entry;
731         return (chunkp);
732 }
733
734 static uint16_t
735 zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
736 {
737         uint16_t new_chunk;
738         uint16_t *nchunkp = &new_chunk;
739
740         while (chunk != CHAIN_END) {
741                 uint16_t nchunk = zap_leaf_chunk_alloc(nl);
742                 struct zap_leaf_array *nla =
743                     &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
744                 struct zap_leaf_array *la =
745                     &ZAP_LEAF_CHUNK(l, chunk).l_array;
746                 int nextchunk = la->la_next;
747
748                 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
749                 ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));
750
751                 *nla = *la; /* structure assignment */
752
753                 zap_leaf_chunk_free(l, chunk);
754                 chunk = nextchunk;
755                 *nchunkp = nchunk;
756                 nchunkp = &nla->la_next;
757         }
758         *nchunkp = CHAIN_END;
759         return (new_chunk);
760 }
761
762 static void
763 zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl)
764 {
765         struct zap_leaf_entry *le, *nle;
766         uint16_t chunk;
767
768         le = ZAP_LEAF_ENTRY(l, entry);
769         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
770
771         chunk = zap_leaf_chunk_alloc(nl);
772         nle = ZAP_LEAF_ENTRY(nl, chunk);
773         *nle = *le; /* structure assignment */
774
775         (void) zap_leaf_rehash_entry(nl, chunk);
776
777         nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
778         nle->le_value_chunk =
779             zap_leaf_transfer_array(l, le->le_value_chunk, nl);
780
781         zap_leaf_chunk_free(l, entry);
782
783         l->l_phys->l_hdr.lh_nentries--;
784         nl->l_phys->l_hdr.lh_nentries++;
785 }
786
787 /*
788  * Transfer the entries whose hash prefix ends in 1 to the new leaf.
789  */
790 void
791 zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
792 {
793         int i;
794         int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len;
795
796         /* set new prefix and prefix_len */
797         l->l_phys->l_hdr.lh_prefix <<= 1;
798         l->l_phys->l_hdr.lh_prefix_len++;
799         nl->l_phys->l_hdr.lh_prefix = l->l_phys->l_hdr.lh_prefix | 1;
800         nl->l_phys->l_hdr.lh_prefix_len = l->l_phys->l_hdr.lh_prefix_len;
801
802         /* break existing hash chains */
803         zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
804
805         if (sort)
806                 l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
807
808         /*
809          * Transfer entries whose hash bit 'bit' is set to nl; rehash
810          * the remaining entries
811          *
812          * NB: We could find entries via the hashtable instead. That
813          * would be O(hashents+numents) rather than O(numblks+numents),
814          * but this accesses memory more sequentially, and when we're
815          * called, the block is usually pretty full.
816          */
817         for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
818                 struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i);
819                 if (le->le_type != ZAP_CHUNK_ENTRY)
820                         continue;
821
822                 if (le->le_hash & (1ULL << bit))
823                         zap_leaf_transfer_entry(l, i, nl);
824                 else
825                         (void) zap_leaf_rehash_entry(l, i);
826         }
827 }
828
829 void
830 zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
831 {
832         int i, n;
833
834         n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift -
835             l->l_phys->l_hdr.lh_prefix_len;
836         n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
837         zs->zs_leafs_with_2n_pointers[n]++;
838
839
840         n = l->l_phys->l_hdr.lh_nentries/5;
841         n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
842         zs->zs_blocks_with_n5_entries[n]++;
843
844         n = ((1<<FZAP_BLOCK_SHIFT(zap)) -
845             l->l_phys->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 /
846             (1<<FZAP_BLOCK_SHIFT(zap));
847         n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
848         zs->zs_blocks_n_tenths_full[n]++;
849
850         for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) {
851                 int nentries = 0;
852                 int chunk = l->l_phys->l_hash[i];
853
854                 while (chunk != CHAIN_END) {
855                         struct zap_leaf_entry *le =
856                             ZAP_LEAF_ENTRY(l, chunk);
857
858                         n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_numints) +
859                             ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints *
860                             le->le_value_intlen);
861                         n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
862                         zs->zs_entries_using_n_chunks[n]++;
863
864                         chunk = le->le_next;
865                         nentries++;
866                 }
867
868                 n = nentries;
869                 n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
870                 zs->zs_buckets_with_n_entries[n]++;
871         }
872 }