/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
 * Copyright 2017 Nexenta Systems, Inc.
 */

/*
 * The 512-byte leaf is broken into 32 16-byte chunks.
 * Chunk number n means l_chunk[n], even though the header precedes it.
 * The names are stored null-terminated.
 */
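
/*
 * Rough layout of a leaf block (zap_leaf_phys_t), for orientation only:
 * a zap_leaf_header (l_hdr), then the l_hash[] table of 16-bit bucket
 * heads (chunk indices), then the chunk array itself.  Entries, names,
 * and values all live in chunks that are chained together by index;
 * CHAIN_END terminates a chain.
 */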

#include <sys/zio.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/zfs_context.h>
#include <sys/fs/zfs.h>
#include <sys/zap.h>
#include <sys/zap_impl.h>
#include <sys/zap_leaf.h>
#include <sys/arc.h>

static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);

#define CHAIN_END 0xffff /* end of the chunk chain */

#define LEAF_HASH(l, h) \
        ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
        ((h) >> \
        (64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len)))

#define LEAF_HASH_ENTPTR(l, h)  (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
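
/*
 * LEAF_HASH() picks a bucket in l_hash[]: the top lh_prefix_len bits of
 * the 64-bit hash identify this leaf, and the ZAP_LEAF_HASH_SHIFT(l)
 * bits just below them select the bucket within the leaf.
 */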

static void
zap_memset(void *a, int c, size_t n)
{
        char *cp = a;
        char *cpend = cp + n;

        while (cp < cpend)
                *cp++ = c;
}

static void
stv(int len, void *addr, uint64_t value)
{
        switch (len) {
        case 1:
                *(uint8_t *)addr = value;
                return;
        case 2:
                *(uint16_t *)addr = value;
                return;
        case 4:
                *(uint32_t *)addr = value;
                return;
        case 8:
                *(uint64_t *)addr = value;
                return;
        default:
                cmn_err(CE_PANIC, "bad int len %d", len);
        }
}

static uint64_t
ldv(int len, const void *addr)
{
        switch (len) {
        case 1:
                return (*(uint8_t *)addr);
        case 2:
                return (*(uint16_t *)addr);
        case 4:
                return (*(uint32_t *)addr);
        case 8:
                return (*(uint64_t *)addr);
        default:
                cmn_err(CE_PANIC, "bad int len %d", len);
        }
        return (0xFEEDFACEDEADBEEFULL);
}
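
/*
 * stv()/ldv() store and load a native-endian integer of 1, 2, 4, or 8
 * bytes at the given address, so the array pack/unpack code below can
 * handle every supported integer size with a single loop.
 */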

void
zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
{
        zap_leaf_t l;
        dmu_buf_t l_dbuf;

        l_dbuf.db_data = buf;
        l.l_bs = highbit64(size) - 1;
        l.l_dbuf = &l_dbuf;

        buf->l_hdr.lh_block_type =      BSWAP_64(buf->l_hdr.lh_block_type);
        buf->l_hdr.lh_prefix =          BSWAP_64(buf->l_hdr.lh_prefix);
        buf->l_hdr.lh_magic =           BSWAP_32(buf->l_hdr.lh_magic);
        buf->l_hdr.lh_nfree =           BSWAP_16(buf->l_hdr.lh_nfree);
        buf->l_hdr.lh_nentries =        BSWAP_16(buf->l_hdr.lh_nentries);
        buf->l_hdr.lh_prefix_len =      BSWAP_16(buf->l_hdr.lh_prefix_len);
        buf->l_hdr.lh_freelist =        BSWAP_16(buf->l_hdr.lh_freelist);

        for (int i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
                buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);

        for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
                zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
                struct zap_leaf_entry *le;

                switch (lc->l_free.lf_type) {
                case ZAP_CHUNK_ENTRY:
                        le = &lc->l_entry;

                        le->le_type =           BSWAP_8(le->le_type);
                        le->le_value_intlen =   BSWAP_8(le->le_value_intlen);
                        le->le_next =           BSWAP_16(le->le_next);
                        le->le_name_chunk =     BSWAP_16(le->le_name_chunk);
                        le->le_name_numints =   BSWAP_16(le->le_name_numints);
                        le->le_value_chunk =    BSWAP_16(le->le_value_chunk);
                        le->le_value_numints =  BSWAP_16(le->le_value_numints);
                        le->le_cd =             BSWAP_32(le->le_cd);
                        le->le_hash =           BSWAP_64(le->le_hash);
                        break;
                case ZAP_CHUNK_FREE:
                        lc->l_free.lf_type =    BSWAP_8(lc->l_free.lf_type);
                        lc->l_free.lf_next =    BSWAP_16(lc->l_free.lf_next);
                        break;
                case ZAP_CHUNK_ARRAY:
                        lc->l_array.la_type =   BSWAP_8(lc->l_array.la_type);
                        lc->l_array.la_next =   BSWAP_16(lc->l_array.la_next);
                        /* la_array doesn't need swapping */
                        break;
                default:
                        cmn_err(CE_PANIC, "bad leaf type %d",
                            lc->l_free.lf_type);
                }
        }
}

void
zap_leaf_init(zap_leaf_t *l, boolean_t sort)
{
        l->l_bs = highbit64(l->l_dbuf->db_size) - 1;
        zap_memset(&zap_leaf_phys(l)->l_hdr, 0,
            sizeof (struct zap_leaf_header));
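        /*
         * CHAIN_END is 0xffff, so byte-filling the 16-bit l_hash[]
         * entries with 0xff below marks every hash bucket as empty.
         */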
        zap_memset(zap_leaf_phys(l)->l_hash, CHAIN_END,
            2*ZAP_LEAF_HASH_NUMENTRIES(l));
        for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
                ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
                ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
        }
        ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
        zap_leaf_phys(l)->l_hdr.lh_block_type = ZBT_LEAF;
        zap_leaf_phys(l)->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
        zap_leaf_phys(l)->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
        if (sort)
                zap_leaf_phys(l)->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
}

/*
 * Routines which manipulate leaf chunks (l_chunk[]).
 */

static uint16_t
zap_leaf_chunk_alloc(zap_leaf_t *l)
{
        ASSERT(zap_leaf_phys(l)->l_hdr.lh_nfree > 0);

        int chunk = zap_leaf_phys(l)->l_hdr.lh_freelist;
        ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
        ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE);

        zap_leaf_phys(l)->l_hdr.lh_freelist =
            ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next;

        zap_leaf_phys(l)->l_hdr.lh_nfree--;

        return (chunk);
}

static void
zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk)
{
        struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free;
        ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
        ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
        ASSERT(zlf->lf_type != ZAP_CHUNK_FREE);

        zlf->lf_type = ZAP_CHUNK_FREE;
        zlf->lf_next = zap_leaf_phys(l)->l_hdr.lh_freelist;
        memset(zlf->lf_pad, 0, sizeof (zlf->lf_pad)); /* help it to compress */
        zap_leaf_phys(l)->l_hdr.lh_freelist = chunk;

        zap_leaf_phys(l)->l_hdr.lh_nfree++;
}

/*
 * Routines which manipulate leaf arrays (zap_leaf_array type chunks).
 */

static uint16_t
zap_leaf_array_create(zap_leaf_t *l, const char *buf,
    int integer_size, int num_integers)
{
        uint16_t chunk_head;
        uint16_t *chunkp = &chunk_head;
        int byten = 0;
        uint64_t value = 0;
        int shift = (integer_size - 1) * 8;
        int len = num_integers;

        ASSERT3U(num_integers * integer_size, <=, ZAP_MAXVALUELEN);

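        /*
         * Each integer is emitted most-significant byte first, one byte per
         * la_array[] slot, spilling across as many chained ZAP_CHUNK_ARRAY
         * chunks as it takes (ZAP_LEAF_ARRAY_BYTES payload bytes per chunk).
         */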
        while (len > 0) {
                uint16_t chunk = zap_leaf_chunk_alloc(l);
                struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;

                la->la_type = ZAP_CHUNK_ARRAY;
                for (int i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) {
                        if (byten == 0)
                                value = ldv(integer_size, buf);
                        la->la_array[i] = value >> shift;
                        value <<= 8;
                        if (++byten == integer_size) {
                                byten = 0;
                                buf += integer_size;
                                if (--len == 0)
                                        break;
                        }
                }

                *chunkp = chunk;
                chunkp = &la->la_next;
        }
        *chunkp = CHAIN_END;

        return (chunk_head);
}

static void
zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp)
{
        uint16_t chunk = *chunkp;

        *chunkp = CHAIN_END;

        while (chunk != CHAIN_END) {
                int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next;
                ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==,
                    ZAP_CHUNK_ARRAY);
                zap_leaf_chunk_free(l, chunk);
                chunk = nextchunk;
        }
}

/* array_len and buf_len are in integers, not bytes */
static void
zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk,
    int array_int_len, int array_len, int buf_int_len, uint64_t buf_len,
    void *buf)
{
        int len = MIN(array_len, buf_len);
        int byten = 0;
        uint64_t value = 0;
        char *p = buf;

        ASSERT3U(array_int_len, <=, buf_int_len);

        /* Fast path for one 8-byte integer */
        if (array_int_len == 8 && buf_int_len == 8 && len == 1) {
                struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
                uint8_t *ip = la->la_array;
                uint64_t *buf64 = buf;

                *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
                    (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
                    (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
                    (uint64_t)ip[6] << 8 | (uint64_t)ip[7];
                return;
        }

        /* Fast path for an array of 1-byte integers (e.g. the entry name) */
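        /*
         * Note the strict buf_len check: the extra ZAP_LEAF_ARRAY_BYTES of
         * slack lets the loop below copy whole chunks without worrying about
         * a partial final chunk overrunning the destination buffer.
         */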
        if (array_int_len == 1 && buf_int_len == 1 &&
            buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) {
                while (chunk != CHAIN_END) {
                        struct zap_leaf_array *la =
                            &ZAP_LEAF_CHUNK(l, chunk).l_array;
                        memcpy(p, la->la_array, ZAP_LEAF_ARRAY_BYTES);
                        p += ZAP_LEAF_ARRAY_BYTES;
                        chunk = la->la_next;
                }
                return;
        }

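        /*
         * Generic path: reassemble each stored integer big-endian from
         * array_int_len bytes, then widen it to buf_int_len when storing
         * it into the caller's buffer.
         */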
        while (len > 0) {
                struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;

                ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
                for (int i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) {
                        value = (value << 8) | la->la_array[i];
                        byten++;
                        if (byten == array_int_len) {
                                stv(buf_int_len, p, value);
                                byten = 0;
                                len--;
                                if (len == 0)
                                        return;
                                p += buf_int_len;
                        }
                }
                chunk = la->la_next;
        }
}

static boolean_t
zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn,
    int chunk, int array_numints)
{
        int bseen = 0;

        if (zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY) {
                uint64_t *thiskey =
                    kmem_alloc(array_numints * sizeof (*thiskey), KM_SLEEP);
                ASSERT(zn->zn_key_intlen == sizeof (*thiskey));

                zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints,
                    sizeof (*thiskey), array_numints, thiskey);
                boolean_t match = memcmp(thiskey, zn->zn_key_orig,
                    array_numints * sizeof (*thiskey)) == 0;
                kmem_free(thiskey, array_numints * sizeof (*thiskey));
                return (match);
        }

        ASSERT(zn->zn_key_intlen == 1);
        if (zn->zn_matchtype & MT_NORMALIZE) {
                char *thisname = kmem_alloc(array_numints, KM_SLEEP);

                zap_leaf_array_read(l, chunk, sizeof (char), array_numints,
                    sizeof (char), array_numints, thisname);
                boolean_t match = zap_match(zn, thisname);
                kmem_free(thisname, array_numints);
                return (match);
        }

        /*
         * Fast path for exact matching.
         * First check that the lengths match, so that we don't read
         * past the end of the zn_key_orig array.
         */
        if (array_numints != zn->zn_key_orig_numints)
                return (B_FALSE);
        while (bseen < array_numints) {
                struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
                int toread = MIN(array_numints - bseen, ZAP_LEAF_ARRAY_BYTES);
                ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
                if (memcmp(la->la_array, (char *)zn->zn_key_orig + bseen,
                    toread))
                        break;
                chunk = la->la_next;
                bseen += toread;
        }
        return (bseen == array_numints);
}

/*
 * Routines which manipulate leaf entries.
 */

int
zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
{
        struct zap_leaf_entry *le;

        ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);

        for (uint16_t *chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash);
            *chunkp != CHAIN_END; chunkp = &le->le_next) {
                uint16_t chunk = *chunkp;
                le = ZAP_LEAF_ENTRY(l, chunk);

                ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
                ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

                if (le->le_hash != zn->zn_hash)
                        continue;

                /*
                 * NB: the entry chain is always sorted by cd on
                 * normalized zap objects, so this will find the
                 * lowest-cd match for MT_NORMALIZE.
                 */
                ASSERT((zn->zn_matchtype == 0) ||
                    (zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED));
                if (zap_leaf_array_match(l, zn, le->le_name_chunk,
                    le->le_name_numints)) {
                        zeh->zeh_num_integers = le->le_value_numints;
                        zeh->zeh_integer_size = le->le_value_intlen;
                        zeh->zeh_cd = le->le_cd;
                        zeh->zeh_hash = le->le_hash;
                        zeh->zeh_chunkp = chunkp;
                        zeh->zeh_leaf = l;
                        return (0);
                }
        }

        return (SET_ERROR(ENOENT));
}

/* Return (h1,cd1 >= h2,cd2) */
#define HCD_GTEQ(h1, cd1, h2, cd2) \
        ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE))

int
zap_leaf_lookup_closest(zap_leaf_t *l,
    uint64_t h, uint32_t cd, zap_entry_handle_t *zeh)
{
        uint64_t besth = -1ULL;
        uint32_t bestcd = -1U;
        uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1;
        struct zap_leaf_entry *le;

        ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);

        for (uint16_t lh = LEAF_HASH(l, h); lh <= bestlh; lh++) {
                for (uint16_t chunk = zap_leaf_phys(l)->l_hash[lh];
                    chunk != CHAIN_END; chunk = le->le_next) {
                        le = ZAP_LEAF_ENTRY(l, chunk);

                        ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
                        ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

                        if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) &&
                            HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) {
                                ASSERT3U(bestlh, >=, lh);
                                bestlh = lh;
                                besth = le->le_hash;
                                bestcd = le->le_cd;

                                zeh->zeh_num_integers = le->le_value_numints;
                                zeh->zeh_integer_size = le->le_value_intlen;
                                zeh->zeh_cd = le->le_cd;
                                zeh->zeh_hash = le->le_hash;
                                zeh->zeh_fakechunk = chunk;
                                zeh->zeh_chunkp = &zeh->zeh_fakechunk;
                                zeh->zeh_leaf = l;
                        }
                }
        }

        return (bestcd == -1U ? SET_ERROR(ENOENT) : 0);
}

int
zap_entry_read(const zap_entry_handle_t *zeh,
    uint8_t integer_size, uint64_t num_integers, void *buf)
{
        struct zap_leaf_entry *le =
            ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
        ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

        if (le->le_value_intlen > integer_size)
                return (SET_ERROR(EINVAL));

        zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk,
            le->le_value_intlen, le->le_value_numints,
            integer_size, num_integers, buf);

        if (zeh->zeh_num_integers > num_integers)
                return (SET_ERROR(EOVERFLOW));
        return (0);
}

int
zap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen,
    char *buf)
{
        struct zap_leaf_entry *le =
            ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
        ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

        if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
                zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 8,
                    le->le_name_numints, 8, buflen / 8, buf);
        } else {
                zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1,
                    le->le_name_numints, 1, buflen, buf);
        }
        if (le->le_name_numints > buflen)
                return (SET_ERROR(EOVERFLOW));
        return (0);
}

int
zap_entry_update(zap_entry_handle_t *zeh,
    uint8_t integer_size, uint64_t num_integers, const void *buf)
{
        zap_leaf_t *l = zeh->zeh_leaf;
        struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp);

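        /*
         * delta_chunks is how many more (or fewer) array chunks the new
         * value needs compared with the old one; if the leaf does not have
         * that many free chunks, fail with EAGAIN rather than modify it.
         */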
        int delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) -
            ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen);

        if ((int)zap_leaf_phys(l)->l_hdr.lh_nfree < delta_chunks)
                return (SET_ERROR(EAGAIN));

        zap_leaf_array_free(l, &le->le_value_chunk);
        le->le_value_chunk =
            zap_leaf_array_create(l, buf, integer_size, num_integers);
        le->le_value_numints = num_integers;
        le->le_value_intlen = integer_size;
        return (0);
}

void
zap_entry_remove(zap_entry_handle_t *zeh)
{
        zap_leaf_t *l = zeh->zeh_leaf;

        ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk);

        uint16_t entry_chunk = *zeh->zeh_chunkp;
        struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry_chunk);
        ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

        zap_leaf_array_free(l, &le->le_name_chunk);
        zap_leaf_array_free(l, &le->le_value_chunk);

        *zeh->zeh_chunkp = le->le_next;
        zap_leaf_chunk_free(l, entry_chunk);

        zap_leaf_phys(l)->l_hdr.lh_nentries--;
}

int
zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd,
    uint8_t integer_size, uint64_t num_integers, const void *buf,
    zap_entry_handle_t *zeh)
{
        uint16_t chunk;
        struct zap_leaf_entry *le;
        uint64_t h = zn->zn_hash;

        uint64_t valuelen = integer_size * num_integers;

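        /*
         * One chunk for the entry itself, plus enough array chunks to hold
         * the name and the value.  If that exceeds the chunks in an entire
         * leaf, the entry can never fit no matter how the ZAP is split.
         */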
        int numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(zn->zn_key_orig_numints *
            zn->zn_key_intlen) + ZAP_LEAF_ARRAY_NCHUNKS(valuelen);
        if (numchunks > ZAP_LEAF_NUMCHUNKS(l))
                return (SET_ERROR(E2BIG));

        if (cd == ZAP_NEED_CD) {
                /* find the lowest unused cd */
                if (zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) {
                        cd = 0;

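                        /*
                         * The chain is sorted by cd, so walk it and advance
                         * cd past every entry with this hash; the first gap
                         * (or the end of the chain) is the lowest unused cd.
                         */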
                        for (chunk = *LEAF_HASH_ENTPTR(l, h);
                            chunk != CHAIN_END; chunk = le->le_next) {
                                le = ZAP_LEAF_ENTRY(l, chunk);
                                if (le->le_cd > cd)
                                        break;
                                if (le->le_hash == h) {
                                        ASSERT3U(cd, ==, le->le_cd);
                                        cd++;
                                }
                        }
                } else {
                        /* old unsorted format; do it the O(n^2) way */
                        for (cd = 0; ; cd++) {
                                for (chunk = *LEAF_HASH_ENTPTR(l, h);
                                    chunk != CHAIN_END; chunk = le->le_next) {
                                        le = ZAP_LEAF_ENTRY(l, chunk);
                                        if (le->le_hash == h &&
                                            le->le_cd == cd) {
                                                break;
                                        }
                                }
                                /* If this cd is not in use, we are good. */
                                if (chunk == CHAIN_END)
                                        break;
                        }
                }
                /*
                 * We would run out of space in a block before we could
                 * store enough entries to run out of CD values.
                 */
                ASSERT3U(cd, <, zap_maxcd(zn->zn_zap));
        }

        if (zap_leaf_phys(l)->l_hdr.lh_nfree < numchunks)
                return (SET_ERROR(EAGAIN));

        /* make the entry */
        chunk = zap_leaf_chunk_alloc(l);
        le = ZAP_LEAF_ENTRY(l, chunk);
        le->le_type = ZAP_CHUNK_ENTRY;
        le->le_name_chunk = zap_leaf_array_create(l, zn->zn_key_orig,
            zn->zn_key_intlen, zn->zn_key_orig_numints);
        le->le_name_numints = zn->zn_key_orig_numints;
        le->le_value_chunk =
            zap_leaf_array_create(l, buf, integer_size, num_integers);
        le->le_value_numints = num_integers;
        le->le_value_intlen = integer_size;
        le->le_hash = h;
        le->le_cd = cd;

        /* link it into the hash chain */
        /* XXX if we did the search above, we could just use that */
        uint16_t *chunkp = zap_leaf_rehash_entry(l, chunk);

        zap_leaf_phys(l)->l_hdr.lh_nentries++;

        zeh->zeh_leaf = l;
        zeh->zeh_num_integers = num_integers;
        zeh->zeh_integer_size = le->le_value_intlen;
        zeh->zeh_cd = le->le_cd;
        zeh->zeh_hash = le->le_hash;
        zeh->zeh_chunkp = chunkp;

        return (0);
}

/*
 * Determine if there is another entry with the same normalized form.
 * For performance purposes, either zn or name must be provided (the
 * other can be NULL).  Note, there usually won't be any hash
 * conflicts, in which case we don't need the concatenated/normalized
 * form of the name.  But all callers have one of these on hand anyway,
 * so might as well take advantage.  A cleaner but slower interface
 * would accept neither argument, and compute the normalized name as
 * needed (using zap_name_alloc_str(zap_entry_read_name(zeh))).
 */
boolean_t
zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
    const char *name, zap_t *zap)
{
        struct zap_leaf_entry *le;
        boolean_t allocdzn = B_FALSE;

        if (zap->zap_normflags == 0)
                return (B_FALSE);

        for (uint16_t chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash);
            chunk != CHAIN_END; chunk = le->le_next) {
                le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk);
                if (le->le_hash != zeh->zeh_hash)
                        continue;
                if (le->le_cd == zeh->zeh_cd)
                        continue;

                if (zn == NULL) {
                        zn = zap_name_alloc_str(zap, name, MT_NORMALIZE);
                        allocdzn = B_TRUE;
                }
                if (zap_leaf_array_match(zeh->zeh_leaf, zn,
                    le->le_name_chunk, le->le_name_numints)) {
                        if (allocdzn)
                                zap_name_free(zn);
                        return (B_TRUE);
                }
        }
        if (allocdzn)
                zap_name_free(zn);
        return (B_FALSE);
}

/*
 * Routines for transferring entries between leaves.
 */

static uint16_t *
zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
{
        struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
        struct zap_leaf_entry *le2;
        uint16_t *chunkp;

        /*
         * Keep the entry chain sorted by cd.
         * NB: this will not cause problems for unsorted leaves, though
         * it is unnecessary there.
         */
        for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash);
            *chunkp != CHAIN_END; chunkp = &le2->le_next) {
                le2 = ZAP_LEAF_ENTRY(l, *chunkp);
                if (le2->le_cd > le->le_cd)
                        break;
        }

        le->le_next = *chunkp;
        *chunkp = entry;
        return (chunkp);
}

static uint16_t
zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
{
        uint16_t new_chunk;
        uint16_t *nchunkp = &new_chunk;

        while (chunk != CHAIN_END) {
                uint16_t nchunk = zap_leaf_chunk_alloc(nl);
                struct zap_leaf_array *nla =
                    &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
                struct zap_leaf_array *la =
                    &ZAP_LEAF_CHUNK(l, chunk).l_array;
                int nextchunk = la->la_next;

                ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
                ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));

                *nla = *la; /* structure assignment */

                zap_leaf_chunk_free(l, chunk);
                chunk = nextchunk;
                *nchunkp = nchunk;
                nchunkp = &nla->la_next;
        }
        *nchunkp = CHAIN_END;
        return (new_chunk);
}

static void
zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl)
{
        struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
        ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

        uint16_t chunk = zap_leaf_chunk_alloc(nl);
        struct zap_leaf_entry *nle = ZAP_LEAF_ENTRY(nl, chunk);
        *nle = *le; /* structure assignment */

        (void) zap_leaf_rehash_entry(nl, chunk);

        nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
        nle->le_value_chunk =
            zap_leaf_transfer_array(l, le->le_value_chunk, nl);

        zap_leaf_chunk_free(l, entry);

        zap_leaf_phys(l)->l_hdr.lh_nentries--;
        zap_leaf_phys(nl)->l_hdr.lh_nentries++;
}

/*
 * Transfer the entries whose hash prefix ends in 1 to the new leaf.
 */
void
zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
{
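        /*
         * The prefix is about to grow by one bit; 'bit' is the position of
         * that newly significant hash bit, counting down from the most
         * significant bit of the 64-bit hash.
         */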
        int bit = 64 - 1 - zap_leaf_phys(l)->l_hdr.lh_prefix_len;

        /* set new prefix and prefix_len */
        zap_leaf_phys(l)->l_hdr.lh_prefix <<= 1;
        zap_leaf_phys(l)->l_hdr.lh_prefix_len++;
        zap_leaf_phys(nl)->l_hdr.lh_prefix =
            zap_leaf_phys(l)->l_hdr.lh_prefix | 1;
        zap_leaf_phys(nl)->l_hdr.lh_prefix_len =
            zap_leaf_phys(l)->l_hdr.lh_prefix_len;

        /* break existing hash chains */
        zap_memset(zap_leaf_phys(l)->l_hash, CHAIN_END,
            2*ZAP_LEAF_HASH_NUMENTRIES(l));

        if (sort)
                zap_leaf_phys(l)->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;

        /*
         * Transfer entries whose hash bit 'bit' is set to nl; rehash
         * the remaining entries
         *
         * NB: We could find entries via the hashtable instead. That
         * would be O(hashents+numents) rather than O(numblks+numents),
         * but this accesses memory more sequentially, and when we're
         * called, the block is usually pretty full.
         */
        for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
                struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i);
                if (le->le_type != ZAP_CHUNK_ENTRY)
                        continue;

                if (le->le_hash & (1ULL << bit))
                        zap_leaf_transfer_entry(l, i, nl);
                else
                        (void) zap_leaf_rehash_entry(l, i);
        }
}

void
zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
{
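        /*
         * 2^n pointer-table entries reference this leaf, where n is the
         * difference between the pointer table's shift and this leaf's
         * prefix length.
         */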
        int n = zap_f_phys(zap)->zap_ptrtbl.zt_shift -
            zap_leaf_phys(l)->l_hdr.lh_prefix_len;
        n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
        zs->zs_leafs_with_2n_pointers[n]++;

        n = zap_leaf_phys(l)->l_hdr.lh_nentries/5;
        n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
        zs->zs_blocks_with_n5_entries[n]++;

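        /*
         * Approximate how full the block is, in tenths: block size minus an
         * estimate of the free space (free chunks times an approximate
         * per-chunk size), scaled by 10 over the block size.
         */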
        n = ((1<<FZAP_BLOCK_SHIFT(zap)) -
            zap_leaf_phys(l)->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 /
            (1<<FZAP_BLOCK_SHIFT(zap));
        n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
        zs->zs_blocks_n_tenths_full[n]++;

        for (int i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) {
                int nentries = 0;
                int chunk = zap_leaf_phys(l)->l_hash[i];

                while (chunk != CHAIN_END) {
                        struct zap_leaf_entry *le =
                            ZAP_LEAF_ENTRY(l, chunk);

                        n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_numints) +
                            ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints *
                            le->le_value_intlen);
                        n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
                        zs->zs_entries_using_n_chunks[n]++;

                        chunk = le->le_next;
                        nentries++;
                }

                n = nentries;
                n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
                zs->zs_buckets_with_n_entries[n]++;
        }
}