2 * Copyright 2000 Hans Reiser
3 * See README for licensing and copyright details
5 * Ported to FreeBSD by Jean-Sébastien Pédron <jspedron@club-internet.fr>
10 #include <gnu/fs/reiserfs/reiserfs_fs.h>
12 /* Minimal possible key. It is never in the tree. */
13 const struct key MIN_KEY = {
19 /* Maximal possible key. It is never in the tree. */
20 const struct key MAX_KEY = {
23 { {0xffffffff, 0xffffffff }, }
26 /* Does the buffer contain a disk block which is in the tree. */
28 B_IS_IN_TREE(const struct buf *p_s_bp)
31 return (B_LEVEL(p_s_bp) != FREE_LEVEL);
34 /* To gets item head in le form */
36 copy_item_head(struct item_head *p_v_to, const struct item_head *p_v_from)
39 memcpy(p_v_to, p_v_from, IH_SIZE);
43 * k1 is pointer to on-disk structure which is stored in little-endian
44 * form. k2 is pointer to cpu variable. For key of items of the same
45 * object this returns 0.
46 * Returns: -1 if key1 < key2, 0 if key1 == key2 or 1 if key1 > key2
49 comp_short_keys(const struct key *le_key, const struct cpu_key *cpu_key)
51 const uint32_t *p_s_le_u32, *p_s_cpu_u32;
52 int n_key_length = REISERFS_SHORT_KEY_LEN;
54 p_s_le_u32 = (const uint32_t *)le_key;
55 p_s_cpu_u32 = (const uint32_t *)&cpu_key->on_disk_key;
56 for(; n_key_length--; ++p_s_le_u32, ++p_s_cpu_u32) {
57 if (le32toh(*p_s_le_u32) < *p_s_cpu_u32)
59 if (le32toh(*p_s_le_u32) > *p_s_cpu_u32)
67 * k1 is pointer to on-disk structure which is stored in little-endian
68 * form. k2 is pointer to cpu variable. Compare keys using all 4 key
70 * Returns: -1 if key1 < key2, 0 if key1 = key2 or 1 if key1 > key2
73 comp_keys(const struct key *le_key, const struct cpu_key *cpu_key)
77 retval = comp_short_keys(le_key, cpu_key);
81 if (le_key_k_offset(le_key_version(le_key), le_key) <
82 cpu_key_k_offset(cpu_key))
84 if (le_key_k_offset(le_key_version(le_key), le_key) >
85 cpu_key_k_offset(cpu_key))
88 if (cpu_key->key_length == 3)
91 /* This part is needed only when tail conversion is in progress */
92 if (le_key_k_type(le_key_version(le_key), le_key) <
93 cpu_key_k_type(cpu_key))
96 if (le_key_k_type(le_key_version(le_key), le_key) >
97 cpu_key_k_type(cpu_key))
103 /* Release all buffers in the path. */
105 pathrelse(struct path *p_s_search_path)
108 int n_path_offset = p_s_search_path->path_length;
110 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
111 bp = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
112 free(bp->b_data, M_REISERFSPATH);
113 free(bp, M_REISERFSPATH);
116 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
120 * This does not say which one is bigger, it only returns 1 if keys
121 * are not equal, 0 otherwise
124 comp_le_keys(const struct key *k1, const struct key *k2)
127 return (memcmp(k1, k2, sizeof(struct key)));
131 * Binary search toolkit function. Search for an item in the array by
133 * Returns: 1 if found, 0 if not found;
134 * *p_n_pos = number of the searched element if found, else the
135 * number of the first element that is larger than p_v_key.
138 * For those not familiar with binary search: n_lbound is the leftmost
139 * item that it could be, n_rbound the rightmost item that it could be.
140 * We examine the item halfway between n_lbound and n_rbound, and that
141 * tells us either that we can increase n_lbound, or decrease n_rbound,
142 * or that we have found it, or if n_lbound <= n_rbound that there are
143 * no possible items, and we have not found it. With each examination we
144 * cut the number of possible items it could be by one more than half
145 * rounded down, or we find it.
148 bin_search(const void *p_v_key, /* Key to search for. */
149 const void *p_v_base, /* First item in the array. */
150 int p_n_num, /* Number of items in the array. */
151 int p_n_width, /* Item size in the array. searched. Lest the
152 reader be confused, note that this is crafted
153 as a general function, and when it is applied
154 specifically to the array of item headers in
155 a node, p_n_width is actually the item header
156 size not the item size. */
157 int *p_n_pos) /* Number of the searched for element. */
159 int n_rbound, n_lbound, n_j;
161 for (n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0)) / 2;
162 n_lbound <= n_rbound; n_j = (n_rbound + n_lbound) / 2) {
163 switch (COMP_KEYS((const struct key *)
164 ((const char *)p_v_base + n_j * p_n_width),
165 (const struct cpu_key *)p_v_key)) {
174 return (ITEM_FOUND); /* Key found in the array. */
179 * bin_search did not find given key, it returns position of key,
180 * that is minimal and greater than the given one.
183 return (ITEM_NOT_FOUND);
187 * Get delimiting key of the buffer by looking for it in the buffers in
188 * the path, starting from the bottom of the path, and going upwards. We
189 * must check the path's validity at each step. If the key is not in the
190 * path, there is no delimiting key in the tree (buffer is first or last
191 * buffer in tree), and in this case we return a special key, either
192 * MIN_KEY or MAX_KEY.
195 get_lkey(const struct path *p_s_chk_path,
196 const struct reiserfs_sb_info *p_s_sbi)
198 struct buf *p_s_parent;
199 int n_position, n_path_offset = p_s_chk_path->path_length;
201 /* While not higher in path than first element. */
202 while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
203 /* Parent at the path is not in the tree now. */
204 if (!B_IS_IN_TREE(p_s_parent =
205 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
208 /* Check whether position in the parent is correct. */
209 if ((n_position = PATH_OFFSET_POSITION(p_s_chk_path,
210 n_path_offset)) > B_NR_ITEMS(p_s_parent))
214 * Check whether parent at the path really points to
217 if (B_N_CHILD_NUM(p_s_parent, n_position) !=
218 (PATH_OFFSET_PBUFFER(p_s_chk_path,
219 n_path_offset + 1)->b_blkno
220 / btodb(p_s_sbi->s_blocksize)))
224 * Return delimiting key if position in the parent is not
228 return (B_N_PDELIM_KEY(p_s_parent, n_position - 1));
231 /* Return MIN_KEY if we are in the root of the buffer tree. */
232 if ((PATH_OFFSET_PBUFFER(p_s_chk_path,
233 FIRST_PATH_ELEMENT_OFFSET)->b_blkno
234 / btodb(p_s_sbi->s_blocksize)) == SB_ROOT_BLOCK(p_s_sbi))
240 /* Get delimiting key of the buffer at the path and its right neighbor. */
242 get_rkey(const struct path *p_s_chk_path,
243 const struct reiserfs_sb_info *p_s_sbi)
245 struct buf *p_s_parent;
246 int n_position, n_path_offset = p_s_chk_path->path_length;
248 while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
249 /* Parent at the path is not in the tree now. */
250 if (!B_IS_IN_TREE(p_s_parent =
251 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
254 /* Check whether position in the parent is correct. */
255 if ((n_position = PATH_OFFSET_POSITION(p_s_chk_path,
257 B_NR_ITEMS(p_s_parent))
261 * Check whether parent at the path really points to the
264 if (B_N_CHILD_NUM(p_s_parent, n_position) !=
265 (PATH_OFFSET_PBUFFER(p_s_chk_path,
266 n_path_offset + 1)->b_blkno
267 / btodb(p_s_sbi->s_blocksize)))
271 * Return delimiting key if position in the parent is not
274 if (n_position != B_NR_ITEMS(p_s_parent))
275 return (B_N_PDELIM_KEY(p_s_parent, n_position));
278 /* Return MAX_KEY if we are in the root of the buffer tree. */
279 if ((PATH_OFFSET_PBUFFER(p_s_chk_path,
280 FIRST_PATH_ELEMENT_OFFSET)->b_blkno
281 / btodb(p_s_sbi->s_blocksize)) == SB_ROOT_BLOCK(p_s_sbi))
288 reiserfs_check_path(struct path *p)
291 if (p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET)
292 reiserfs_log(LOG_WARNING, "path not properly relsed\n");
297 * Check whether a key is contained in the tree rooted from a buffer at
298 * a path. This works by looking at the left and right delimiting keys
299 * for the buffer in the last path_element in the path. These delimiting
300 * keys are stored at least one level above that buffer in the tree.
301 * If the buffer is the first or last node in the tree order then one
302 * of the delimiting keys may be absent, and in this case get_lkey and
303 * get_rkey return a special key which is MIN_KEY or MAX_KEY.
307 struct path *p_s_chk_path, /* Path which should be checked. */
308 const struct cpu_key *p_s_key, /* Key which should be checked. */
309 struct reiserfs_sb_info *p_s_sbi) /* Super block pointer. */
312 if (COMP_KEYS(get_lkey(p_s_chk_path, p_s_sbi), p_s_key) == 1)
313 /* left delimiting key is bigger, that the key we look for */
316 if (COMP_KEYS(get_rkey(p_s_chk_path, p_s_sbi), p_s_key) != 1)
317 /* p_s_key must be less than right delimitiing key */
324 /* XXX Il ne semble pas y avoir de compteur de référence dans struct buf */
326 decrement_bcount(struct buf *p_s_bp)
330 if (atomic_read(&(p_s_bp->b_count))) {
338 /* Decrement b_count field of the all buffers in the path. */
340 decrement_counters_in_path(struct path *p_s_search_path)
343 pathrelse(p_s_search_path);
345 int n_path_offset = p_s_search_path->path_length;
347 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
350 bp = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
351 decrement_bcount(bp);
354 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
359 is_leaf(char *buf, int blocksize, struct buf *bp)
361 struct item_head *ih;
362 struct block_head *blkh;
363 int used_space, prev_location, i, nr;
365 blkh = (struct block_head *)buf;
366 if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
367 reiserfs_log(LOG_WARNING, "this should be caught earlier");
371 nr = blkh_nr_item(blkh);
373 ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
374 /* Item number is too big or too small */
375 reiserfs_log(LOG_WARNING, "nr_item seems wrong\n");
379 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
380 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
381 if (used_space != blocksize - blkh_free_space(blkh)) {
383 * Free space does not match to calculated amount of
386 reiserfs_log(LOG_WARNING, "free space seems wrong\n");
390 /* FIXME: it is_leaf will hit performance too much - we may have
393 /* Check tables of item heads */
394 ih = (struct item_head *)(buf + BLKH_SIZE);
395 prev_location = blocksize;
396 for (i = 0; i < nr; i++, ih++) {
397 if (le_ih_k_type(ih) == TYPE_ANY) {
398 reiserfs_log(LOG_WARNING,
399 "wrong item type for item\n");
402 if (ih_location(ih) >= blocksize ||
403 ih_location(ih) < IH_SIZE * nr) {
404 reiserfs_log(LOG_WARNING,
405 "item location seems wrong\n");
408 if (ih_item_len(ih) < 1 ||
409 ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
410 reiserfs_log(LOG_WARNING, "item length seems wrong\n");
413 if (prev_location - ih_location(ih) != ih_item_len(ih)) {
414 reiserfs_log(LOG_WARNING,
415 "item location seems wrong (second one)\n");
418 prev_location = ih_location(ih);
421 /* One may imagine much more checks */
425 /* Returns 1 if buf looks like an internal node, 0 otherwise */
427 is_internal(char *buf, int blocksize, struct buf *bp)
430 struct block_head *blkh;
432 blkh = (struct block_head *)buf;
433 nr = blkh_level(blkh);
434 if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
435 /* This level is not possible for internal nodes */
436 reiserfs_log(LOG_WARNING, "this should be caught earlier\n");
440 nr = blkh_nr_item(blkh);
441 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
443 * For internal which is not root we might check min
446 reiserfs_log(LOG_WARNING, "number of key seems wrong\n");
450 used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
451 if (used_space != blocksize - blkh_free_space(blkh)) {
452 reiserfs_log(LOG_WARNING,
453 "is_internal: free space seems wrong\n");
457 /* One may imagine much more checks */
462 * Make sure that bh contains formatted node of reiserfs tree of
466 is_tree_node(struct buf *bp, int level)
468 if (B_LEVEL(bp) != level) {
469 reiserfs_log(LOG_WARNING,
470 "node level (%d) doesn't match to the "
471 "expected one (%d)\n", B_LEVEL (bp), level);
475 if (level == DISK_LEAF_NODE_LEVEL)
476 return (is_leaf(bp->b_data, bp->b_bcount, bp));
478 return (is_internal(bp->b_data, bp->b_bcount, bp));
482 search_by_key(struct reiserfs_sb_info *p_s_sbi,
483 const struct cpu_key * p_s_key, /* Key to search. */
484 struct path * p_s_search_path, /* This structure was allocated and
485 initialized by the calling function.
486 It is filled up by this function. */
487 int n_stop_level) /* How far down the tree to search. To
488 stop at leaf level - set to
489 DISK_LEAF_NODE_LEVEL */
492 int n_node_level, n_retval;
493 int n_block_number, expected_level, fs_gen;
494 struct path_element *p_s_last_element;
495 struct buf *p_s_bp, *tmp_bp;
498 * As we add each node to a path we increase its count. This means that
499 * we must be careful to release all nodes in a path before we either
500 * discard the path struct or re-use the path struct, as we do here.
502 decrement_counters_in_path(p_s_search_path);
505 * With each iteration of this loop we search through the items in the
506 * current node, and calculate the next current node(next path element)
507 * for the next iteration of this loop...
509 n_block_number = SB_ROOT_BLOCK(p_s_sbi);
512 reiserfs_log(LOG_DEBUG, "root block: #%d\n", n_block_number);
515 /* Prep path to have another element added to it. */
516 reiserfs_log(LOG_DEBUG, "path element #%d\n",
517 p_s_search_path->path_length);
518 p_s_last_element = PATH_OFFSET_PELEMENT(p_s_search_path,
519 ++p_s_search_path->path_length);
520 fs_gen = get_generation(p_s_sbi);
523 * Read the next tree node, and set the last element in the
524 * path to have a pointer to it.
526 reiserfs_log(LOG_DEBUG, "reading block #%d\n",
528 if ((error = bread(p_s_sbi->s_devvp,
529 n_block_number * btodb(p_s_sbi->s_blocksize),
530 p_s_sbi->s_blocksize, NOCRED, &tmp_bp)) != 0) {
531 reiserfs_log(LOG_DEBUG, "error reading block\n");
532 p_s_search_path->path_length--;
533 pathrelse(p_s_search_path);
536 reiserfs_log(LOG_DEBUG, "blkno = %ju, lblkno = %ju\n",
537 (intmax_t)tmp_bp->b_blkno, (intmax_t)tmp_bp->b_lblkno);
540 * As i didn't found a way to handle the lock correctly,
541 * i copy the data into a fake buffer
543 reiserfs_log(LOG_DEBUG, "allocating p_s_bp\n");
544 p_s_bp = malloc(sizeof *p_s_bp, M_REISERFSPATH, M_WAITOK);
546 reiserfs_log(LOG_DEBUG, "error allocating memory\n");
547 p_s_search_path->path_length--;
548 pathrelse(p_s_search_path);
552 reiserfs_log(LOG_DEBUG, "copying struct buf\n");
553 bcopy(tmp_bp, p_s_bp, sizeof(struct buf));
555 reiserfs_log(LOG_DEBUG, "allocating p_s_bp->b_data\n");
556 p_s_bp->b_data = malloc(p_s_sbi->s_blocksize,
557 M_REISERFSPATH, M_WAITOK);
558 if (!p_s_bp->b_data) {
559 reiserfs_log(LOG_DEBUG, "error allocating memory\n");
560 p_s_search_path->path_length--;
561 pathrelse(p_s_search_path);
562 free(p_s_bp, M_REISERFSPATH);
566 reiserfs_log(LOG_DEBUG, "copying buffer data\n");
567 bcopy(tmp_bp->b_data, p_s_bp->b_data, p_s_sbi->s_blocksize);
571 reiserfs_log(LOG_DEBUG, "...done\n");
572 p_s_last_element->pe_buffer = p_s_bp;
574 if (expected_level == -1)
575 expected_level = SB_TREE_HEIGHT(p_s_sbi);
577 reiserfs_log(LOG_DEBUG, "expected level: %d (%d)\n",
578 expected_level, SB_TREE_HEIGHT(p_s_sbi));
582 * It is possible that schedule occurred. We must check
583 * whether the key to search is still in the tree rooted
584 * from the current buffer. If not then repeat search
587 if (fs_changed(fs_gen, p_s_sbi) &&
588 (!B_IS_IN_TREE(p_s_bp) ||
589 B_LEVEL(p_s_bp) != expected_level ||
590 !key_in_buffer(p_s_search_path, p_s_key, p_s_sbi))) {
591 reiserfs_log(LOG_DEBUG,
592 "the key isn't in the tree anymore\n");
593 decrement_counters_in_path(p_s_search_path);
596 * Get the root block number so that we can repeat
597 * the search starting from the root.
599 n_block_number = SB_ROOT_BLOCK(p_s_sbi);
602 /* Repeat search from the root */
607 * Make sure, that the node contents look like a node of
610 if (!is_tree_node(p_s_bp, expected_level)) {
611 reiserfs_log(LOG_WARNING,
612 "invalid format found in block %ju. Fsck?",
613 (intmax_t)p_s_bp->b_blkno);
614 pathrelse (p_s_search_path);
618 /* Ok, we have acquired next formatted node in the tree */
619 n_node_level = B_LEVEL(p_s_bp);
620 reiserfs_log(LOG_DEBUG, "block info:\n");
621 reiserfs_log(LOG_DEBUG, " node level: %d\n",
623 reiserfs_log(LOG_DEBUG, " nb of items: %d\n",
625 reiserfs_log(LOG_DEBUG, " free space: %d bytes\n",
626 B_FREE_SPACE(p_s_bp));
627 reiserfs_log(LOG_DEBUG, "bin_search with :\n"
628 " p_s_key = (objectid=%d, dirid=%d)\n"
629 " B_NR_ITEMS(p_s_bp) = %d\n"
630 " p_s_last_element->pe_position = %d (path_length = %d)\n",
631 p_s_key->on_disk_key.k_objectid,
632 p_s_key->on_disk_key.k_dir_id,
634 p_s_last_element->pe_position,
635 p_s_search_path->path_length);
636 n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(p_s_bp, 0),
638 (n_node_level == DISK_LEAF_NODE_LEVEL) ? IH_SIZE : KEY_SIZE,
639 &(p_s_last_element->pe_position));
640 reiserfs_log(LOG_DEBUG, "bin_search result: %d\n",
642 if (n_node_level == n_stop_level) {
643 reiserfs_log(LOG_DEBUG, "stop level reached (%s)\n",
644 n_retval == ITEM_FOUND ? "found" : "not found");
648 /* We are not in the stop level */
649 if (n_retval == ITEM_FOUND)
651 * Item has been found, so we choose the pointer
652 * which is to the right of the found one
654 p_s_last_element->pe_position++;
657 * If item was not found we choose the position which is
658 * to the left of the found item. This requires no code,
659 * bin_search did it already.
663 * So we have chosen a position in the current node which
664 * is an internal node. Now we calculate child block number
665 * by position in the node.
667 n_block_number = B_N_CHILD_NUM(p_s_bp,
668 p_s_last_element->pe_position);
671 reiserfs_log(LOG_DEBUG, "done\n");
676 * Form the path to an item and position in this item which contains
677 * file byte defined by p_s_key. If there is no such item corresponding
678 * to the key, we point the path to the item with maximal key less than
679 * p_s_key, and *p_n_pos_in_item is set to one past the last entry/byte
680 * in the item. If searching for entry in a directory item, and it is
681 * not found, *p_n_pos_in_item is set to one entry more than the entry
682 * with maximal key which is less than the sought key.
684 * Note that if there is no entry in this same node which is one more,
685 * then we point to an imaginary entry. For direct items, the position
686 * is in units of bytes, for indirect items the position is in units
687 * of blocknr entries, for directory items the position is in units of
691 /* The function is NOT SCHEDULE-SAFE! */
693 search_for_position_by_key(struct reiserfs_sb_info *p_s_sbi,
694 const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */
695 struct path *p_s_search_path) /* Filled up by this function. */
697 int retval, n_blk_size;
698 off_t item_offset, offset;
699 struct item_head *p_le_ih; /* Pointer to on-disk structure */
700 struct reiserfs_dir_entry de;
702 /* If searching for directory entry. */
703 if (is_direntry_cpu_key(p_cpu_key))
704 return (search_by_entry_key(p_s_sbi, p_cpu_key,
705 p_s_search_path, &de));
707 /* If not searching for directory entry. */
709 /* If item is found. */
710 retval = search_item(p_s_sbi, p_cpu_key, p_s_search_path);
711 if (retval == IO_ERROR)
713 if (retval == ITEM_FOUND) {
714 if (ih_item_len(B_N_PITEM_HEAD(
715 PATH_PLAST_BUFFER(p_s_search_path),
716 PATH_LAST_POSITION(p_s_search_path))) == 0) {
717 reiserfs_log(LOG_WARNING, "item length equals zero\n");
720 pos_in_item(p_s_search_path) = 0;
721 return (POSITION_FOUND);
724 if (PATH_LAST_POSITION(p_s_search_path) == 0) {
725 reiserfs_log(LOG_WARNING, "position equals zero\n");
728 /* Item is not found. Set path to the previous item. */
729 p_le_ih = B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
730 --PATH_LAST_POSITION(p_s_search_path));
731 n_blk_size = p_s_sbi->s_blocksize;
733 if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
734 return (FILE_NOT_FOUND);
737 item_offset = le_ih_k_offset(p_le_ih);
738 offset = cpu_key_k_offset(p_cpu_key);
740 /* Needed byte is contained in the item pointed to by the path.*/
741 if (item_offset <= offset &&
742 item_offset + op_bytes_number(p_le_ih, n_blk_size) > offset) {
743 pos_in_item(p_s_search_path) = offset - item_offset;
744 if (is_indirect_le_ih(p_le_ih)) {
745 pos_in_item(p_s_search_path) /= n_blk_size;
747 return (POSITION_FOUND);
750 /* Needed byte is not contained in the item pointed to by the
751 * path. Set pos_in_item out of the item. */
752 if (is_indirect_le_ih(p_le_ih))
753 pos_in_item(p_s_search_path) =
754 ih_item_len(p_le_ih) / UNFM_P_SIZE;
756 pos_in_item(p_s_search_path) =
757 ih_item_len(p_le_ih);
759 return (POSITION_NOT_FOUND);