]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - fs/nilfs2/btree.c
53f0d4c31cb0d141ed18609fc9867c1bf2768262
[linux-2.6-omap-h63xx.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32
33 /**
34  * struct nilfs_btree_path - A path on which B-tree operations are executed
35  * @bp_bh: buffer head of node block
36  * @bp_sib_bh: buffer head of sibling node block
37  * @bp_index: index of child node
38  * @bp_oldreq: ptr end request for old ptr
39  * @bp_newreq: ptr alloc request for new ptr
40  * @bp_op: rebalance operation
41  */
42 struct nilfs_btree_path {
43         struct buffer_head *bp_bh;
44         struct buffer_head *bp_sib_bh;
45         int bp_index;
46         union nilfs_bmap_ptr_req bp_oldreq;
47         union nilfs_bmap_ptr_req bp_newreq;
48         struct nilfs_btnode_chkey_ctxt bp_ctxt;
49         void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
50                       int, __u64 *, __u64 *);
51 };
52
53 /*
54  * B-tree path operations
55  */
56
57 static struct kmem_cache *nilfs_btree_path_cache;
58
59 int __init nilfs_btree_path_cache_init(void)
60 {
61         nilfs_btree_path_cache =
62                 kmem_cache_create("nilfs2_btree_path_cache",
63                                   sizeof(struct nilfs_btree_path) *
64                                   NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
65         return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
66 }
67
68 void nilfs_btree_path_cache_destroy(void)
69 {
70         kmem_cache_destroy(nilfs_btree_path_cache);
71 }
72
73 static inline struct nilfs_btree_path *
74 nilfs_btree_alloc_path(const struct nilfs_btree *btree)
75 {
76         return (struct nilfs_btree_path *)
77                 kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
78 }
79
80 static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
81                                          struct nilfs_btree_path *path)
82 {
83         kmem_cache_free(nilfs_btree_path_cache, path);
84 }
85
86 static void nilfs_btree_init_path(const struct nilfs_btree *btree,
87                                   struct nilfs_btree_path *path)
88 {
89         int level;
90
91         for (level = NILFS_BTREE_LEVEL_DATA;
92              level < NILFS_BTREE_LEVEL_MAX;
93              level++) {
94                 path[level].bp_bh = NULL;
95                 path[level].bp_sib_bh = NULL;
96                 path[level].bp_index = 0;
97                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
98                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
99                 path[level].bp_op = NULL;
100         }
101 }
102
103 static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
104                                    struct nilfs_btree_path *path)
105 {
106         int level;
107
108         for (level = NILFS_BTREE_LEVEL_DATA;
109              level < NILFS_BTREE_LEVEL_MAX;
110              level++) {
111                 if (path[level].bp_bh != NULL) {
112                         nilfs_bmap_put_block(&btree->bt_bmap,
113                                              path[level].bp_bh);
114                         path[level].bp_bh = NULL;
115                 }
116                 /* sib_bh is released or deleted by prepare or commit
117                  * operations. */
118                 path[level].bp_sib_bh = NULL;
119                 path[level].bp_index = 0;
120                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
121                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
122                 path[level].bp_op = NULL;
123         }
124 }
125
126
127 /*
128  * B-tree node operations
129  */
130
131 static inline int
132 nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
133                            const struct nilfs_btree_node *node)
134 {
135         return node->bn_flags;
136 }
137
138 static inline void
139 nilfs_btree_node_set_flags(struct nilfs_btree *btree,
140                            struct nilfs_btree_node *node,
141                            int flags)
142 {
143         node->bn_flags = flags;
144 }
145
146 static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
147                                         const struct nilfs_btree_node *node)
148 {
149         return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
150 }
151
152 static inline int
153 nilfs_btree_node_get_level(const struct nilfs_btree *btree,
154                            const struct nilfs_btree_node *node)
155 {
156         return node->bn_level;
157 }
158
159 static inline void
160 nilfs_btree_node_set_level(struct nilfs_btree *btree,
161                            struct nilfs_btree_node *node,
162                            int level)
163 {
164         node->bn_level = level;
165 }
166
167 static inline int
168 nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
169                                const struct nilfs_btree_node *node)
170 {
171         return le16_to_cpu(node->bn_nchildren);
172 }
173
174 static inline void
175 nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
176                                struct nilfs_btree_node *node,
177                                int nchildren)
178 {
179         node->bn_nchildren = cpu_to_le16(nchildren);
180 }
181
182 static inline int
183 nilfs_btree_node_size(const struct nilfs_btree *btree)
184 {
185         return 1 << btree->bt_bmap.b_inode->i_blkbits;
186 }
187
188 static inline int
189 nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree,
190                                const struct nilfs_btree_node *node)
191 {
192         return nilfs_btree_node_root(btree, node) ?
193                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
194                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
195 }
196
197 static inline int
198 nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree,
199                                const struct nilfs_btree_node *node)
200 {
201         return nilfs_btree_node_root(btree, node) ?
202                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
203                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
204 }
205
206 static inline __le64 *
207 nilfs_btree_node_dkeys(const struct nilfs_btree *btree,
208                        const struct nilfs_btree_node *node)
209 {
210         return (__le64 *)((char *)(node + 1) +
211                           (nilfs_btree_node_root(btree, node) ?
212                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
213 }
214
215 static inline __le64 *
216 nilfs_btree_node_dptrs(const struct nilfs_btree *btree,
217                        const struct nilfs_btree_node *node)
218 {
219         return (__le64 *)(nilfs_btree_node_dkeys(btree, node) +
220                           nilfs_btree_node_nchildren_max(btree, node));
221 }
222
223 static inline __u64
224 nilfs_btree_node_get_key(const struct nilfs_btree *btree,
225                          const struct nilfs_btree_node *node, int index)
226 {
227         return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) +
228                                         index));
229 }
230
231 static inline void
232 nilfs_btree_node_set_key(struct nilfs_btree *btree,
233                          struct nilfs_btree_node *node, int index, __u64 key)
234 {
235         *(nilfs_btree_node_dkeys(btree, node) + index) =
236                 nilfs_bmap_key_to_dkey(key);
237 }
238
239 static inline __u64
240 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
241                          const struct nilfs_btree_node *node,
242                          int index)
243 {
244         return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) +
245                                         index));
246 }
247
248 static inline void
249 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
250                          struct nilfs_btree_node *node,
251                          int index,
252                          __u64 ptr)
253 {
254         *(nilfs_btree_node_dptrs(btree, node) + index) =
255                 nilfs_bmap_ptr_to_dptr(ptr);
256 }
257
258 static void nilfs_btree_node_init(struct nilfs_btree *btree,
259                                   struct nilfs_btree_node *node,
260                                   int flags, int level, int nchildren,
261                                   const __u64 *keys, const __u64 *ptrs)
262 {
263         __le64 *dkeys;
264         __le64 *dptrs;
265         int i;
266
267         nilfs_btree_node_set_flags(btree, node, flags);
268         nilfs_btree_node_set_level(btree, node, level);
269         nilfs_btree_node_set_nchildren(btree, node, nchildren);
270
271         dkeys = nilfs_btree_node_dkeys(btree, node);
272         dptrs = nilfs_btree_node_dptrs(btree, node);
273         for (i = 0; i < nchildren; i++) {
274                 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
275                 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
276         }
277 }
278
279 /* Assume the buffer heads corresponding to left and right are locked. */
280 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
281                                        struct nilfs_btree_node *left,
282                                        struct nilfs_btree_node *right,
283                                        int n)
284 {
285         __le64 *ldkeys, *rdkeys;
286         __le64 *ldptrs, *rdptrs;
287         int lnchildren, rnchildren;
288
289         ldkeys = nilfs_btree_node_dkeys(btree, left);
290         ldptrs = nilfs_btree_node_dptrs(btree, left);
291         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
292
293         rdkeys = nilfs_btree_node_dkeys(btree, right);
294         rdptrs = nilfs_btree_node_dptrs(btree, right);
295         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
296
297         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
298         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
299         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
300         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
301
302         lnchildren += n;
303         rnchildren -= n;
304         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
305         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
306 }
307
308 /* Assume that the buffer heads corresponding to left and right are locked. */
309 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
310                                         struct nilfs_btree_node *left,
311                                         struct nilfs_btree_node *right,
312                                         int n)
313 {
314         __le64 *ldkeys, *rdkeys;
315         __le64 *ldptrs, *rdptrs;
316         int lnchildren, rnchildren;
317
318         ldkeys = nilfs_btree_node_dkeys(btree, left);
319         ldptrs = nilfs_btree_node_dptrs(btree, left);
320         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
321
322         rdkeys = nilfs_btree_node_dkeys(btree, right);
323         rdptrs = nilfs_btree_node_dptrs(btree, right);
324         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
325
326         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
327         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
328         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
329         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
330
331         lnchildren -= n;
332         rnchildren += n;
333         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
334         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
335 }
336
337 /* Assume that the buffer head corresponding to node is locked. */
338 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
339                                     struct nilfs_btree_node *node,
340                                     __u64 key, __u64 ptr, int index)
341 {
342         __le64 *dkeys;
343         __le64 *dptrs;
344         int nchildren;
345
346         dkeys = nilfs_btree_node_dkeys(btree, node);
347         dptrs = nilfs_btree_node_dptrs(btree, node);
348         nchildren = nilfs_btree_node_get_nchildren(btree, node);
349         if (index < nchildren) {
350                 memmove(dkeys + index + 1, dkeys + index,
351                         (nchildren - index) * sizeof(*dkeys));
352                 memmove(dptrs + index + 1, dptrs + index,
353                         (nchildren - index) * sizeof(*dptrs));
354         }
355         dkeys[index] = nilfs_bmap_key_to_dkey(key);
356         dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
357         nchildren++;
358         nilfs_btree_node_set_nchildren(btree, node, nchildren);
359 }
360
361 /* Assume that the buffer head corresponding to node is locked. */
362 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
363                                     struct nilfs_btree_node *node,
364                                     __u64 *keyp, __u64 *ptrp, int index)
365 {
366         __u64 key;
367         __u64 ptr;
368         __le64 *dkeys;
369         __le64 *dptrs;
370         int nchildren;
371
372         dkeys = nilfs_btree_node_dkeys(btree, node);
373         dptrs = nilfs_btree_node_dptrs(btree, node);
374         key = nilfs_bmap_dkey_to_key(dkeys[index]);
375         ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
376         nchildren = nilfs_btree_node_get_nchildren(btree, node);
377         if (keyp != NULL)
378                 *keyp = key;
379         if (ptrp != NULL)
380                 *ptrp = ptr;
381
382         if (index < nchildren - 1) {
383                 memmove(dkeys + index, dkeys + index + 1,
384                         (nchildren - index - 1) * sizeof(*dkeys));
385                 memmove(dptrs + index, dptrs + index + 1,
386                         (nchildren - index - 1) * sizeof(*dptrs));
387         }
388         nchildren--;
389         nilfs_btree_node_set_nchildren(btree, node, nchildren);
390 }
391
392 static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
393                                    const struct nilfs_btree_node *node,
394                                    __u64 key, int *indexp)
395 {
396         __u64 nkey;
397         int index, low, high, s;
398
399         /* binary search */
400         low = 0;
401         high = nilfs_btree_node_get_nchildren(btree, node) - 1;
402         index = 0;
403         s = 0;
404         while (low <= high) {
405                 index = (low + high) / 2;
406                 nkey = nilfs_btree_node_get_key(btree, node, index);
407                 if (nkey == key) {
408                         s = 0;
409                         goto out;
410                 } else if (nkey < key) {
411                         low = index + 1;
412                         s = -1;
413                 } else {
414                         high = index - 1;
415                         s = 1;
416                 }
417         }
418
419         /* adjust index */
420         if (nilfs_btree_node_get_level(btree, node) >
421             NILFS_BTREE_LEVEL_NODE_MIN) {
422                 if ((s > 0) && (index > 0))
423                         index--;
424         } else if (s < 0)
425                 index++;
426
427  out:
428         BUG_ON(indexp == NULL);
429         *indexp = index;
430
431         return s == 0;
432 }
433
434 static inline struct nilfs_btree_node *
435 nilfs_btree_get_root(const struct nilfs_btree *btree)
436 {
437         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
438 }
439
440 static inline struct nilfs_btree_node *
441 nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
442                              const struct nilfs_btree_path *path,
443                              int level)
444 {
445         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
446 }
447
448 static inline struct nilfs_btree_node *
449 nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
450                          const struct nilfs_btree_path *path,
451                          int level)
452 {
453         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
454 }
455
456 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
457 {
458         return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree))
459                 + 1;
460 }
461
462 static inline struct nilfs_btree_node *
463 nilfs_btree_get_node(const struct nilfs_btree *btree,
464                      const struct nilfs_btree_path *path,
465                      int level)
466 {
467         return (level == nilfs_btree_height(btree) - 1) ?
468                 nilfs_btree_get_root(btree) :
469                 nilfs_btree_get_nonroot_node(btree, path, level);
470 }
471
472 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
473                                  struct nilfs_btree_path *path,
474                                  __u64 key, __u64 *ptrp, int minlevel)
475 {
476         struct nilfs_btree_node *node;
477         __u64 ptr;
478         int level, index, found, ret;
479
480         BUG_ON(minlevel <= NILFS_BTREE_LEVEL_DATA);
481
482         node = nilfs_btree_get_root(btree);
483         level = nilfs_btree_node_get_level(btree, node);
484         if ((level < minlevel) ||
485             (nilfs_btree_node_get_nchildren(btree, node) <= 0))
486                 return -ENOENT;
487
488         found = nilfs_btree_node_lookup(btree, node, key, &index);
489         ptr = nilfs_btree_node_get_ptr(btree, node, index);
490         path[level].bp_bh = NULL;
491         path[level].bp_index = index;
492
493         for (level--; level >= minlevel; level--) {
494                 ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
495                                            &path[level].bp_bh);
496                 if (ret < 0)
497                         return ret;
498                 node = nilfs_btree_get_nonroot_node(btree, path, level);
499                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
500                 if (!found)
501                         found = nilfs_btree_node_lookup(btree, node, key,
502                                                         &index);
503                 else
504                         index = 0;
505                 if (index < nilfs_btree_node_nchildren_max(btree, node))
506                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
507                 else {
508                         BUG_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
509                         /* insert */
510                         ptr = NILFS_BMAP_INVALID_PTR;
511                 }
512                 path[level].bp_index = index;
513         }
514         if (!found)
515                 return -ENOENT;
516
517         if (ptrp != NULL)
518                 *ptrp = ptr;
519
520         return 0;
521 }
522
523 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
524                                       struct nilfs_btree_path *path,
525                                       __u64 *keyp, __u64 *ptrp)
526 {
527         struct nilfs_btree_node *node;
528         __u64 ptr;
529         int index, level, ret;
530
531         node = nilfs_btree_get_root(btree);
532         index = nilfs_btree_node_get_nchildren(btree, node) - 1;
533         if (index < 0)
534                 return -ENOENT;
535         level = nilfs_btree_node_get_level(btree, node);
536         ptr = nilfs_btree_node_get_ptr(btree, node, index);
537         path[level].bp_bh = NULL;
538         path[level].bp_index = index;
539
540         for (level--; level > 0; level--) {
541                 ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
542                                            &path[level].bp_bh);
543                 if (ret < 0)
544                         return ret;
545                 node = nilfs_btree_get_nonroot_node(btree, path, level);
546                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
547                 index = nilfs_btree_node_get_nchildren(btree, node) - 1;
548                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
549                 path[level].bp_index = index;
550         }
551
552         if (keyp != NULL)
553                 *keyp = nilfs_btree_node_get_key(btree, node, index);
554         if (ptrp != NULL)
555                 *ptrp = ptr;
556
557         return 0;
558 }
559
560 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
561                               __u64 key, int level, __u64 *ptrp)
562 {
563         struct nilfs_btree *btree;
564         struct nilfs_btree_path *path;
565         __u64 ptr;
566         int ret;
567
568         btree = (struct nilfs_btree *)bmap;
569         path = nilfs_btree_alloc_path(btree);
570         if (path == NULL)
571                 return -ENOMEM;
572         nilfs_btree_init_path(btree, path);
573
574         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
575
576         if (ptrp != NULL)
577                 *ptrp = ptr;
578
579         nilfs_btree_clear_path(btree, path);
580         nilfs_btree_free_path(btree, path);
581
582         return ret;
583 }
584
585 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
586                                     struct nilfs_btree_path *path,
587                                     int level, __u64 key)
588 {
589         if (level < nilfs_btree_height(btree) - 1) {
590                 do {
591                         lock_buffer(path[level].bp_bh);
592                         nilfs_btree_node_set_key(
593                                 btree,
594                                 nilfs_btree_get_nonroot_node(
595                                         btree, path, level),
596                                 path[level].bp_index, key);
597                         if (!buffer_dirty(path[level].bp_bh))
598                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
599                         unlock_buffer(path[level].bp_bh);
600                 } while ((path[level].bp_index == 0) &&
601                          (++level < nilfs_btree_height(btree) - 1));
602         }
603
604         /* root */
605         if (level == nilfs_btree_height(btree) - 1) {
606                 nilfs_btree_node_set_key(btree,
607                                          nilfs_btree_get_root(btree),
608                                          path[level].bp_index, key);
609         }
610 }
611
612 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
613                                   struct nilfs_btree_path *path,
614                                   int level, __u64 *keyp, __u64 *ptrp)
615 {
616         struct nilfs_btree_node *node;
617
618         if (level < nilfs_btree_height(btree) - 1) {
619                 lock_buffer(path[level].bp_bh);
620                 node = nilfs_btree_get_nonroot_node(btree, path, level);
621                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
622                                         path[level].bp_index);
623                 if (!buffer_dirty(path[level].bp_bh))
624                         nilfs_btnode_mark_dirty(path[level].bp_bh);
625                 unlock_buffer(path[level].bp_bh);
626
627                 if (path[level].bp_index == 0)
628                         nilfs_btree_promote_key(btree, path, level + 1,
629                                                 nilfs_btree_node_get_key(
630                                                         btree, node, 0));
631         } else {
632                 node = nilfs_btree_get_root(btree);
633                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
634                                         path[level].bp_index);
635         }
636 }
637
638 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
639                                    struct nilfs_btree_path *path,
640                                    int level, __u64 *keyp, __u64 *ptrp)
641 {
642         struct nilfs_btree_node *node, *left;
643         int nchildren, lnchildren, n, move;
644
645         lock_buffer(path[level].bp_bh);
646         lock_buffer(path[level].bp_sib_bh);
647
648         node = nilfs_btree_get_nonroot_node(btree, path, level);
649         left = nilfs_btree_get_sib_node(btree, path, level);
650         nchildren = nilfs_btree_node_get_nchildren(btree, node);
651         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
652         move = 0;
653
654         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
655         if (n > path[level].bp_index) {
656                 /* move insert point */
657                 n--;
658                 move = 1;
659         }
660
661         nilfs_btree_node_move_left(btree, left, node, n);
662
663         if (!buffer_dirty(path[level].bp_bh))
664                 nilfs_btnode_mark_dirty(path[level].bp_bh);
665         if (!buffer_dirty(path[level].bp_sib_bh))
666                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
667
668         unlock_buffer(path[level].bp_bh);
669         unlock_buffer(path[level].bp_sib_bh);
670
671         nilfs_btree_promote_key(btree, path, level + 1,
672                                 nilfs_btree_node_get_key(btree, node, 0));
673
674         if (move) {
675                 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
676                 path[level].bp_bh = path[level].bp_sib_bh;
677                 path[level].bp_sib_bh = NULL;
678                 path[level].bp_index += lnchildren;
679                 path[level + 1].bp_index--;
680         } else {
681                 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
682                 path[level].bp_sib_bh = NULL;
683                 path[level].bp_index -= n;
684         }
685
686         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
687 }
688
689 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
690                                     struct nilfs_btree_path *path,
691                                     int level, __u64 *keyp, __u64 *ptrp)
692 {
693         struct nilfs_btree_node *node, *right;
694         int nchildren, rnchildren, n, move;
695
696         lock_buffer(path[level].bp_bh);
697         lock_buffer(path[level].bp_sib_bh);
698
699         node = nilfs_btree_get_nonroot_node(btree, path, level);
700         right = nilfs_btree_get_sib_node(btree, path, level);
701         nchildren = nilfs_btree_node_get_nchildren(btree, node);
702         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
703         move = 0;
704
705         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
706         if (n > nchildren - path[level].bp_index) {
707                 /* move insert point */
708                 n--;
709                 move = 1;
710         }
711
712         nilfs_btree_node_move_right(btree, node, right, n);
713
714         if (!buffer_dirty(path[level].bp_bh))
715                 nilfs_btnode_mark_dirty(path[level].bp_bh);
716         if (!buffer_dirty(path[level].bp_sib_bh))
717                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
718
719         unlock_buffer(path[level].bp_bh);
720         unlock_buffer(path[level].bp_sib_bh);
721
722         path[level + 1].bp_index++;
723         nilfs_btree_promote_key(btree, path, level + 1,
724                                 nilfs_btree_node_get_key(btree, right, 0));
725         path[level + 1].bp_index--;
726
727         if (move) {
728                 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
729                 path[level].bp_bh = path[level].bp_sib_bh;
730                 path[level].bp_sib_bh = NULL;
731                 path[level].bp_index -=
732                         nilfs_btree_node_get_nchildren(btree, node);
733                 path[level + 1].bp_index++;
734         } else {
735                 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
736                 path[level].bp_sib_bh = NULL;
737         }
738
739         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
740 }
741
742 static void nilfs_btree_split(struct nilfs_btree *btree,
743                               struct nilfs_btree_path *path,
744                               int level, __u64 *keyp, __u64 *ptrp)
745 {
746         struct nilfs_btree_node *node, *right;
747         __u64 newkey;
748         __u64 newptr;
749         int nchildren, n, move;
750
751         lock_buffer(path[level].bp_bh);
752         lock_buffer(path[level].bp_sib_bh);
753
754         node = nilfs_btree_get_nonroot_node(btree, path, level);
755         right = nilfs_btree_get_sib_node(btree, path, level);
756         nchildren = nilfs_btree_node_get_nchildren(btree, node);
757         move = 0;
758
759         n = (nchildren + 1) / 2;
760         if (n > nchildren - path[level].bp_index) {
761                 n--;
762                 move = 1;
763         }
764
765         nilfs_btree_node_move_right(btree, node, right, n);
766
767         if (!buffer_dirty(path[level].bp_bh))
768                 nilfs_btnode_mark_dirty(path[level].bp_bh);
769         if (!buffer_dirty(path[level].bp_sib_bh))
770                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
771
772         unlock_buffer(path[level].bp_bh);
773         unlock_buffer(path[level].bp_sib_bh);
774
775         newkey = nilfs_btree_node_get_key(btree, right, 0);
776         newptr = path[level].bp_newreq.bpr_ptr;
777
778         if (move) {
779                 path[level].bp_index -=
780                         nilfs_btree_node_get_nchildren(btree, node);
781                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
782                                         path[level].bp_index);
783
784                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
785                 *ptrp = path[level].bp_newreq.bpr_ptr;
786
787                 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
788                 path[level].bp_bh = path[level].bp_sib_bh;
789                 path[level].bp_sib_bh = NULL;
790         } else {
791                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
792
793                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
794                 *ptrp = path[level].bp_newreq.bpr_ptr;
795
796                 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
797                 path[level].bp_sib_bh = NULL;
798         }
799
800         path[level + 1].bp_index++;
801 }
802
803 static void nilfs_btree_grow(struct nilfs_btree *btree,
804                              struct nilfs_btree_path *path,
805                              int level, __u64 *keyp, __u64 *ptrp)
806 {
807         struct nilfs_btree_node *root, *child;
808         int n;
809
810         lock_buffer(path[level].bp_sib_bh);
811
812         root = nilfs_btree_get_root(btree);
813         child = nilfs_btree_get_sib_node(btree, path, level);
814
815         n = nilfs_btree_node_get_nchildren(btree, root);
816
817         nilfs_btree_node_move_right(btree, root, child, n);
818         nilfs_btree_node_set_level(btree, root, level + 1);
819
820         if (!buffer_dirty(path[level].bp_sib_bh))
821                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
822
823         unlock_buffer(path[level].bp_sib_bh);
824
825         path[level].bp_bh = path[level].bp_sib_bh;
826         path[level].bp_sib_bh = NULL;
827
828         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
829
830         *keyp = nilfs_btree_node_get_key(btree, child, 0);
831         *ptrp = path[level].bp_newreq.bpr_ptr;
832 }
833
834 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
835                                    const struct nilfs_btree_path *path)
836 {
837         struct nilfs_btree_node *node;
838         int level;
839
840         if (path == NULL)
841                 return NILFS_BMAP_INVALID_PTR;
842
843         /* left sibling */
844         level = NILFS_BTREE_LEVEL_NODE_MIN;
845         if (path[level].bp_index > 0) {
846                 node = nilfs_btree_get_node(btree, path, level);
847                 return nilfs_btree_node_get_ptr(btree, node,
848                                                 path[level].bp_index - 1);
849         }
850
851         /* parent */
852         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
853         if (level <= nilfs_btree_height(btree) - 1) {
854                 node = nilfs_btree_get_node(btree, path, level);
855                 return nilfs_btree_node_get_ptr(btree, node,
856                                                 path[level].bp_index);
857         }
858
859         return NILFS_BMAP_INVALID_PTR;
860 }
861
862 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
863                                        const struct nilfs_btree_path *path,
864                                        __u64 key)
865 {
866         __u64 ptr;
867
868         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
869         if (ptr != NILFS_BMAP_INVALID_PTR)
870                 /* sequential access */
871                 return ptr;
872         else {
873                 ptr = nilfs_btree_find_near(btree, path);
874                 if (ptr != NILFS_BMAP_INVALID_PTR)
875                         /* near */
876                         return ptr;
877         }
878         /* block group */
879         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
880 }
881
882 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
883                                      __u64 ptr)
884 {
885         btree->bt_bmap.b_last_allocated_key = key;
886         btree->bt_bmap.b_last_allocated_ptr = ptr;
887 }
888
889 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
890                                       struct nilfs_btree_path *path,
891                                       int *levelp, __u64 key, __u64 ptr,
892                                       struct nilfs_bmap_stats *stats)
893 {
894         struct buffer_head *bh;
895         struct nilfs_btree_node *node, *parent, *sib;
896         __u64 sibptr;
897         int pindex, level, ret;
898
899         stats->bs_nblocks = 0;
900         level = NILFS_BTREE_LEVEL_DATA;
901
902         /* allocate a new ptr for data block */
903         if (btree->bt_ops->btop_find_target != NULL)
904                 path[level].bp_newreq.bpr_ptr =
905                         btree->bt_ops->btop_find_target(btree, path, key);
906
907         ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
908                 &btree->bt_bmap, &path[level].bp_newreq);
909         if (ret < 0)
910                 goto err_out_data;
911
912         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
913              level < nilfs_btree_height(btree) - 1;
914              level++) {
915                 node = nilfs_btree_get_nonroot_node(btree, path, level);
916                 if (nilfs_btree_node_get_nchildren(btree, node) <
917                     nilfs_btree_node_nchildren_max(btree, node)) {
918                         path[level].bp_op = nilfs_btree_do_insert;
919                         stats->bs_nblocks++;
920                         goto out;
921                 }
922
923                 parent = nilfs_btree_get_node(btree, path, level + 1);
924                 pindex = path[level + 1].bp_index;
925
926                 /* left sibling */
927                 if (pindex > 0) {
928                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
929                                                           pindex - 1);
930                         ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
931                                                    &bh);
932                         if (ret < 0)
933                                 goto err_out_child_node;
934                         sib = (struct nilfs_btree_node *)bh->b_data;
935                         if (nilfs_btree_node_get_nchildren(btree, sib) <
936                             nilfs_btree_node_nchildren_max(btree, sib)) {
937                                 path[level].bp_sib_bh = bh;
938                                 path[level].bp_op = nilfs_btree_carry_left;
939                                 stats->bs_nblocks++;
940                                 goto out;
941                         } else
942                                 nilfs_bmap_put_block(&btree->bt_bmap, bh);
943                 }
944
945                 /* right sibling */
946                 if (pindex <
947                     nilfs_btree_node_get_nchildren(btree, parent) - 1) {
948                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
949                                                           pindex + 1);
950                         ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
951                                                    &bh);
952                         if (ret < 0)
953                                 goto err_out_child_node;
954                         sib = (struct nilfs_btree_node *)bh->b_data;
955                         if (nilfs_btree_node_get_nchildren(btree, sib) <
956                             nilfs_btree_node_nchildren_max(btree, sib)) {
957                                 path[level].bp_sib_bh = bh;
958                                 path[level].bp_op = nilfs_btree_carry_right;
959                                 stats->bs_nblocks++;
960                                 goto out;
961                         } else
962                                 nilfs_bmap_put_block(&btree->bt_bmap, bh);
963                 }
964
965                 /* split */
966                 path[level].bp_newreq.bpr_ptr =
967                         path[level - 1].bp_newreq.bpr_ptr + 1;
968                 ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
969                         &btree->bt_bmap, &path[level].bp_newreq);
970                 if (ret < 0)
971                         goto err_out_child_node;
972                 ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
973                                                path[level].bp_newreq.bpr_ptr,
974                                                &bh);
975                 if (ret < 0)
976                         goto err_out_curr_node;
977
978                 stats->bs_nblocks++;
979
980                 lock_buffer(bh);
981                 nilfs_btree_node_init(btree,
982                                       (struct nilfs_btree_node *)bh->b_data,
983                                       0, level, 0, NULL, NULL);
984                 unlock_buffer(bh);
985                 path[level].bp_sib_bh = bh;
986                 path[level].bp_op = nilfs_btree_split;
987         }
988
989         /* root */
990         node = nilfs_btree_get_root(btree);
991         if (nilfs_btree_node_get_nchildren(btree, node) <
992             nilfs_btree_node_nchildren_max(btree, node)) {
993                 path[level].bp_op = nilfs_btree_do_insert;
994                 stats->bs_nblocks++;
995                 goto out;
996         }
997
998         /* grow */
999         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1000         ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
1001                 &btree->bt_bmap, &path[level].bp_newreq);
1002         if (ret < 0)
1003                 goto err_out_child_node;
1004         ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
1005                                        path[level].bp_newreq.bpr_ptr, &bh);
1006         if (ret < 0)
1007                 goto err_out_curr_node;
1008
1009         lock_buffer(bh);
1010         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1011                               0, level, 0, NULL, NULL);
1012         unlock_buffer(bh);
1013         path[level].bp_sib_bh = bh;
1014         path[level].bp_op = nilfs_btree_grow;
1015
1016         level++;
1017         path[level].bp_op = nilfs_btree_do_insert;
1018
1019         /* a newly-created node block and a data block are added */
1020         stats->bs_nblocks += 2;
1021
1022         /* success */
1023  out:
1024         *levelp = level;
1025         return ret;
1026
1027         /* error */
1028  err_out_curr_node:
1029         btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1030                                                     &path[level].bp_newreq);
1031  err_out_child_node:
1032         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1033                 nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
1034                 btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(
1035                         &btree->bt_bmap, &path[level].bp_newreq);
1036
1037         }
1038
1039         btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1040                                                        &path[level].bp_newreq);
1041  err_out_data:
1042         *levelp = level;
1043         stats->bs_nblocks = 0;
1044         return ret;
1045 }
1046
1047 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1048                                       struct nilfs_btree_path *path,
1049                                       int maxlevel, __u64 key, __u64 ptr)
1050 {
1051         int level;
1052
1053         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1054         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1055         if (btree->bt_ops->btop_set_target != NULL)
1056                 btree->bt_ops->btop_set_target(btree, key, ptr);
1057
1058         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1059                 if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) {
1060                         btree->bt_bmap.b_pops->bpop_commit_alloc_ptr(
1061                                 &btree->bt_bmap, &path[level - 1].bp_newreq);
1062                 }
1063                 path[level].bp_op(btree, path, level, &key, &ptr);
1064         }
1065
1066         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1067                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1068 }
1069
1070 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1071 {
1072         struct nilfs_btree *btree;
1073         struct nilfs_btree_path *path;
1074         struct nilfs_bmap_stats stats;
1075         int level, ret;
1076
1077         btree = (struct nilfs_btree *)bmap;
1078         path = nilfs_btree_alloc_path(btree);
1079         if (path == NULL)
1080                 return -ENOMEM;
1081         nilfs_btree_init_path(btree, path);
1082
1083         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1084                                     NILFS_BTREE_LEVEL_NODE_MIN);
1085         if (ret != -ENOENT) {
1086                 if (ret == 0)
1087                         ret = -EEXIST;
1088                 goto out;
1089         }
1090
1091         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1092         if (ret < 0)
1093                 goto out;
1094         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1095         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1096
1097  out:
1098         nilfs_btree_clear_path(btree, path);
1099         nilfs_btree_free_path(btree, path);
1100         return ret;
1101 }
1102
1103 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1104                                   struct nilfs_btree_path *path,
1105                                   int level, __u64 *keyp, __u64 *ptrp)
1106 {
1107         struct nilfs_btree_node *node;
1108
1109         if (level < nilfs_btree_height(btree) - 1) {
1110                 lock_buffer(path[level].bp_bh);
1111                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1112                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1113                                         path[level].bp_index);
1114                 if (!buffer_dirty(path[level].bp_bh))
1115                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1116                 unlock_buffer(path[level].bp_bh);
1117                 if (path[level].bp_index == 0)
1118                         nilfs_btree_promote_key(btree, path, level + 1,
1119                                 nilfs_btree_node_get_key(btree, node, 0));
1120         } else {
1121                 node = nilfs_btree_get_root(btree);
1122                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1123                                         path[level].bp_index);
1124         }
1125 }
1126
1127 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1128                                     struct nilfs_btree_path *path,
1129                                     int level, __u64 *keyp, __u64 *ptrp)
1130 {
1131         struct nilfs_btree_node *node, *left;
1132         int nchildren, lnchildren, n;
1133
1134         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1135
1136         lock_buffer(path[level].bp_bh);
1137         lock_buffer(path[level].bp_sib_bh);
1138
1139         node = nilfs_btree_get_nonroot_node(btree, path, level);
1140         left = nilfs_btree_get_sib_node(btree, path, level);
1141         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1142         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
1143
1144         n = (nchildren + lnchildren) / 2 - nchildren;
1145
1146         nilfs_btree_node_move_right(btree, left, node, n);
1147
1148         if (!buffer_dirty(path[level].bp_bh))
1149                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1150         if (!buffer_dirty(path[level].bp_sib_bh))
1151                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1152
1153         unlock_buffer(path[level].bp_bh);
1154         unlock_buffer(path[level].bp_sib_bh);
1155
1156         nilfs_btree_promote_key(btree, path, level + 1,
1157                                 nilfs_btree_node_get_key(btree, node, 0));
1158
1159         nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
1160         path[level].bp_sib_bh = NULL;
1161         path[level].bp_index += n;
1162 }
1163
1164 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1165                                      struct nilfs_btree_path *path,
1166                                      int level, __u64 *keyp, __u64 *ptrp)
1167 {
1168         struct nilfs_btree_node *node, *right;
1169         int nchildren, rnchildren, n;
1170
1171         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1172
1173         lock_buffer(path[level].bp_bh);
1174         lock_buffer(path[level].bp_sib_bh);
1175
1176         node = nilfs_btree_get_nonroot_node(btree, path, level);
1177         right = nilfs_btree_get_sib_node(btree, path, level);
1178         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1179         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
1180
1181         n = (nchildren + rnchildren) / 2 - nchildren;
1182
1183         nilfs_btree_node_move_left(btree, node, right, n);
1184
1185         if (!buffer_dirty(path[level].bp_bh))
1186                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1187         if (!buffer_dirty(path[level].bp_sib_bh))
1188                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1189
1190         unlock_buffer(path[level].bp_bh);
1191         unlock_buffer(path[level].bp_sib_bh);
1192
1193         path[level + 1].bp_index++;
1194         nilfs_btree_promote_key(btree, path, level + 1,
1195                                 nilfs_btree_node_get_key(btree, right, 0));
1196         path[level + 1].bp_index--;
1197
1198         nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
1199         path[level].bp_sib_bh = NULL;
1200 }
1201
1202 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1203                                     struct nilfs_btree_path *path,
1204                                     int level, __u64 *keyp, __u64 *ptrp)
1205 {
1206         struct nilfs_btree_node *node, *left;
1207         int n;
1208
1209         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1210
1211         lock_buffer(path[level].bp_bh);
1212         lock_buffer(path[level].bp_sib_bh);
1213
1214         node = nilfs_btree_get_nonroot_node(btree, path, level);
1215         left = nilfs_btree_get_sib_node(btree, path, level);
1216
1217         n = nilfs_btree_node_get_nchildren(btree, node);
1218
1219         nilfs_btree_node_move_left(btree, left, node, n);
1220
1221         if (!buffer_dirty(path[level].bp_sib_bh))
1222                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1223
1224         unlock_buffer(path[level].bp_bh);
1225         unlock_buffer(path[level].bp_sib_bh);
1226
1227         nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
1228         path[level].bp_bh = path[level].bp_sib_bh;
1229         path[level].bp_sib_bh = NULL;
1230         path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
1231 }
1232
1233 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1234                                      struct nilfs_btree_path *path,
1235                                      int level, __u64 *keyp, __u64 *ptrp)
1236 {
1237         struct nilfs_btree_node *node, *right;
1238         int n;
1239
1240         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1241
1242         lock_buffer(path[level].bp_bh);
1243         lock_buffer(path[level].bp_sib_bh);
1244
1245         node = nilfs_btree_get_nonroot_node(btree, path, level);
1246         right = nilfs_btree_get_sib_node(btree, path, level);
1247
1248         n = nilfs_btree_node_get_nchildren(btree, right);
1249
1250         nilfs_btree_node_move_left(btree, node, right, n);
1251
1252         if (!buffer_dirty(path[level].bp_bh))
1253                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1254
1255         unlock_buffer(path[level].bp_bh);
1256         unlock_buffer(path[level].bp_sib_bh);
1257
1258         nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
1259         path[level].bp_sib_bh = NULL;
1260         path[level + 1].bp_index++;
1261 }
1262
1263 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1264                                struct nilfs_btree_path *path,
1265                                int level, __u64 *keyp, __u64 *ptrp)
1266 {
1267         struct nilfs_btree_node *root, *child;
1268         int n;
1269
1270         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1271
1272         lock_buffer(path[level].bp_bh);
1273         root = nilfs_btree_get_root(btree);
1274         child = nilfs_btree_get_nonroot_node(btree, path, level);
1275
1276         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1277         nilfs_btree_node_set_level(btree, root, level);
1278         n = nilfs_btree_node_get_nchildren(btree, child);
1279         nilfs_btree_node_move_left(btree, root, child, n);
1280         unlock_buffer(path[level].bp_bh);
1281
1282         nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
1283         path[level].bp_bh = NULL;
1284 }
1285
1286
1287 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1288                                       struct nilfs_btree_path *path,
1289                                       int *levelp,
1290                                       struct nilfs_bmap_stats *stats)
1291 {
1292         struct buffer_head *bh;
1293         struct nilfs_btree_node *node, *parent, *sib;
1294         __u64 sibptr;
1295         int pindex, level, ret;
1296
1297         ret = 0;
1298         stats->bs_nblocks = 0;
1299         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1300              level < nilfs_btree_height(btree) - 1;
1301              level++) {
1302                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1303                 path[level].bp_oldreq.bpr_ptr =
1304                         nilfs_btree_node_get_ptr(btree, node,
1305                                                  path[level].bp_index);
1306                 if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1307                         ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1308                                 &btree->bt_bmap, &path[level].bp_oldreq);
1309                         if (ret < 0)
1310                                 goto err_out_child_node;
1311                 }
1312
1313                 if (nilfs_btree_node_get_nchildren(btree, node) >
1314                     nilfs_btree_node_nchildren_min(btree, node)) {
1315                         path[level].bp_op = nilfs_btree_do_delete;
1316                         stats->bs_nblocks++;
1317                         goto out;
1318                 }
1319
1320                 parent = nilfs_btree_get_node(btree, path, level + 1);
1321                 pindex = path[level + 1].bp_index;
1322
1323                 if (pindex > 0) {
1324                         /* left sibling */
1325                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1326                                                           pindex - 1);
1327                         ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
1328                                                    &bh);
1329                         if (ret < 0)
1330                                 goto err_out_curr_node;
1331                         sib = (struct nilfs_btree_node *)bh->b_data;
1332                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1333                             nilfs_btree_node_nchildren_min(btree, sib)) {
1334                                 path[level].bp_sib_bh = bh;
1335                                 path[level].bp_op = nilfs_btree_borrow_left;
1336                                 stats->bs_nblocks++;
1337                                 goto out;
1338                         } else {
1339                                 path[level].bp_sib_bh = bh;
1340                                 path[level].bp_op = nilfs_btree_concat_left;
1341                                 stats->bs_nblocks++;
1342                                 /* continue; */
1343                         }
1344                 } else if (pindex <
1345                            nilfs_btree_node_get_nchildren(btree, parent) - 1) {
1346                         /* right sibling */
1347                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1348                                                           pindex + 1);
1349                         ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
1350                                                    &bh);
1351                         if (ret < 0)
1352                                 goto err_out_curr_node;
1353                         sib = (struct nilfs_btree_node *)bh->b_data;
1354                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1355                             nilfs_btree_node_nchildren_min(btree, sib)) {
1356                                 path[level].bp_sib_bh = bh;
1357                                 path[level].bp_op = nilfs_btree_borrow_right;
1358                                 stats->bs_nblocks++;
1359                                 goto out;
1360                         } else {
1361                                 path[level].bp_sib_bh = bh;
1362                                 path[level].bp_op = nilfs_btree_concat_right;
1363                                 stats->bs_nblocks++;
1364                                 /* continue; */
1365                         }
1366                 } else {
1367                         /* no siblings */
1368                         /* the only child of the root node */
1369                         BUG_ON(level != nilfs_btree_height(btree) - 2);
1370                         if (nilfs_btree_node_get_nchildren(btree, node) - 1 <=
1371                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1372                                 path[level].bp_op = nilfs_btree_shrink;
1373                                 stats->bs_nblocks += 2;
1374                         } else {
1375                                 path[level].bp_op = nilfs_btree_do_delete;
1376                                 stats->bs_nblocks++;
1377                         }
1378
1379                         goto out;
1380
1381                 }
1382         }
1383
1384         node = nilfs_btree_get_root(btree);
1385         path[level].bp_oldreq.bpr_ptr =
1386                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1387         if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1388                 ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1389                         &btree->bt_bmap, &path[level].bp_oldreq);
1390                 if (ret < 0)
1391                         goto err_out_child_node;
1392         }
1393         /* child of the root node is deleted */
1394         path[level].bp_op = nilfs_btree_do_delete;
1395         stats->bs_nblocks++;
1396
1397         /* success */
1398  out:
1399         *levelp = level;
1400         return ret;
1401
1402         /* error */
1403  err_out_curr_node:
1404         if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1405                 btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1406                         &btree->bt_bmap, &path[level].bp_oldreq);
1407  err_out_child_node:
1408         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1409                 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
1410                 if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1411                         btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1412                                 &btree->bt_bmap, &path[level].bp_oldreq);
1413         }
1414         *levelp = level;
1415         stats->bs_nblocks = 0;
1416         return ret;
1417 }
1418
1419 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1420                                       struct nilfs_btree_path *path,
1421                                       int maxlevel)
1422 {
1423         int level;
1424
1425         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1426                 if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL)
1427                         btree->bt_bmap.b_pops->bpop_commit_end_ptr(
1428                                 &btree->bt_bmap, &path[level].bp_oldreq);
1429                 path[level].bp_op(btree, path, level, NULL, NULL);
1430         }
1431
1432         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1433                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1434 }
1435
1436 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1437
1438 {
1439         struct nilfs_btree *btree;
1440         struct nilfs_btree_path *path;
1441         struct nilfs_bmap_stats stats;
1442         int level, ret;
1443
1444         btree = (struct nilfs_btree *)bmap;
1445         path = nilfs_btree_alloc_path(btree);
1446         if (path == NULL)
1447                 return -ENOMEM;
1448         nilfs_btree_init_path(btree, path);
1449         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1450                                     NILFS_BTREE_LEVEL_NODE_MIN);
1451         if (ret < 0)
1452                 goto out;
1453
1454         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats);
1455         if (ret < 0)
1456                 goto out;
1457         nilfs_btree_commit_delete(btree, path, level);
1458         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1459
1460 out:
1461         nilfs_btree_clear_path(btree, path);
1462         nilfs_btree_free_path(btree, path);
1463         return ret;
1464 }
1465
1466 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1467 {
1468         struct nilfs_btree *btree;
1469         struct nilfs_btree_path *path;
1470         int ret;
1471
1472         btree = (struct nilfs_btree *)bmap;
1473         path = nilfs_btree_alloc_path(btree);
1474         if (path == NULL)
1475                 return -ENOMEM;
1476         nilfs_btree_init_path(btree, path);
1477
1478         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1479
1480         nilfs_btree_clear_path(btree, path);
1481         nilfs_btree_free_path(btree, path);
1482
1483         return ret;
1484 }
1485
1486 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1487 {
1488         struct buffer_head *bh;
1489         struct nilfs_btree *btree;
1490         struct nilfs_btree_node *root, *node;
1491         __u64 maxkey, nextmaxkey;
1492         __u64 ptr;
1493         int nchildren, ret;
1494
1495         btree = (struct nilfs_btree *)bmap;
1496         root = nilfs_btree_get_root(btree);
1497         switch (nilfs_btree_height(btree)) {
1498         case 2:
1499                 bh = NULL;
1500                 node = root;
1501                 break;
1502         case 3:
1503                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1504                 if (nchildren > 1)
1505                         return 0;
1506                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1507                 ret = nilfs_bmap_get_block(bmap, ptr, &bh);
1508                 if (ret < 0)
1509                         return ret;
1510                 node = (struct nilfs_btree_node *)bh->b_data;
1511                 break;
1512         default:
1513                 return 0;
1514         }
1515
1516         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1517         maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1);
1518         nextmaxkey = (nchildren > 1) ?
1519                 nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
1520         if (bh != NULL)
1521                 nilfs_bmap_put_block(bmap, bh);
1522
1523         return (maxkey == key) && (nextmaxkey < bmap->b_low);
1524 }
1525
1526 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1527                                    __u64 *keys, __u64 *ptrs, int nitems)
1528 {
1529         struct buffer_head *bh;
1530         struct nilfs_btree *btree;
1531         struct nilfs_btree_node *node, *root;
1532         __le64 *dkeys;
1533         __le64 *dptrs;
1534         __u64 ptr;
1535         int nchildren, i, ret;
1536
1537         btree = (struct nilfs_btree *)bmap;
1538         root = nilfs_btree_get_root(btree);
1539         switch (nilfs_btree_height(btree)) {
1540         case 2:
1541                 bh = NULL;
1542                 node = root;
1543                 break;
1544         case 3:
1545                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1546                 BUG_ON(nchildren > 1);
1547                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1548                 ret = nilfs_bmap_get_block(bmap, ptr, &bh);
1549                 if (ret < 0)
1550                         return ret;
1551                 node = (struct nilfs_btree_node *)bh->b_data;
1552                 break;
1553         default:
1554                 node = NULL;
1555                 BUG();
1556         }
1557
1558         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1559         if (nchildren < nitems)
1560                 nitems = nchildren;
1561         dkeys = nilfs_btree_node_dkeys(btree, node);
1562         dptrs = nilfs_btree_node_dptrs(btree, node);
1563         for (i = 0; i < nitems; i++) {
1564                 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1565                 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1566         }
1567
1568         if (bh != NULL)
1569                 nilfs_bmap_put_block(bmap, bh);
1570
1571         return nitems;
1572 }
1573
1574 static int
1575 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1576                                        union nilfs_bmap_ptr_req *dreq,
1577                                        union nilfs_bmap_ptr_req *nreq,
1578                                        struct buffer_head **bhp,
1579                                        struct nilfs_bmap_stats *stats)
1580 {
1581         struct buffer_head *bh;
1582         struct nilfs_btree *btree;
1583         int ret;
1584
1585         btree = (struct nilfs_btree *)bmap;
1586         stats->bs_nblocks = 0;
1587
1588         /* for data */
1589         /* cannot find near ptr */
1590         if (btree->bt_ops->btop_find_target != NULL)
1591                 dreq->bpr_ptr
1592                         = btree->bt_ops->btop_find_target(btree, NULL, key);
1593         ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq);
1594         if (ret < 0)
1595                 return ret;
1596
1597         *bhp = NULL;
1598         stats->bs_nblocks++;
1599         if (nreq != NULL) {
1600                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1601                 ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq);
1602                 if (ret < 0)
1603                         goto err_out_dreq;
1604
1605                 ret = nilfs_bmap_get_new_block(bmap, nreq->bpr_ptr, &bh);
1606                 if (ret < 0)
1607                         goto err_out_nreq;
1608
1609                 *bhp = bh;
1610                 stats->bs_nblocks++;
1611         }
1612
1613         /* success */
1614         return 0;
1615
1616         /* error */
1617  err_out_nreq:
1618         bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq);
1619  err_out_dreq:
1620         bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq);
1621         stats->bs_nblocks = 0;
1622         return ret;
1623
1624 }
1625
1626 static void
1627 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1628                                       __u64 key, __u64 ptr,
1629                                       const __u64 *keys, const __u64 *ptrs,
1630                                       int n, __u64 low, __u64 high,
1631                                       union nilfs_bmap_ptr_req *dreq,
1632                                       union nilfs_bmap_ptr_req *nreq,
1633                                       struct buffer_head *bh)
1634 {
1635         struct nilfs_btree *btree;
1636         struct nilfs_btree_node *node;
1637         __u64 tmpptr;
1638
1639         /* free resources */
1640         if (bmap->b_ops->bop_clear != NULL)
1641                 bmap->b_ops->bop_clear(bmap);
1642
1643         /* ptr must be a pointer to a buffer head. */
1644         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1645
1646         /* convert and insert */
1647         btree = (struct nilfs_btree *)bmap;
1648         nilfs_btree_init(bmap, low, high);
1649         if (nreq != NULL) {
1650                 if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) {
1651                         bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1652                         bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq);
1653                 }
1654
1655                 /* create child node at level 1 */
1656                 lock_buffer(bh);
1657                 node = (struct nilfs_btree_node *)bh->b_data;
1658                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1659                 nilfs_btree_node_insert(btree, node,
1660                                         key, dreq->bpr_ptr, n);
1661                 if (!buffer_dirty(bh))
1662                         nilfs_btnode_mark_dirty(bh);
1663                 if (!nilfs_bmap_dirty(bmap))
1664                         nilfs_bmap_set_dirty(bmap);
1665
1666                 unlock_buffer(bh);
1667                 nilfs_bmap_put_block(bmap, bh);
1668
1669                 /* create root node at level 2 */
1670                 node = nilfs_btree_get_root(btree);
1671                 tmpptr = nreq->bpr_ptr;
1672                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1673                                       2, 1, &keys[0], &tmpptr);
1674         } else {
1675                 if (bmap->b_pops->bpop_commit_alloc_ptr != NULL)
1676                         bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1677
1678                 /* create root node at level 1 */
1679                 node = nilfs_btree_get_root(btree);
1680                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1681                                       1, n, keys, ptrs);
1682                 nilfs_btree_node_insert(btree, node,
1683                                         key, dreq->bpr_ptr, n);
1684                 if (!nilfs_bmap_dirty(bmap))
1685                         nilfs_bmap_set_dirty(bmap);
1686         }
1687
1688         if (btree->bt_ops->btop_set_target != NULL)
1689                 btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr);
1690 }
1691
1692 /**
1693  * nilfs_btree_convert_and_insert -
1694  * @bmap:
1695  * @key:
1696  * @ptr:
1697  * @keys:
1698  * @ptrs:
1699  * @n:
1700  * @low:
1701  * @high:
1702  */
1703 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1704                                    __u64 key, __u64 ptr,
1705                                    const __u64 *keys, const __u64 *ptrs,
1706                                    int n, __u64 low, __u64 high)
1707 {
1708         struct buffer_head *bh;
1709         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1710         struct nilfs_bmap_stats stats;
1711         int ret;
1712
1713         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1714                 di = &dreq;
1715                 ni = NULL;
1716         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1717                            1 << bmap->b_inode->i_blkbits)) {
1718                 di = &dreq;
1719                 ni = &nreq;
1720         } else {
1721                 di = NULL;
1722                 ni = NULL;
1723                 BUG();
1724         }
1725
1726         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1727                                                      &stats);
1728         if (ret < 0)
1729                 return ret;
1730         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1731                                               low, high, di, ni, bh);
1732         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1733         return 0;
1734 }
1735
1736 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1737                                    struct nilfs_btree_path *path,
1738                                    int level,
1739                                    struct buffer_head *bh)
1740 {
1741         while ((++level < nilfs_btree_height(btree) - 1) &&
1742                !buffer_dirty(path[level].bp_bh))
1743                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1744
1745         return 0;
1746 }
1747
1748 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1749                                         struct nilfs_btree_path *path,
1750                                         int level)
1751 {
1752         struct nilfs_btree_node *parent;
1753         int ret;
1754
1755         parent = nilfs_btree_get_node(btree, path, level + 1);
1756         path[level].bp_oldreq.bpr_ptr =
1757                 nilfs_btree_node_get_ptr(btree, parent,
1758                                          path[level + 1].bp_index);
1759         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1760         ret = nilfs_bmap_prepare_update(&btree->bt_bmap,
1761                                         &path[level].bp_oldreq,
1762                                         &path[level].bp_newreq);
1763         if (ret < 0)
1764                 return ret;
1765
1766         if (buffer_nilfs_node(path[level].bp_bh)) {
1767                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1768                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1769                 path[level].bp_ctxt.bh = path[level].bp_bh;
1770                 ret = nilfs_btnode_prepare_change_key(
1771                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1772                         &path[level].bp_ctxt);
1773                 if (ret < 0) {
1774                         nilfs_bmap_abort_update(&btree->bt_bmap,
1775                                                 &path[level].bp_oldreq,
1776                                                 &path[level].bp_newreq);
1777                         return ret;
1778                 }
1779         }
1780
1781         return 0;
1782 }
1783
1784 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1785                                         struct nilfs_btree_path *path,
1786                                         int level)
1787 {
1788         struct nilfs_btree_node *parent;
1789
1790         nilfs_bmap_commit_update(&btree->bt_bmap,
1791                                  &path[level].bp_oldreq,
1792                                  &path[level].bp_newreq);
1793
1794         if (buffer_nilfs_node(path[level].bp_bh)) {
1795                 nilfs_btnode_commit_change_key(
1796                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1797                         &path[level].bp_ctxt);
1798                 path[level].bp_bh = path[level].bp_ctxt.bh;
1799         }
1800         set_buffer_nilfs_volatile(path[level].bp_bh);
1801
1802         parent = nilfs_btree_get_node(btree, path, level + 1);
1803         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1804                                  path[level].bp_newreq.bpr_ptr);
1805 }
1806
1807 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1808                                        struct nilfs_btree_path *path,
1809                                        int level)
1810 {
1811         nilfs_bmap_abort_update(&btree->bt_bmap,
1812                                 &path[level].bp_oldreq,
1813                                 &path[level].bp_newreq);
1814         if (buffer_nilfs_node(path[level].bp_bh))
1815                 nilfs_btnode_abort_change_key(
1816                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1817                         &path[level].bp_ctxt);
1818 }
1819
1820 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1821                                            struct nilfs_btree_path *path,
1822                                            int minlevel,
1823                                            int *maxlevelp)
1824 {
1825         int level, ret;
1826
1827         level = minlevel;
1828         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1829                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1830                 if (ret < 0)
1831                         return ret;
1832         }
1833         while ((++level < nilfs_btree_height(btree) - 1) &&
1834                !buffer_dirty(path[level].bp_bh)) {
1835
1836                 BUG_ON(buffer_nilfs_volatile(path[level].bp_bh));
1837                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1838                 if (ret < 0)
1839                         goto out;
1840         }
1841
1842         /* success */
1843         BUG_ON(maxlevelp == NULL);
1844         *maxlevelp = level - 1;
1845         return 0;
1846
1847         /* error */
1848  out:
1849         while (--level > minlevel)
1850                 nilfs_btree_abort_update_v(btree, path, level);
1851         if (!buffer_nilfs_volatile(path[level].bp_bh))
1852                 nilfs_btree_abort_update_v(btree, path, level);
1853         return ret;
1854 }
1855
1856 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1857                                            struct nilfs_btree_path *path,
1858                                            int minlevel,
1859                                            int maxlevel,
1860                                            struct buffer_head *bh)
1861 {
1862         int level;
1863
1864         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1865                 nilfs_btree_commit_update_v(btree, path, minlevel);
1866
1867         for (level = minlevel + 1; level <= maxlevel; level++)
1868                 nilfs_btree_commit_update_v(btree, path, level);
1869 }
1870
1871 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1872                                    struct nilfs_btree_path *path,
1873                                    int level,
1874                                    struct buffer_head *bh)
1875 {
1876         int maxlevel, ret;
1877         struct nilfs_btree_node *parent;
1878         __u64 ptr;
1879
1880         get_bh(bh);
1881         path[level].bp_bh = bh;
1882         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
1883         if (ret < 0)
1884                 goto out;
1885
1886         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1887                 parent = nilfs_btree_get_node(btree, path, level + 1);
1888                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1889                                                path[level + 1].bp_index);
1890                 ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
1891                 if (ret < 0)
1892                         goto out;
1893         }
1894
1895         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);
1896
1897  out:
1898         brelse(path[level].bp_bh);
1899         path[level].bp_bh = NULL;
1900         return ret;
1901 }
1902
1903 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1904                                  struct buffer_head *bh)
1905 {
1906         struct nilfs_btree *btree;
1907         struct nilfs_btree_path *path;
1908         struct nilfs_btree_node *node;
1909         __u64 key;
1910         int level, ret;
1911
1912         BUG_ON(!buffer_dirty(bh));
1913
1914         btree = (struct nilfs_btree *)bmap;
1915         path = nilfs_btree_alloc_path(btree);
1916         if (path == NULL)
1917                 return -ENOMEM;
1918         nilfs_btree_init_path(btree, path);
1919
1920         if (buffer_nilfs_node(bh)) {
1921                 node = (struct nilfs_btree_node *)bh->b_data;
1922                 key = nilfs_btree_node_get_key(btree, node, 0);
1923                 level = nilfs_btree_node_get_level(btree, node);
1924         } else {
1925                 key = nilfs_bmap_data_get_key(bmap, bh);
1926                 level = NILFS_BTREE_LEVEL_DATA;
1927         }
1928
1929         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1930         if (ret < 0) {
1931                 /* BUG_ON(ret == -ENOENT); */
1932                 if (ret == -ENOENT) {
1933                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1934                                __func__, (unsigned long long)key, level);
1935                         BUG();
1936                 }
1937                 goto out;
1938         }
1939
1940         ret = btree->bt_ops->btop_propagate(btree, path, level, bh);
1941
1942  out:
1943         nilfs_btree_clear_path(btree, path);
1944         nilfs_btree_free_path(btree, path);
1945
1946         return ret;
1947 }
1948
1949 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1950                                     struct buffer_head *bh)
1951 {
1952         return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
1953 }
1954
1955 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1956                                          struct list_head *lists,
1957                                          struct buffer_head *bh)
1958 {
1959         struct list_head *head;
1960         struct buffer_head *cbh;
1961         struct nilfs_btree_node *node, *cnode;
1962         __u64 key, ckey;
1963         int level;
1964
1965         get_bh(bh);
1966         node = (struct nilfs_btree_node *)bh->b_data;
1967         key = nilfs_btree_node_get_key(btree, node, 0);
1968         level = nilfs_btree_node_get_level(btree, node);
1969         list_for_each(head, &lists[level]) {
1970                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1971                 cnode = (struct nilfs_btree_node *)cbh->b_data;
1972                 ckey = nilfs_btree_node_get_key(btree, cnode, 0);
1973                 if (key < ckey)
1974                         break;
1975         }
1976         list_add_tail(&bh->b_assoc_buffers, head);
1977 }
1978
1979 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1980                                              struct list_head *listp)
1981 {
1982         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1983         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1984         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1985         struct pagevec pvec;
1986         struct buffer_head *bh, *head;
1987         pgoff_t index = 0;
1988         int level, i;
1989
1990         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1991              level < NILFS_BTREE_LEVEL_MAX;
1992              level++)
1993                 INIT_LIST_HEAD(&lists[level]);
1994
1995         pagevec_init(&pvec, 0);
1996
1997         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
1998                                   PAGEVEC_SIZE)) {
1999                 for (i = 0; i < pagevec_count(&pvec); i++) {
2000                         bh = head = page_buffers(pvec.pages[i]);
2001                         do {
2002                                 if (buffer_dirty(bh))
2003                                         nilfs_btree_add_dirty_buffer(btree,
2004                                                                      lists, bh);
2005                         } while ((bh = bh->b_this_page) != head);
2006                 }
2007                 pagevec_release(&pvec);
2008                 cond_resched();
2009         }
2010
2011         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2012              level < NILFS_BTREE_LEVEL_MAX;
2013              level++)
2014                 list_splice(&lists[level], listp->prev);
2015 }
2016
2017 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2018                                 struct nilfs_btree_path *path,
2019                                 int level,
2020                                 struct buffer_head **bh,
2021                                 sector_t blocknr,
2022                                 union nilfs_binfo *binfo)
2023 {
2024         struct nilfs_btree_node *parent;
2025         __u64 key;
2026         __u64 ptr;
2027         int ret;
2028
2029         parent = nilfs_btree_get_node(btree, path, level + 1);
2030         ptr = nilfs_btree_node_get_ptr(btree, parent,
2031                                        path[level + 1].bp_index);
2032         if (buffer_nilfs_node(*bh)) {
2033                 path[level].bp_ctxt.oldkey = ptr;
2034                 path[level].bp_ctxt.newkey = blocknr;
2035                 path[level].bp_ctxt.bh = *bh;
2036                 ret = nilfs_btnode_prepare_change_key(
2037                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2038                         &path[level].bp_ctxt);
2039                 if (ret < 0)
2040                         return ret;
2041                 nilfs_btnode_commit_change_key(
2042                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2043                         &path[level].bp_ctxt);
2044                 *bh = path[level].bp_ctxt.bh;
2045         }
2046
2047         nilfs_btree_node_set_ptr(btree, parent,
2048                                  path[level + 1].bp_index, blocknr);
2049
2050         key = nilfs_btree_node_get_key(btree, parent,
2051                                        path[level + 1].bp_index);
2052         /* on-disk format */
2053         binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2054         binfo->bi_dat.bi_level = level;
2055
2056         return 0;
2057 }
2058
2059 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2060                                 struct nilfs_btree_path *path,
2061                                 int level,
2062                                 struct buffer_head **bh,
2063                                 sector_t blocknr,
2064                                 union nilfs_binfo *binfo)
2065 {
2066         struct nilfs_btree_node *parent;
2067         __u64 key;
2068         __u64 ptr;
2069         union nilfs_bmap_ptr_req req;
2070         int ret;
2071
2072         parent = nilfs_btree_get_node(btree, path, level + 1);
2073         ptr = nilfs_btree_node_get_ptr(btree, parent,
2074                                        path[level + 1].bp_index);
2075         req.bpr_ptr = ptr;
2076         ret = btree->bt_bmap.b_pops->bpop_prepare_start_ptr(&btree->bt_bmap,
2077                                                                &req);
2078         if (ret < 0)
2079                 return ret;
2080         btree->bt_bmap.b_pops->bpop_commit_start_ptr(&btree->bt_bmap,
2081                                                         &req, blocknr);
2082
2083         key = nilfs_btree_node_get_key(btree, parent,
2084                                        path[level + 1].bp_index);
2085         /* on-disk format */
2086         binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2087         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2088
2089         return 0;
2090 }
2091
2092 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2093                               struct buffer_head **bh,
2094                               sector_t blocknr,
2095                               union nilfs_binfo *binfo)
2096 {
2097         struct nilfs_btree *btree;
2098         struct nilfs_btree_path *path;
2099         struct nilfs_btree_node *node;
2100         __u64 key;
2101         int level, ret;
2102
2103         btree = (struct nilfs_btree *)bmap;
2104         path = nilfs_btree_alloc_path(btree);
2105         if (path == NULL)
2106                 return -ENOMEM;
2107         nilfs_btree_init_path(btree, path);
2108
2109         if (buffer_nilfs_node(*bh)) {
2110                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2111                 key = nilfs_btree_node_get_key(btree, node, 0);
2112                 level = nilfs_btree_node_get_level(btree, node);
2113         } else {
2114                 key = nilfs_bmap_data_get_key(bmap, *bh);
2115                 level = NILFS_BTREE_LEVEL_DATA;
2116         }
2117
2118         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2119         if (ret < 0) {
2120                 BUG_ON(ret == -ENOENT);
2121                 goto out;
2122         }
2123
2124         ret = btree->bt_ops->btop_assign(btree, path, level, bh,
2125                                             blocknr, binfo);
2126
2127  out:
2128         nilfs_btree_clear_path(btree, path);
2129         nilfs_btree_free_path(btree, path);
2130
2131         return ret;
2132 }
2133
2134 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2135                                  struct buffer_head **bh,
2136                                  sector_t blocknr,
2137                                  union nilfs_binfo *binfo)
2138 {
2139         struct nilfs_btree *btree;
2140         struct nilfs_btree_node *node;
2141         __u64 key;
2142         int ret;
2143
2144         btree = (struct nilfs_btree *)bmap;
2145         ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
2146         if (ret < 0)
2147                 return ret;
2148
2149         if (buffer_nilfs_node(*bh)) {
2150                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2151                 key = nilfs_btree_node_get_key(btree, node, 0);
2152         } else
2153                 key = nilfs_bmap_data_get_key(bmap, *bh);
2154
2155         /* on-disk format */
2156         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2157         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2158
2159         return 0;
2160 }
2161
2162 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2163 {
2164         struct buffer_head *bh;
2165         struct nilfs_btree *btree;
2166         struct nilfs_btree_path *path;
2167         __u64 ptr;
2168         int ret;
2169
2170         btree = (struct nilfs_btree *)bmap;
2171         path = nilfs_btree_alloc_path(btree);
2172         if (path == NULL)
2173                 return -ENOMEM;
2174         nilfs_btree_init_path(btree, path);
2175
2176         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2177         if (ret < 0) {
2178                 BUG_ON(ret == -ENOENT);
2179                 goto out;
2180         }
2181         ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh);
2182         if (ret < 0) {
2183                 BUG_ON(ret == -ENOENT);
2184                 goto out;
2185         }
2186
2187         if (!buffer_dirty(bh))
2188                 nilfs_btnode_mark_dirty(bh);
2189         nilfs_bmap_put_block(&btree->bt_bmap, bh);
2190         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2191                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2192
2193  out:
2194         nilfs_btree_clear_path(btree, path);
2195         nilfs_btree_free_path(btree, path);
2196         return ret;
2197 }
2198
2199 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2200         .bop_lookup             =       nilfs_btree_lookup,
2201         .bop_insert             =       nilfs_btree_insert,
2202         .bop_delete             =       nilfs_btree_delete,
2203         .bop_clear              =       NULL,
2204
2205         .bop_propagate          =       nilfs_btree_propagate,
2206
2207         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2208
2209         .bop_assign             =       nilfs_btree_assign,
2210         .bop_mark               =       nilfs_btree_mark,
2211
2212         .bop_last_key           =       nilfs_btree_last_key,
2213         .bop_check_insert       =       NULL,
2214         .bop_check_delete       =       nilfs_btree_check_delete,
2215         .bop_gather_data        =       nilfs_btree_gather_data,
2216 };
2217
2218 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2219         .bop_lookup             =       NULL,
2220         .bop_insert             =       NULL,
2221         .bop_delete             =       NULL,
2222         .bop_clear              =       NULL,
2223
2224         .bop_propagate          =       nilfs_btree_propagate_gc,
2225
2226         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2227
2228         .bop_assign             =       nilfs_btree_assign_gc,
2229         .bop_mark               =       NULL,
2230
2231         .bop_last_key           =       NULL,
2232         .bop_check_insert       =       NULL,
2233         .bop_check_delete       =       NULL,
2234         .bop_gather_data        =       NULL,
2235 };
2236
2237 static const struct nilfs_btree_operations nilfs_btree_ops_v = {
2238         .btop_find_target       =       nilfs_btree_find_target_v,
2239         .btop_set_target        =       nilfs_btree_set_target_v,
2240         .btop_propagate         =       nilfs_btree_propagate_v,
2241         .btop_assign            =       nilfs_btree_assign_v,
2242 };
2243
2244 static const struct nilfs_btree_operations nilfs_btree_ops_p = {
2245         .btop_find_target       =       NULL,
2246         .btop_set_target        =       NULL,
2247         .btop_propagate         =       nilfs_btree_propagate_p,
2248         .btop_assign            =       nilfs_btree_assign_p,
2249 };
2250
2251 int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high)
2252 {
2253         struct nilfs_btree *btree;
2254
2255         btree = (struct nilfs_btree *)bmap;
2256         bmap->b_ops = &nilfs_btree_ops;
2257         bmap->b_low = low;
2258         bmap->b_high = high;
2259         switch (bmap->b_inode->i_ino) {
2260         case NILFS_DAT_INO:
2261                 btree->bt_ops = &nilfs_btree_ops_p;
2262                 break;
2263         default:
2264                 btree->bt_ops = &nilfs_btree_ops_v;
2265                 break;
2266         }
2267
2268         return 0;
2269 }
2270
2271 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2272 {
2273         bmap->b_low = NILFS_BMAP_LARGE_LOW;
2274         bmap->b_high = NILFS_BMAP_LARGE_HIGH;
2275         bmap->b_ops = &nilfs_btree_ops_gc;
2276 }