2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
10 * Jens Laas <jens.laas@data.slu.se> Swedish University of
11 * Agricultural Sciences.
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
15 * This work is based on the LPC-trie which is originally descibed in:
17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
28 * Code from fib_hash has been reused which includes the following header:
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
35 * IPv4 FIB: lookup engine and maintenance routines.
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
46 #define VERSION "0.325"
48 #include <linux/config.h>
49 #include <asm/uaccess.h>
50 #include <asm/system.h>
51 #include <asm/bitops.h>
52 #include <linux/types.h>
53 #include <linux/kernel.h>
54 #include <linux/sched.h>
56 #include <linux/string.h>
57 #include <linux/socket.h>
58 #include <linux/sockios.h>
59 #include <linux/errno.h>
61 #include <linux/inet.h>
62 #include <linux/netdevice.h>
63 #include <linux/if_arp.h>
64 #include <linux/proc_fs.h>
65 #include <linux/skbuff.h>
66 #include <linux/netlink.h>
67 #include <linux/init.h>
68 #include <linux/list.h>
70 #include <net/protocol.h>
71 #include <net/route.h>
74 #include <net/ip_fib.h>
75 #include "fib_lookup.h"
77 #undef CONFIG_IP_FIB_TRIE_STATS
78 #define MAX_CHILDS 16384
80 #define KEYLENGTH (8*sizeof(t_key))
81 #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
82 #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
84 static DEFINE_RWLOCK(fib_lock);
86 typedef unsigned int t_key;
90 #define NODE_TYPE_MASK 0x1UL
91 #define NODE_PARENT(node) \
92 ((struct tnode *)((node)->parent & ~NODE_TYPE_MASK))
93 #define NODE_SET_PARENT(node, ptr) \
94 ((node)->parent = (((unsigned long)(ptr)) | \
95 ((node)->parent & NODE_TYPE_MASK)))
96 #define NODE_INIT_PARENT(node, type) \
97 ((node)->parent = (type))
98 #define NODE_TYPE(node) \
99 ((node)->parent & NODE_TYPE_MASK)
101 #define IS_TNODE(n) (!(n->parent & T_LEAF))
102 #define IS_LEAF(n) (n->parent & T_LEAF)
106 unsigned long parent;
111 unsigned long parent;
112 struct hlist_head list;
116 struct hlist_node hlist;
118 struct list_head falh;
123 unsigned long parent;
124 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
125 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
126 unsigned short full_children; /* KEYLENGTH bits needed */
127 unsigned short empty_children; /* KEYLENGTH bits needed */
128 struct node *child[0];
131 #ifdef CONFIG_IP_FIB_TRIE_STATS
132 struct trie_use_stats {
134 unsigned int backtrack;
135 unsigned int semantic_match_passed;
136 unsigned int semantic_match_miss;
137 unsigned int null_node_hit;
138 unsigned int resize_node_skipped;
143 unsigned int totdepth;
144 unsigned int maxdepth;
147 unsigned int nullpointers;
148 unsigned int nodesizes[MAX_CHILDS];
153 #ifdef CONFIG_IP_FIB_TRIE_STATS
154 struct trie_use_stats stats;
157 unsigned int revision;
160 static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
161 static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
162 static struct node *resize(struct trie *t, struct tnode *tn);
163 static struct tnode *inflate(struct trie *t, struct tnode *tn);
164 static struct tnode *halve(struct trie *t, struct tnode *tn);
165 static void tnode_free(struct tnode *tn);
166 static void trie_dump_seq(struct seq_file *seq, struct trie *t);
168 static kmem_cache_t *fn_alias_kmem;
169 static struct trie *trie_local = NULL, *trie_main = NULL;
171 static inline struct node *tnode_get_child(struct tnode *tn, int i)
173 BUG_ON(i >= 1 << tn->bits);
178 static inline int tnode_child_length(const struct tnode *tn)
180 return 1 << tn->bits;
183 static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
185 if (offset < KEYLENGTH)
186 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
191 static inline int tkey_equals(t_key a, t_key b)
196 static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
198 if (bits == 0 || offset >= KEYLENGTH)
200 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
201 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
204 static inline int tkey_mismatch(t_key a, int offset, t_key b)
211 while ((diff << i) >> (KEYLENGTH-1) == 0)
216 /* Candidate for fib_semantics */
218 static void fn_free_alias(struct fib_alias *fa)
220 fib_release_info(fa->fa_info);
221 kmem_cache_free(fn_alias_kmem, fa);
225 To understand this stuff, an understanding of keys and all their bits is
226 necessary. Every node in the trie has a key associated with it, but not
227 all of the bits in that key are significant.
229 Consider a node 'n' and its parent 'tp'.
231 If n is a leaf, every bit in its key is significant. Its presence is
232 necessitaded by path compression, since during a tree traversal (when
233 searching for a leaf - unless we are doing an insertion) we will completely
234 ignore all skipped bits we encounter. Thus we need to verify, at the end of
235 a potentially successful search, that we have indeed been walking the
238 Note that we can never "miss" the correct key in the tree if present by
239 following the wrong path. Path compression ensures that segments of the key
240 that are the same for all keys with a given prefix are skipped, but the
241 skipped part *is* identical for each node in the subtrie below the skipped
242 bit! trie_insert() in this implementation takes care of that - note the
243 call to tkey_sub_equals() in trie_insert().
245 if n is an internal node - a 'tnode' here, the various parts of its key
246 have many different meanings.
249 _________________________________________________________________
250 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
251 -----------------------------------------------------------------
252 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
254 _________________________________________________________________
255 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
256 -----------------------------------------------------------------
257 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
264 First, let's just ignore the bits that come before the parent tp, that is
265 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
266 not use them for anything.
268 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
269 index into the parent's child array. That is, they will be used to find
270 'n' among tp's children.
272 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
275 All the bits we have seen so far are significant to the node n. The rest
276 of the bits are really not needed or indeed known in n->key.
278 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
279 n's child array, and will of course be different for each child.
282 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
287 static inline void check_tnode(const struct tnode *tn)
289 WARN_ON(tn && tn->pos+tn->bits > 32);
292 static int halve_threshold = 25;
293 static int inflate_threshold = 50;
295 static struct leaf *leaf_new(void)
297 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
299 NODE_INIT_PARENT(l, T_LEAF);
300 INIT_HLIST_HEAD(&l->list);
305 static struct leaf_info *leaf_info_new(int plen)
307 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
313 INIT_LIST_HEAD(&li->falh);
318 static inline void free_leaf(struct leaf *l)
323 static inline void free_leaf_info(struct leaf_info *li)
328 static struct tnode *tnode_alloc(unsigned int size)
330 if (size <= PAGE_SIZE) {
331 return kmalloc(size, GFP_KERNEL);
333 return (struct tnode *)
334 __get_free_pages(GFP_KERNEL, get_order(size));
338 static void __tnode_free(struct tnode *tn)
340 unsigned int size = sizeof(struct tnode) +
341 (1 << tn->bits) * sizeof(struct node *);
343 if (size <= PAGE_SIZE)
346 free_pages((unsigned long)tn, get_order(size));
349 static struct tnode* tnode_new(t_key key, int pos, int bits)
351 int nchildren = 1<<bits;
352 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
353 struct tnode *tn = tnode_alloc(sz);
357 NODE_INIT_PARENT(tn, T_TNODE);
361 tn->full_children = 0;
362 tn->empty_children = 1<<bits;
365 pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
366 (unsigned int) (sizeof(struct node) * 1<<bits));
370 static void tnode_free(struct tnode *tn)
373 free_leaf((struct leaf *)tn);
374 pr_debug("FL %p \n", tn);
377 pr_debug("FT %p \n", tn);
382 * Check whether a tnode 'n' is "full", i.e. it is an internal node
383 * and no bits are skipped. See discussion in dyntree paper p. 6
386 static inline int tnode_full(const struct tnode *tn, const struct node *n)
388 if (n == NULL || IS_LEAF(n))
391 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
394 static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
396 tnode_put_child_reorg(tn, i, n, -1);
400 * Add a child at position i overwriting the old value.
401 * Update the value of full_children and empty_children.
404 static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
409 BUG_ON(i >= 1<<tn->bits);
411 write_lock_bh(&fib_lock);
414 /* update emptyChildren */
415 if (n == NULL && chi != NULL)
416 tn->empty_children++;
417 else if (n != NULL && chi == NULL)
418 tn->empty_children--;
420 /* update fullChildren */
422 wasfull = tnode_full(tn, chi);
424 isfull = tnode_full(tn, n);
425 if (wasfull && !isfull)
427 else if (!wasfull && isfull)
431 NODE_SET_PARENT(n, tn);
434 write_unlock_bh(&fib_lock);
437 static struct node *resize(struct trie *t, struct tnode *tn)
441 struct tnode *old_tn;
446 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
447 tn, inflate_threshold, halve_threshold);
450 if (tn->empty_children == tnode_child_length(tn)) {
455 if (tn->empty_children == tnode_child_length(tn) - 1)
456 for (i = 0; i < tnode_child_length(tn); i++) {
459 write_lock_bh(&fib_lock);
462 write_unlock_bh(&fib_lock);
466 /* compress one level */
467 NODE_INIT_PARENT(n, NODE_TYPE(n));
469 write_unlock_bh(&fib_lock);
474 * Double as long as the resulting node has a number of
475 * nonempty nodes that are above the threshold.
479 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
480 * the Helsinki University of Technology and Matti Tikkanen of Nokia
481 * Telecommunications, page 6:
482 * "A node is doubled if the ratio of non-empty children to all
483 * children in the *doubled* node is at least 'high'."
485 * 'high' in this instance is the variable 'inflate_threshold'. It
486 * is expressed as a percentage, so we multiply it with
487 * tnode_child_length() and instead of multiplying by 2 (since the
488 * child array will be doubled by inflate()) and multiplying
489 * the left-hand side by 100 (to handle the percentage thing) we
490 * multiply the left-hand side by 50.
492 * The left-hand side may look a bit weird: tnode_child_length(tn)
493 * - tn->empty_children is of course the number of non-null children
494 * in the current node. tn->full_children is the number of "full"
495 * children, that is non-null tnodes with a skip value of 0.
496 * All of those will be doubled in the resulting inflated tnode, so
497 * we just count them one extra time here.
499 * A clearer way to write this would be:
501 * to_be_doubled = tn->full_children;
502 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
505 * new_child_length = tnode_child_length(tn) * 2;
507 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
509 * if (new_fill_factor >= inflate_threshold)
511 * ...and so on, tho it would mess up the while () loop.
514 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
518 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
519 * inflate_threshold * new_child_length
521 * expand not_to_be_doubled and to_be_doubled, and shorten:
522 * 100 * (tnode_child_length(tn) - tn->empty_children +
523 * tn->full_children) >= inflate_threshold * new_child_length
525 * expand new_child_length:
526 * 100 * (tnode_child_length(tn) - tn->empty_children +
527 * tn->full_children) >=
528 * inflate_threshold * tnode_child_length(tn) * 2
531 * 50 * (tn->full_children + tnode_child_length(tn) -
532 * tn->empty_children) >= inflate_threshold *
533 * tnode_child_length(tn)
540 while ((tn->full_children > 0 &&
541 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
542 inflate_threshold * tnode_child_length(tn))) {
548 #ifdef CONFIG_IP_FIB_TRIE_STATS
549 t->stats.resize_node_skipped++;
558 * Halve as long as the number of empty children in this
559 * node is above threshold.
563 while (tn->bits > 1 &&
564 100 * (tnode_child_length(tn) - tn->empty_children) <
565 halve_threshold * tnode_child_length(tn)) {
571 #ifdef CONFIG_IP_FIB_TRIE_STATS
572 t->stats.resize_node_skipped++;
579 /* Only one child remains */
581 if (tn->empty_children == tnode_child_length(tn) - 1)
582 for (i = 0; i < tnode_child_length(tn); i++) {
585 write_lock_bh(&fib_lock);
589 write_unlock_bh(&fib_lock);
593 /* compress one level */
595 NODE_INIT_PARENT(n, NODE_TYPE(n));
597 write_unlock_bh(&fib_lock);
602 return (struct node *) tn;
605 static struct tnode *inflate(struct trie *t, struct tnode *tn)
608 struct tnode *oldtnode = tn;
609 int olen = tnode_child_length(tn);
612 pr_debug("In inflate\n");
614 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
617 return ERR_PTR(-ENOMEM);
620 * Preallocate and store tnodes before the actual work so we
621 * don't get into an inconsistent state if memory allocation
622 * fails. In case of failure we return the oldnode and inflate
623 * of tnode is ignored.
626 for (i = 0; i < olen; i++) {
627 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
631 inode->pos == oldtnode->pos + oldtnode->bits &&
633 struct tnode *left, *right;
634 t_key m = TKEY_GET_MASK(inode->pos, 1);
636 left = tnode_new(inode->key&(~m), inode->pos + 1,
641 right = tnode_new(inode->key|m, inode->pos + 1,
649 put_child(t, tn, 2*i, (struct node *) left);
650 put_child(t, tn, 2*i+1, (struct node *) right);
654 for (i = 0; i < olen; i++) {
655 struct node *node = tnode_get_child(oldtnode, i);
656 struct tnode *left, *right;
663 /* A leaf or an internal node with skipped bits */
665 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
666 tn->pos + tn->bits - 1) {
667 if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
669 put_child(t, tn, 2*i, node);
671 put_child(t, tn, 2*i+1, node);
675 /* An internal node with two children */
676 inode = (struct tnode *) node;
678 if (inode->bits == 1) {
679 put_child(t, tn, 2*i, inode->child[0]);
680 put_child(t, tn, 2*i+1, inode->child[1]);
686 /* An internal node with more than two children */
688 /* We will replace this node 'inode' with two new
689 * ones, 'left' and 'right', each with half of the
690 * original children. The two new nodes will have
691 * a position one bit further down the key and this
692 * means that the "significant" part of their keys
693 * (see the discussion near the top of this file)
694 * will differ by one bit, which will be "0" in
695 * left's key and "1" in right's key. Since we are
696 * moving the key position by one step, the bit that
697 * we are moving away from - the bit at position
698 * (inode->pos) - is the one that will differ between
699 * left and right. So... we synthesize that bit in the
701 * The mask 'm' below will be a single "one" bit at
702 * the position (inode->pos)
705 /* Use the old key, but set the new significant
709 left = (struct tnode *) tnode_get_child(tn, 2*i);
710 put_child(t, tn, 2*i, NULL);
714 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
715 put_child(t, tn, 2*i+1, NULL);
719 size = tnode_child_length(left);
720 for (j = 0; j < size; j++) {
721 put_child(t, left, j, inode->child[j]);
722 put_child(t, right, j, inode->child[j + size]);
724 put_child(t, tn, 2*i, resize(t, left));
725 put_child(t, tn, 2*i+1, resize(t, right));
729 tnode_free(oldtnode);
733 int size = tnode_child_length(tn);
736 for (j = 0; j < size; j++)
738 tnode_free((struct tnode *)tn->child[j]);
742 return ERR_PTR(-ENOMEM);
746 static struct tnode *halve(struct trie *t, struct tnode *tn)
748 struct tnode *oldtnode = tn;
749 struct node *left, *right;
751 int olen = tnode_child_length(tn);
753 pr_debug("In halve\n");
755 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
758 return ERR_PTR(-ENOMEM);
761 * Preallocate and store tnodes before the actual work so we
762 * don't get into an inconsistent state if memory allocation
763 * fails. In case of failure we return the oldnode and halve
764 * of tnode is ignored.
767 for (i = 0; i < olen; i += 2) {
768 left = tnode_get_child(oldtnode, i);
769 right = tnode_get_child(oldtnode, i+1);
771 /* Two nonempty children */
775 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
780 put_child(t, tn, i/2, (struct node *)newn);
785 for (i = 0; i < olen; i += 2) {
786 struct tnode *newBinNode;
788 left = tnode_get_child(oldtnode, i);
789 right = tnode_get_child(oldtnode, i+1);
791 /* At least one of the children is empty */
793 if (right == NULL) /* Both are empty */
795 put_child(t, tn, i/2, right);
800 put_child(t, tn, i/2, left);
804 /* Two nonempty children */
805 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
806 put_child(t, tn, i/2, NULL);
807 put_child(t, newBinNode, 0, left);
808 put_child(t, newBinNode, 1, right);
809 put_child(t, tn, i/2, resize(t, newBinNode));
811 tnode_free(oldtnode);
815 int size = tnode_child_length(tn);
818 for (j = 0; j < size; j++)
820 tnode_free((struct tnode *)tn->child[j]);
824 return ERR_PTR(-ENOMEM);
828 static void trie_init(struct trie *t)
836 #ifdef CONFIG_IP_FIB_TRIE_STATS
837 memset(&t->stats, 0, sizeof(struct trie_use_stats));
841 static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
843 struct hlist_node *node;
844 struct leaf_info *li;
846 hlist_for_each_entry(li, node, head, hlist)
847 if (li->plen == plen)
853 static inline struct list_head * get_fa_head(struct leaf *l, int plen)
855 struct leaf_info *li = find_leaf_info(&l->list, plen);
863 static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
865 struct leaf_info *li = NULL, *last = NULL;
866 struct hlist_node *node;
868 write_lock_bh(&fib_lock);
870 if (hlist_empty(head)) {
871 hlist_add_head(&new->hlist, head);
873 hlist_for_each_entry(li, node, head, hlist) {
874 if (new->plen > li->plen)
880 hlist_add_after(&last->hlist, &new->hlist);
882 hlist_add_before(&new->hlist, &li->hlist);
884 write_unlock_bh(&fib_lock);
888 fib_find_node(struct trie *t, u32 key)
897 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
898 tn = (struct tnode *) n;
902 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
903 pos = tn->pos + tn->bits;
904 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
908 /* Case we have found a leaf. Compare prefixes */
910 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
911 return (struct leaf *)n;
916 static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
921 struct tnode *tp = NULL;
926 while (tn != NULL && NODE_PARENT(tn) != NULL) {
927 BUG_ON(i > 12); /* Why is this a bug? -ojn */
930 tp = NODE_PARENT(tn);
931 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
932 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
933 tn = (struct tnode *) resize (t, (struct tnode *)tn);
934 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
936 if (!NODE_PARENT(tn))
939 tn = NODE_PARENT(tn);
941 /* Handle last (top) tnode */
943 tn = (struct tnode*) resize(t, (struct tnode *)tn);
945 return (struct node*) tn;
948 static struct list_head *
949 fib_insert_node(struct trie *t, int *err, u32 key, int plen)
952 struct tnode *tp = NULL, *tn = NULL;
956 struct list_head *fa_head = NULL;
957 struct leaf_info *li;
963 /* If we point to NULL, stop. Either the tree is empty and we should
964 * just put a new leaf in if, or we have reached an empty child slot,
965 * and we should just put our new leaf in that.
966 * If we point to a T_TNODE, check if it matches our key. Note that
967 * a T_TNODE might be skipping any number of bits - its 'pos' need
968 * not be the parent's 'pos'+'bits'!
970 * If it does match the current key, get pos/bits from it, extract
971 * the index from our key, push the T_TNODE and walk the tree.
973 * If it doesn't, we have to replace it with a new T_TNODE.
975 * If we point to a T_LEAF, it might or might not have the same key
976 * as we do. If it does, just change the value, update the T_LEAF's
977 * value, and return it.
978 * If it doesn't, we need to replace it with a T_TNODE.
981 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
982 tn = (struct tnode *) n;
986 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
988 pos = tn->pos + tn->bits;
989 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
991 BUG_ON(n && NODE_PARENT(n) != tn);
997 * n ----> NULL, LEAF or TNODE
999 * tp is n's (parent) ----> NULL or TNODE
1002 BUG_ON(tp && IS_LEAF(tp));
1004 /* Case 1: n is a leaf. Compare prefixes */
1006 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
1007 struct leaf *l = (struct leaf *) n;
1009 li = leaf_info_new(plen);
1016 fa_head = &li->falh;
1017 insert_leaf_info(&l->list, li);
1029 li = leaf_info_new(plen);
1032 tnode_free((struct tnode *) l);
1037 fa_head = &li->falh;
1038 insert_leaf_info(&l->list, li);
1040 if (t->trie && n == NULL) {
1041 /* Case 2: n is NULL, and will just insert a new leaf */
1043 NODE_SET_PARENT(l, tp);
1045 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1046 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1048 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
1050 * Add a new tnode here
1051 * first tnode need some special handling
1055 pos = tp->pos+tp->bits;
1060 newpos = tkey_mismatch(key, pos, n->key);
1061 tn = tnode_new(n->key, newpos, 1);
1064 tn = tnode_new(key, newpos, 1); /* First tnode */
1069 tnode_free((struct tnode *) l);
1074 NODE_SET_PARENT(tn, tp);
1076 missbit = tkey_extract_bits(key, newpos, 1);
1077 put_child(t, tn, missbit, (struct node *)l);
1078 put_child(t, tn, 1-missbit, n);
1081 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1082 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
1084 t->trie = (struct node*) tn; /* First tnode */
1089 if (tp && tp->pos + tp->bits > 32)
1090 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1091 tp, tp->pos, tp->bits, key, plen);
1093 /* Rebalance the trie */
1094 t->trie = trie_rebalance(t, tp);
1102 fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1103 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1105 struct trie *t = (struct trie *) tb->tb_data;
1106 struct fib_alias *fa, *new_fa;
1107 struct list_head *fa_head = NULL;
1108 struct fib_info *fi;
1109 int plen = r->rtm_dst_len;
1110 int type = r->rtm_type;
1111 u8 tos = r->rtm_tos;
1121 memcpy(&key, rta->rta_dst, 4);
1125 pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
1127 mask = ntohl(inet_make_mask(plen));
1134 fi = fib_create_info(r, rta, nlhdr, &err);
1139 l = fib_find_node(t, key);
1143 fa_head = get_fa_head(l, plen);
1144 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1147 /* Now fa, if non-NULL, points to the first fib alias
1148 * with the same keys [prefix,tos,priority], if such key already
1149 * exists or to the node before which we will insert new one.
1151 * If fa is NULL, we will need to allocate a new one and
1152 * insert to the head of f.
1154 * If f is NULL, no fib node matched the destination key
1155 * and we need to allocate a new one of those as well.
1158 if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
1159 struct fib_alias *fa_orig;
1162 if (nlhdr->nlmsg_flags & NLM_F_EXCL)
1165 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
1166 struct fib_info *fi_drop;
1169 write_lock_bh(&fib_lock);
1171 fi_drop = fa->fa_info;
1174 fa->fa_scope = r->rtm_scope;
1175 state = fa->fa_state;
1176 fa->fa_state &= ~FA_S_ACCESSED;
1178 write_unlock_bh(&fib_lock);
1180 fib_release_info(fi_drop);
1181 if (state & FA_S_ACCESSED)
1186 /* Error if we find a perfect match which
1187 * uses the same scope, type, and nexthop
1191 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1192 if (fa->fa_tos != tos)
1194 if (fa->fa_info->fib_priority != fi->fib_priority)
1196 if (fa->fa_type == type &&
1197 fa->fa_scope == r->rtm_scope &&
1198 fa->fa_info == fi) {
1202 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
1206 if (!(nlhdr->nlmsg_flags & NLM_F_CREATE))
1210 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1214 new_fa->fa_info = fi;
1215 new_fa->fa_tos = tos;
1216 new_fa->fa_type = type;
1217 new_fa->fa_scope = r->rtm_scope;
1218 new_fa->fa_state = 0;
1220 * Insert new entry to the list.
1224 fa_head = fib_insert_node(t, &err, key, plen);
1227 goto out_free_new_fa;
1230 write_lock_bh(&fib_lock);
1232 list_add_tail(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head));
1234 write_unlock_bh(&fib_lock);
1237 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
1242 kmem_cache_free(fn_alias_kmem, new_fa);
1244 fib_release_info(fi);
1249 static inline int check_leaf(struct trie *t, struct leaf *l,
1250 t_key key, int *plen, const struct flowi *flp,
1251 struct fib_result *res)
1255 struct leaf_info *li;
1256 struct hlist_head *hhead = &l->list;
1257 struct hlist_node *node;
1259 hlist_for_each_entry(li, node, hhead, hlist) {
1261 mask = ntohl(inet_make_mask(i));
1262 if (l->key != (key & mask))
1265 if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) {
1267 #ifdef CONFIG_IP_FIB_TRIE_STATS
1268 t->stats.semantic_match_passed++;
1272 #ifdef CONFIG_IP_FIB_TRIE_STATS
1273 t->stats.semantic_match_miss++;
1280 fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1282 struct trie *t = (struct trie *) tb->tb_data;
1287 t_key key = ntohl(flp->fl4_dst);
1290 int current_prefix_length = KEYLENGTH;
1292 t_key node_prefix, key_prefix, pref_mismatch;
1297 read_lock(&fib_lock);
1302 #ifdef CONFIG_IP_FIB_TRIE_STATS
1308 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
1312 pn = (struct tnode *) n;
1320 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1322 n = tnode_get_child(pn, cindex);
1325 #ifdef CONFIG_IP_FIB_TRIE_STATS
1326 t->stats.null_node_hit++;
1332 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
1340 cn = (struct tnode *)n;
1343 * It's a tnode, and we can do some extra checks here if we
1344 * like, to avoid descending into a dead-end branch.
1345 * This tnode is in the parent's child array at index
1346 * key[p_pos..p_pos+p_bits] but potentially with some bits
1347 * chopped off, so in reality the index may be just a
1348 * subprefix, padded with zero at the end.
1349 * We can also take a look at any skipped bits in this
1350 * tnode - everything up to p_pos is supposed to be ok,
1351 * and the non-chopped bits of the index (se previous
1352 * paragraph) are also guaranteed ok, but the rest is
1353 * considered unknown.
1355 * The skipped bits are key[pos+bits..cn->pos].
1358 /* If current_prefix_length < pos+bits, we are already doing
1359 * actual prefix matching, which means everything from
1360 * pos+(bits-chopped_off) onward must be zero along some
1361 * branch of this subtree - otherwise there is *no* valid
1362 * prefix present. Here we can only check the skipped
1363 * bits. Remember, since we have already indexed into the
1364 * parent's child array, we know that the bits we chopped of
1368 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1370 if (current_prefix_length < pos+bits) {
1371 if (tkey_extract_bits(cn->key, current_prefix_length,
1372 cn->pos - current_prefix_length) != 0 ||
1378 * If chopped_off=0, the index is fully validated and we
1379 * only need to look at the skipped bits for this, the new,
1380 * tnode. What we actually want to do is to find out if
1381 * these skipped bits match our key perfectly, or if we will
1382 * have to count on finding a matching prefix further down,
1383 * because if we do, we would like to have some way of
1384 * verifying the existence of such a prefix at this point.
1387 /* The only thing we can do at this point is to verify that
1388 * any such matching prefix can indeed be a prefix to our
1389 * key, and if the bits in the node we are inspecting that
1390 * do not match our key are not ZERO, this cannot be true.
1391 * Thus, find out where there is a mismatch (before cn->pos)
1392 * and verify that all the mismatching bits are zero in the
1396 /* Note: We aren't very concerned about the piece of the key
1397 * that precede pn->pos+pn->bits, since these have already been
1398 * checked. The bits after cn->pos aren't checked since these are
1399 * by definition "unknown" at this point. Thus, what we want to
1400 * see is if we are about to enter the "prefix matching" state,
1401 * and in that case verify that the skipped bits that will prevail
1402 * throughout this subtree are zero, as they have to be if we are
1403 * to find a matching prefix.
1406 node_prefix = MASK_PFX(cn->key, cn->pos);
1407 key_prefix = MASK_PFX(key, cn->pos);
1408 pref_mismatch = key_prefix^node_prefix;
1411 /* In short: If skipped bits in this node do not match the search
1412 * key, enter the "prefix matching" state.directly.
1414 if (pref_mismatch) {
1415 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1417 pref_mismatch = pref_mismatch <<1;
1419 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1421 if (key_prefix != 0)
1424 if (current_prefix_length >= cn->pos)
1425 current_prefix_length = mp;
1428 pn = (struct tnode *)n; /* Descend */
1435 /* As zero don't change the child key (cindex) */
1436 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
1439 /* Decrease current_... with bits chopped off */
1440 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1441 current_prefix_length = pn->pos + pn->bits - chopped_off;
1444 * Either we do the actual chop off according or if we have
1445 * chopped off all bits in this tnode walk up to our parent.
1448 if (chopped_off <= pn->bits) {
1449 cindex &= ~(1 << (chopped_off-1));
1451 if (NODE_PARENT(pn) == NULL)
1454 /* Get Child's index */
1455 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1456 pn = NODE_PARENT(pn);
1459 #ifdef CONFIG_IP_FIB_TRIE_STATS
1460 t->stats.backtrack++;
1468 read_unlock(&fib_lock);
1472 static int trie_leaf_remove(struct trie *t, t_key key)
1475 struct tnode *tp = NULL;
1476 struct node *n = t->trie;
1479 pr_debug("entering trie_leaf_remove(%p)\n", n);
1481 /* Note that in the case skipped bits, those bits are *not* checked!
1482 * When we finish this, we will have NULL or a T_LEAF, and the
1483 * T_LEAF may or may not match our key.
1486 while (n != NULL && IS_TNODE(n)) {
1487 struct tnode *tn = (struct tnode *) n;
1489 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1491 BUG_ON(n && NODE_PARENT(n) != tn);
1493 l = (struct leaf *) n;
1495 if (!n || !tkey_equals(l->key, key))
1500 * Remove the leaf and rebalance the tree
1506 tp = NODE_PARENT(n);
1507 tnode_free((struct tnode *) n);
1510 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1511 put_child(t, (struct tnode *)tp, cindex, NULL);
1512 t->trie = trie_rebalance(t, tp);
1520 fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1521 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1523 struct trie *t = (struct trie *) tb->tb_data;
1525 int plen = r->rtm_dst_len;
1526 u8 tos = r->rtm_tos;
1527 struct fib_alias *fa, *fa_to_delete;
1528 struct list_head *fa_head;
1531 struct leaf_info *li;
1539 memcpy(&key, rta->rta_dst, 4);
1542 mask = ntohl(inet_make_mask(plen));
1548 l = fib_find_node(t, key);
1553 fa_head = get_fa_head(l, plen);
1554 fa = fib_find_alias(fa_head, tos, 0);
1559 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1561 fa_to_delete = NULL;
1562 fa_head = fa->fa_list.prev;
1563 list_for_each_entry(fa, fa_head, fa_list) {
1564 struct fib_info *fi = fa->fa_info;
1566 if (fa->fa_tos != tos)
1569 if ((!r->rtm_type ||
1570 fa->fa_type == r->rtm_type) &&
1571 (r->rtm_scope == RT_SCOPE_NOWHERE ||
1572 fa->fa_scope == r->rtm_scope) &&
1573 (!r->rtm_protocol ||
1574 fi->fib_protocol == r->rtm_protocol) &&
1575 fib_nh_match(r, nlhdr, rta, fi) == 0) {
1585 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
1587 l = fib_find_node(t, key);
1588 li = find_leaf_info(&l->list, plen);
1590 write_lock_bh(&fib_lock);
1592 list_del(&fa->fa_list);
1594 if (list_empty(fa_head)) {
1595 hlist_del(&li->hlist);
1598 write_unlock_bh(&fib_lock);
1603 if (hlist_empty(&l->list))
1604 trie_leaf_remove(t, key);
1606 if (fa->fa_state & FA_S_ACCESSED)
1613 static int trie_flush_list(struct trie *t, struct list_head *head)
1615 struct fib_alias *fa, *fa_node;
1618 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1619 struct fib_info *fi = fa->fa_info;
1621 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
1622 write_lock_bh(&fib_lock);
1623 list_del(&fa->fa_list);
1624 write_unlock_bh(&fib_lock);
1633 static int trie_flush_leaf(struct trie *t, struct leaf *l)
1636 struct hlist_head *lih = &l->list;
1637 struct hlist_node *node, *tmp;
1638 struct leaf_info *li = NULL;
1640 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
1641 found += trie_flush_list(t, &li->falh);
1643 if (list_empty(&li->falh)) {
1644 write_lock_bh(&fib_lock);
1645 hlist_del(&li->hlist);
1646 write_unlock_bh(&fib_lock);
1654 static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1656 struct node *c = (struct node *) thisleaf;
1661 if (t->trie == NULL)
1664 if (IS_LEAF(t->trie)) /* trie w. just a leaf */
1665 return (struct leaf *) t->trie;
1667 p = (struct tnode*) t->trie; /* Start */
1669 p = (struct tnode *) NODE_PARENT(c);
1674 /* Find the next child of the parent */
1676 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1680 last = 1 << p->bits;
1681 for (idx = pos; idx < last ; idx++) {
1685 /* Decend if tnode */
1686 while (IS_TNODE(p->child[idx])) {
1687 p = (struct tnode*) p->child[idx];
1690 /* Rightmost non-NULL branch */
1691 if (p && IS_TNODE(p))
1692 while (p->child[idx] == NULL && idx < (1 << p->bits)) idx++;
1694 /* Done with this tnode? */
1695 if (idx >= (1 << p->bits) || p->child[idx] == NULL)
1698 return (struct leaf*) p->child[idx];
1701 /* No more children go up one step */
1702 c = (struct node *) p;
1703 p = (struct tnode *) NODE_PARENT(p);
1705 return NULL; /* Ready. Root of trie */
1708 static int fn_trie_flush(struct fib_table *tb)
1710 struct trie *t = (struct trie *) tb->tb_data;
1711 struct leaf *ll = NULL, *l = NULL;
1716 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
1717 found += trie_flush_leaf(t, l);
1719 if (ll && hlist_empty(&ll->list))
1720 trie_leaf_remove(t, ll->key);
1724 if (ll && hlist_empty(&ll->list))
1725 trie_leaf_remove(t, ll->key);
1727 pr_debug("trie_flush found=%d\n", found);
1731 static int trie_last_dflt = -1;
1734 fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1736 struct trie *t = (struct trie *) tb->tb_data;
1737 int order, last_idx;
1738 struct fib_info *fi = NULL;
1739 struct fib_info *last_resort;
1740 struct fib_alias *fa = NULL;
1741 struct list_head *fa_head;
1748 read_lock(&fib_lock);
1750 l = fib_find_node(t, 0);
1754 fa_head = get_fa_head(l, 0);
1758 if (list_empty(fa_head))
1761 list_for_each_entry(fa, fa_head, fa_list) {
1762 struct fib_info *next_fi = fa->fa_info;
1764 if (fa->fa_scope != res->scope ||
1765 fa->fa_type != RTN_UNICAST)
1768 if (next_fi->fib_priority > res->fi->fib_priority)
1770 if (!next_fi->fib_nh[0].nh_gw ||
1771 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1773 fa->fa_state |= FA_S_ACCESSED;
1776 if (next_fi != res->fi)
1778 } else if (!fib_detect_death(fi, order, &last_resort,
1779 &last_idx, &trie_last_dflt)) {
1781 fib_info_put(res->fi);
1783 atomic_inc(&fi->fib_clntref);
1784 trie_last_dflt = order;
1790 if (order <= 0 || fi == NULL) {
1791 trie_last_dflt = -1;
1795 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1797 fib_info_put(res->fi);
1799 atomic_inc(&fi->fib_clntref);
1800 trie_last_dflt = order;
1803 if (last_idx >= 0) {
1805 fib_info_put(res->fi);
1806 res->fi = last_resort;
1808 atomic_inc(&last_resort->fib_clntref);
1810 trie_last_dflt = last_idx;
1812 read_unlock(&fib_lock);
1815 static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
1816 struct sk_buff *skb, struct netlink_callback *cb)
1819 struct fib_alias *fa;
1821 u32 xkey = htonl(key);
1826 list_for_each_entry(fa, fah, fa_list) {
1831 if (fa->fa_info->fib_nh == NULL) {
1832 printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1836 if (fa->fa_info == NULL) {
1837 printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1842 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1851 fa->fa_info, 0) < 0) {
1861 static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
1862 struct netlink_callback *cb)
1865 struct list_head *fa_head;
1866 struct leaf *l = NULL;
1870 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
1874 memset(&cb->args[3], 0,
1875 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1877 fa_head = get_fa_head(l, plen);
1882 if (list_empty(fa_head))
1885 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
1894 static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1897 struct trie *t = (struct trie *) tb->tb_data;
1901 read_lock(&fib_lock);
1902 for (m = 0; m <= 32; m++) {
1906 memset(&cb->args[2], 0,
1907 sizeof(cb->args) - 2*sizeof(cb->args[0]));
1909 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
1914 read_unlock(&fib_lock);
1918 read_unlock(&fib_lock);
1922 /* Fix more generic FIB names for init later */
1924 #ifdef CONFIG_IP_MULTIPLE_TABLES
1925 struct fib_table * fib_hash_init(int id)
1927 struct fib_table * __init fib_hash_init(int id)
1930 struct fib_table *tb;
1933 if (fn_alias_kmem == NULL)
1934 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1935 sizeof(struct fib_alias),
1936 0, SLAB_HWCACHE_ALIGN,
1939 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1945 tb->tb_lookup = fn_trie_lookup;
1946 tb->tb_insert = fn_trie_insert;
1947 tb->tb_delete = fn_trie_delete;
1948 tb->tb_flush = fn_trie_flush;
1949 tb->tb_select_default = fn_trie_select_default;
1950 tb->tb_dump = fn_trie_dump;
1951 memset(tb->tb_data, 0, sizeof(struct trie));
1953 t = (struct trie *) tb->tb_data;
1957 if (id == RT_TABLE_LOCAL)
1959 else if (id == RT_TABLE_MAIN)
1962 if (id == RT_TABLE_LOCAL)
1963 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
1968 /* Trie dump functions */
1970 static void putspace_seq(struct seq_file *seq, int n)
1973 seq_printf(seq, " ");
1976 static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
1979 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0");
1982 static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
1983 int pend, int cindex, int bits)
1985 putspace_seq(seq, indent);
1987 seq_printf(seq, "|");
1989 seq_printf(seq, "+");
1991 seq_printf(seq, "%d/", cindex);
1992 printbin_seq(seq, cindex, bits);
1993 seq_printf(seq, ": ");
1995 seq_printf(seq, "<root>: ");
1996 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
1999 struct leaf *l = (struct leaf *)n;
2000 struct fib_alias *fa;
2003 seq_printf(seq, "key=%d.%d.%d.%d\n",
2004 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
2006 for (i = 32; i >= 0; i--)
2007 if (find_leaf_info(&l->list, i)) {
2008 struct list_head *fa_head = get_fa_head(l, i);
2013 if (list_empty(fa_head))
2016 putspace_seq(seq, indent+2);
2017 seq_printf(seq, "{/%d...dumping}\n", i);
2019 list_for_each_entry(fa, fa_head, fa_list) {
2020 putspace_seq(seq, indent+2);
2021 if (fa->fa_info == NULL) {
2022 seq_printf(seq, "Error fa_info=NULL\n");
2025 if (fa->fa_info->fib_nh == NULL) {
2026 seq_printf(seq, "Error _fib_nh=NULL\n");
2030 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
2037 struct tnode *tn = (struct tnode *)n;
2038 int plen = ((struct tnode *)n)->pos;
2039 t_key prf = MASK_PFX(n->key, plen);
2041 seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
2042 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
2044 putspace_seq(seq, indent); seq_printf(seq, "| ");
2045 seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos));
2046 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
2047 seq_printf(seq, "}\n");
2048 putspace_seq(seq, indent); seq_printf(seq, "| ");
2049 seq_printf(seq, "{pos=%d", tn->pos);
2050 seq_printf(seq, " (skip=%d bits)", tn->pos - pend);
2051 seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits));
2052 putspace_seq(seq, indent); seq_printf(seq, "| ");
2053 seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children);
2057 static void trie_dump_seq(struct seq_file *seq, struct trie *t)
2059 struct node *n = t->trie;
2066 read_lock(&fib_lock);
2068 seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
2071 seq_printf(seq, "------ trie is empty\n");
2073 read_unlock(&fib_lock);
2077 printnode_seq(seq, indent, n, pend, cindex, 0);
2080 read_unlock(&fib_lock);
2084 tn = (struct tnode *)n;
2085 pend = tn->pos+tn->bits;
2086 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2090 while (tn && cindex < (1 << tn->bits)) {
2091 if (tn->child[cindex]) {
2094 printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits);
2095 if (IS_LEAF(tn->child[cindex])) {
2099 * New tnode. Decend one level
2103 tn = (struct tnode *)tn->child[cindex];
2104 pend = tn->pos + tn->bits;
2105 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2113 * Test if we are done
2116 while (cindex >= (1 << tn->bits)) {
2118 * Move upwards and test for root
2119 * pop off all traversed nodes
2122 if (NODE_PARENT(tn) == NULL) {
2127 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2129 tn = NODE_PARENT(tn);
2130 pend = tn->pos + tn->bits;
2136 read_unlock(&fib_lock);
2139 static struct trie_stat *trie_stat_new(void)
2141 struct trie_stat *s;
2144 s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
2152 s->nullpointers = 0;
2154 for (i = 0; i < MAX_CHILDS; i++)
2155 s->nodesizes[i] = 0;
2160 static struct trie_stat *trie_collect_stats(struct trie *t)
2162 struct node *n = t->trie;
2163 struct trie_stat *s = trie_stat_new();
2173 read_lock(&fib_lock);
2176 struct tnode *tn = (struct tnode *)n;
2177 pend = tn->pos+tn->bits;
2178 s->nodesizes[tn->bits]++;
2181 while (tn && cindex < (1 << tn->bits)) {
2182 if (tn->child[cindex]) {
2185 if (IS_LEAF(tn->child[cindex])) {
2189 if (depth > s->maxdepth)
2190 s->maxdepth = depth;
2191 s->totdepth += depth;
2195 * New tnode. Decend one level
2199 s->nodesizes[tn->bits]++;
2202 n = tn->child[cindex];
2203 tn = (struct tnode *)n;
2204 pend = tn->pos+tn->bits;
2214 * Test if we are done
2217 while (cindex >= (1 << tn->bits)) {
2219 * Move upwards and test for root
2220 * pop off all traversed nodes
2223 if (NODE_PARENT(tn) == NULL) {
2229 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2230 tn = NODE_PARENT(tn);
2232 n = (struct node *)tn;
2233 pend = tn->pos+tn->bits;
2239 read_unlock(&fib_lock);
2243 #ifdef CONFIG_PROC_FS
2245 static struct fib_alias *fib_triestat_get_first(struct seq_file *seq)
2250 static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
2255 static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
2257 if (!ip_fib_main_table)
2261 return fib_triestat_get_next(seq);
2263 return SEQ_START_TOKEN;
2266 static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2269 if (v == SEQ_START_TOKEN)
2270 return fib_triestat_get_first(seq);
2272 return fib_triestat_get_next(seq);
2275 static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
2281 * This outputs /proc/net/fib_triestats
2283 * It always works in backward compatibility mode.
2284 * The format of the file is not supposed to be changed.
2287 static void collect_and_show(struct trie *t, struct seq_file *seq)
2289 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */
2290 int i, max, pointers;
2291 struct trie_stat *stat;
2294 stat = trie_collect_stats(t);
2297 seq_printf(seq, "trie=%p\n", t);
2301 avdepth = stat->totdepth*100 / stat->leaves;
2304 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100);
2305 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
2307 seq_printf(seq, "Leaves: %d\n", stat->leaves);
2308 bytes += sizeof(struct leaf) * stat->leaves;
2309 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
2310 bytes += sizeof(struct tnode) * stat->tnodes;
2314 while (max >= 0 && stat->nodesizes[max] == 0)
2318 for (i = 1; i <= max; i++)
2319 if (stat->nodesizes[i] != 0) {
2320 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2321 pointers += (1<<i) * stat->nodesizes[i];
2323 seq_printf(seq, "\n");
2324 seq_printf(seq, "Pointers: %d\n", pointers);
2325 bytes += sizeof(struct node *) * pointers;
2326 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2327 seq_printf(seq, "Total size: %d kB\n", bytes / 1024);
2332 #ifdef CONFIG_IP_FIB_TRIE_STATS
2333 seq_printf(seq, "Counters:\n---------\n");
2334 seq_printf(seq,"gets = %d\n", t->stats.gets);
2335 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2336 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2337 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2338 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2339 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
2341 memset(&(t->stats), 0, sizeof(t->stats));
2343 #endif /* CONFIG_IP_FIB_TRIE_STATS */
2346 static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2350 if (v == SEQ_START_TOKEN) {
2351 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2352 sizeof(struct leaf), sizeof(struct tnode));
2354 collect_and_show(trie_local, seq);
2357 collect_and_show(trie_main, seq);
2359 snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400);
2361 seq_printf(seq, "%-127s\n", bf);
2366 static struct seq_operations fib_triestat_seq_ops = {
2367 .start = fib_triestat_seq_start,
2368 .next = fib_triestat_seq_next,
2369 .stop = fib_triestat_seq_stop,
2370 .show = fib_triestat_seq_show,
2373 static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2375 struct seq_file *seq;
2378 rc = seq_open(file, &fib_triestat_seq_ops);
2382 seq = file->private_data;
2389 static struct file_operations fib_triestat_seq_fops = {
2390 .owner = THIS_MODULE,
2391 .open = fib_triestat_seq_open,
2393 .llseek = seq_lseek,
2394 .release = seq_release_private,
2397 int __init fib_stat_proc_init(void)
2399 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops))
2404 void __init fib_stat_proc_exit(void)
2406 proc_net_remove("fib_triestat");
2409 static struct fib_alias *fib_trie_get_first(struct seq_file *seq)
2414 static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
2419 static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2421 if (!ip_fib_main_table)
2425 return fib_trie_get_next(seq);
2427 return SEQ_START_TOKEN;
2430 static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2433 if (v == SEQ_START_TOKEN)
2434 return fib_trie_get_first(seq);
2436 return fib_trie_get_next(seq);
2440 static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2445 * This outputs /proc/net/fib_trie.
2447 * It always works in backward compatibility mode.
2448 * The format of the file is not supposed to be changed.
2451 static int fib_trie_seq_show(struct seq_file *seq, void *v)
2455 if (v == SEQ_START_TOKEN) {
2457 trie_dump_seq(seq, trie_local);
2460 trie_dump_seq(seq, trie_main);
2462 snprintf(bf, sizeof(bf),
2463 "*\t%08X\t%08X", 200, 400);
2464 seq_printf(seq, "%-127s\n", bf);
2470 static struct seq_operations fib_trie_seq_ops = {
2471 .start = fib_trie_seq_start,
2472 .next = fib_trie_seq_next,
2473 .stop = fib_trie_seq_stop,
2474 .show = fib_trie_seq_show,
2477 static int fib_trie_seq_open(struct inode *inode, struct file *file)
2479 struct seq_file *seq;
2482 rc = seq_open(file, &fib_trie_seq_ops);
2486 seq = file->private_data;
2493 static struct file_operations fib_trie_seq_fops = {
2494 .owner = THIS_MODULE,
2495 .open = fib_trie_seq_open,
2497 .llseek = seq_lseek,
2498 .release= seq_release_private,
2501 int __init fib_proc_init(void)
2503 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops))
2508 void __init fib_proc_exit(void)
2510 proc_net_remove("fib_trie");
2513 #endif /* CONFIG_PROC_FS */