/*
 * Source: linux-2.6-omap-h63xx.git — net/netfilter/nf_conntrack_expect.c
 * (blob e0cd9d00aa61e872f2f769c142e065a529fcdf23)
 */
1 /* Expectation handling for nf_conntrack. */
2
3 /* (C) 1999-2001 Paul `Rusty' Russell
4  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11
12 #include <linux/types.h>
13 #include <linux/netfilter.h>
14 #include <linux/skbuff.h>
15 #include <linux/proc_fs.h>
16 #include <linux/seq_file.h>
17 #include <linux/stddef.h>
18 #include <linux/slab.h>
19 #include <linux/err.h>
20 #include <linux/percpu.h>
21 #include <linux/kernel.h>
22 #include <linux/jhash.h>
23 #include <net/net_namespace.h>
24
25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_core.h>
27 #include <net/netfilter/nf_conntrack_expect.h>
28 #include <net/netfilter/nf_conntrack_helper.h>
29 #include <net/netfilter/nf_conntrack_tuple.h>
30
/* Global hash table of pending expectations, indexed by
 * nf_ct_expect_dst_hash() of the expected tuple. */
struct hlist_head *nf_ct_expect_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hash);

/* Number of buckets in nf_ct_expect_hash (settable via module parameter). */
unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

/* Random seed for the expectation hash; drawn lazily on first use. */
static unsigned int nf_ct_expect_hash_rnd __read_mostly;
/* Current number of expectations linked into the hash table. */
static unsigned int nf_ct_expect_count;
/* Hard cap on nf_ct_expect_count; nf_ct_expect_related() fails beyond it. */
unsigned int nf_ct_expect_max __read_mostly;
/* Nonzero once nf_ct_expect_hash_rnd has been initialized. */
static int nf_ct_expect_hash_rnd_initted __read_mostly;
/* Nonzero if the hash table was vmalloc'ed (needed when freeing it). */
static int nf_ct_expect_vmalloc;

/* Slab cache for struct nf_conntrack_expect objects. */
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
45 /* nf_conntrack_expect helper functions */
46 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
47 {
48         struct nf_conn_help *master_help = nfct_help(exp->master);
49
50         NF_CT_ASSERT(master_help);
51         NF_CT_ASSERT(!timer_pending(&exp->timeout));
52
53         hlist_del(&exp->hnode);
54         nf_ct_expect_count--;
55
56         hlist_del(&exp->lnode);
57         master_help->expecting--;
58         nf_ct_expect_put(exp);
59
60         NF_CT_STAT_INC(expect_delete);
61 }
62 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
63
64 static void nf_ct_expectation_timed_out(unsigned long ul_expect)
65 {
66         struct nf_conntrack_expect *exp = (void *)ul_expect;
67
68         write_lock_bh(&nf_conntrack_lock);
69         nf_ct_unlink_expect(exp);
70         write_unlock_bh(&nf_conntrack_lock);
71         nf_ct_expect_put(exp);
72 }
73
74 static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
75 {
76         unsigned int hash;
77
78         if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
79                 get_random_bytes(&nf_ct_expect_hash_rnd, 4);
80                 nf_ct_expect_hash_rnd_initted = 1;
81         }
82
83         hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
84                       (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
85                        (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
86         return ((u64)hash * nf_ct_expect_hsize) >> 32;
87 }
88
89 struct nf_conntrack_expect *
90 __nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
91 {
92         struct nf_conntrack_expect *i;
93         struct hlist_node *n;
94         unsigned int h;
95
96         if (!nf_ct_expect_count)
97                 return NULL;
98
99         h = nf_ct_expect_dst_hash(tuple);
100         hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
101                 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
102                         return i;
103         }
104         return NULL;
105 }
106 EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
107
108 /* Just find a expectation corresponding to a tuple. */
109 struct nf_conntrack_expect *
110 nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple)
111 {
112         struct nf_conntrack_expect *i;
113
114         read_lock_bh(&nf_conntrack_lock);
115         i = __nf_ct_expect_find(tuple);
116         if (i)
117                 atomic_inc(&i->use);
118         read_unlock_bh(&nf_conntrack_lock);
119
120         return i;
121 }
122 EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
123
/* If an expectation for this connection is found, it gets deleted from
 * the global list and returned with a reference the caller must drop.
 * Caller holds nf_conntrack_lock. */
struct nf_conntrack_expect *
nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *exp;

	exp = __nf_ct_expect_find(tuple);
	if (!exp)
		return NULL;

	/* If master is not in hash table yet (ie. packet hasn't left
	   this machine yet), how can other end know about expected?
	   Hence these are not the droids you are looking for (if
	   master ct never got confirmed, we'd hold a reference to it
	   and weird things would happen to future packets). */
	if (!nf_ct_is_confirmed(exp->master))
		return NULL;

	if (exp->flags & NF_CT_EXPECT_PERMANENT) {
		/* Permanent expectations stay registered; just take an
		 * extra reference for the caller. */
		atomic_inc(&exp->use);
		return exp;
	} else if (del_timer(&exp->timeout)) {
		/* We beat the timeout: the timer's reference passes to
		 * the caller, unlink drops the hash/list reference. */
		nf_ct_unlink_expect(exp);
		return exp;
	}

	/* Timer already fired; the expectation is being torn down. */
	return NULL;
}
153
154 /* delete all expectations for this conntrack */
155 void nf_ct_remove_expectations(struct nf_conn *ct)
156 {
157         struct nf_conn_help *help = nfct_help(ct);
158         struct nf_conntrack_expect *exp;
159         struct hlist_node *n, *next;
160
161         /* Optimization: most connection never expect any others. */
162         if (!help || help->expecting == 0)
163                 return;
164
165         hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
166                 if (del_timer(&exp->timeout)) {
167                         nf_ct_unlink_expect(exp);
168                         nf_ct_expect_put(exp);
169                 }
170         }
171 }
172 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
173
174 /* Would two expected things clash? */
175 static inline int expect_clash(const struct nf_conntrack_expect *a,
176                                const struct nf_conntrack_expect *b)
177 {
178         /* Part covered by intersection of masks must be unequal,
179            otherwise they clash */
180         struct nf_conntrack_tuple_mask intersect_mask;
181         int count;
182
183         intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
184
185         for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
186                 intersect_mask.src.u3.all[count] =
187                         a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
188         }
189
190         return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
191 }
192
193 static inline int expect_matches(const struct nf_conntrack_expect *a,
194                                  const struct nf_conntrack_expect *b)
195 {
196         return a->master == b->master
197                 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
198                 && nf_ct_tuple_mask_equal(&a->mask, &b->mask);
199 }
200
201 /* Generally a bad idea to call this: could have matched already. */
202 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
203 {
204         write_lock_bh(&nf_conntrack_lock);
205         if (del_timer(&exp->timeout)) {
206                 nf_ct_unlink_expect(exp);
207                 nf_ct_expect_put(exp);
208         }
209         write_unlock_bh(&nf_conntrack_lock);
210 }
211 EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
212
213 /* We don't increase the master conntrack refcount for non-fulfilled
214  * conntracks. During the conntrack destruction, the expectations are
215  * always killed before the conntrack itself */
216 struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
217 {
218         struct nf_conntrack_expect *new;
219
220         new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
221         if (!new)
222                 return NULL;
223
224         new->master = me;
225         atomic_set(&new->use, 1);
226         return new;
227 }
228 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
229
230 void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
231                        union nf_inet_addr *saddr,
232                        union nf_inet_addr *daddr,
233                        u_int8_t proto, __be16 *src, __be16 *dst)
234 {
235         int len;
236
237         if (family == AF_INET)
238                 len = 4;
239         else
240                 len = 16;
241
242         exp->flags = 0;
243         exp->expectfn = NULL;
244         exp->helper = NULL;
245         exp->tuple.src.l3num = family;
246         exp->tuple.dst.protonum = proto;
247
248         if (saddr) {
249                 memcpy(&exp->tuple.src.u3, saddr, len);
250                 if (sizeof(exp->tuple.src.u3) > len)
251                         /* address needs to be cleared for nf_ct_tuple_equal */
252                         memset((void *)&exp->tuple.src.u3 + len, 0x00,
253                                sizeof(exp->tuple.src.u3) - len);
254                 memset(&exp->mask.src.u3, 0xFF, len);
255                 if (sizeof(exp->mask.src.u3) > len)
256                         memset((void *)&exp->mask.src.u3 + len, 0x00,
257                                sizeof(exp->mask.src.u3) - len);
258         } else {
259                 memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
260                 memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
261         }
262
263         if (src) {
264                 exp->tuple.src.u.all = *src;
265                 exp->mask.src.u.all = htons(0xFFFF);
266         } else {
267                 exp->tuple.src.u.all = 0;
268                 exp->mask.src.u.all = 0;
269         }
270
271         memcpy(&exp->tuple.dst.u3, daddr, len);
272         if (sizeof(exp->tuple.dst.u3) > len)
273                 /* address needs to be cleared for nf_ct_tuple_equal */
274                 memset((void *)&exp->tuple.dst.u3 + len, 0x00,
275                        sizeof(exp->tuple.dst.u3) - len);
276
277         exp->tuple.dst.u.all = *dst;
278 }
279 EXPORT_SYMBOL_GPL(nf_ct_expect_init);
280
281 void nf_ct_expect_put(struct nf_conntrack_expect *exp)
282 {
283         if (atomic_dec_and_test(&exp->use))
284                 kmem_cache_free(nf_ct_expect_cachep, exp);
285 }
286 EXPORT_SYMBOL_GPL(nf_ct_expect_put);
287
/* Link @exp into its master's expectation list and the global hash and
 * arm its timeout timer.  Caller holds nf_conntrack_lock.
 *
 * Two references are taken: one owned by the hash/list linkage
 * (dropped by nf_ct_unlink_expect()) and one owned by the pending
 * timer (dropped by nf_ct_expectation_timed_out()). */
static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

	/* Reference held by the list/hash linkage. */
	atomic_inc(&exp->use);

	hlist_add_head(&exp->lnode, &master_help->expectations);
	master_help->expecting++;

	hlist_add_head(&exp->hnode, &nf_ct_expect_hash[h]);
	nf_ct_expect_count++;

	/* Timeout (in seconds) comes from the helper that set this up. */
	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
		    (unsigned long)exp);
	exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
	add_timer(&exp->timeout);

	/* Reference held by the now-running timer. */
	atomic_inc(&exp->use);
	NF_CT_STAT_INC(expect_create);
}
309
310 /* Race with expectations being used means we could have none to find; OK. */
311 static void evict_oldest_expect(struct nf_conn *master)
312 {
313         struct nf_conn_help *master_help = nfct_help(master);
314         struct nf_conntrack_expect *exp = NULL;
315         struct hlist_node *n;
316
317         hlist_for_each_entry(exp, n, &master_help->expectations, lnode)
318                 ; /* nothing */
319
320         if (exp && del_timer(&exp->timeout)) {
321                 nf_ct_unlink_expect(exp);
322                 nf_ct_expect_put(exp);
323         }
324 }
325
326 static inline int refresh_timer(struct nf_conntrack_expect *i)
327 {
328         struct nf_conn_help *master_help = nfct_help(i->master);
329
330         if (!del_timer(&i->timeout))
331                 return 0;
332
333         i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
334         add_timer(&i->timeout);
335         return 1;
336 }
337
338 int nf_ct_expect_related(struct nf_conntrack_expect *expect)
339 {
340         struct nf_conntrack_expect *i;
341         struct nf_conn *master = expect->master;
342         struct nf_conn_help *master_help = nfct_help(master);
343         struct hlist_node *n;
344         unsigned int h;
345         int ret;
346
347         NF_CT_ASSERT(master_help);
348
349         write_lock_bh(&nf_conntrack_lock);
350         if (!master_help->helper) {
351                 ret = -ESHUTDOWN;
352                 goto out;
353         }
354         h = nf_ct_expect_dst_hash(&expect->tuple);
355         hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
356                 if (expect_matches(i, expect)) {
357                         /* Refresh timer: if it's dying, ignore.. */
358                         if (refresh_timer(i)) {
359                                 ret = 0;
360                                 goto out;
361                         }
362                 } else if (expect_clash(i, expect)) {
363                         ret = -EBUSY;
364                         goto out;
365                 }
366         }
367         /* Will be over limit? */
368         if (master_help->helper->max_expected &&
369             master_help->expecting >= master_help->helper->max_expected)
370                 evict_oldest_expect(master);
371
372         if (nf_ct_expect_count >= nf_ct_expect_max) {
373                 if (net_ratelimit())
374                         printk(KERN_WARNING
375                                "nf_conntrack: expectation table full");
376                 ret = -EMFILE;
377                 goto out;
378         }
379
380         nf_ct_expect_insert(expect);
381         nf_ct_expect_event(IPEXP_NEW, expect);
382         ret = 0;
383 out:
384         write_unlock_bh(&nf_conntrack_lock);
385         return ret;
386 }
387 EXPORT_SYMBOL_GPL(nf_ct_expect_related);
388
389 #ifdef CONFIG_PROC_FS
/* Per-reader seq_file iterator state: the hash bucket being walked. */
struct ct_expect_iter_state {
	unsigned int bucket;
};
393
394 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
395 {
396         struct ct_expect_iter_state *st = seq->private;
397
398         for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
399                 if (!hlist_empty(&nf_ct_expect_hash[st->bucket]))
400                         return nf_ct_expect_hash[st->bucket].first;
401         }
402         return NULL;
403 }
404
405 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
406                                              struct hlist_node *head)
407 {
408         struct ct_expect_iter_state *st = seq->private;
409
410         head = head->next;
411         while (head == NULL) {
412                 if (++st->bucket >= nf_ct_expect_hsize)
413                         return NULL;
414                 head = nf_ct_expect_hash[st->bucket].first;
415         }
416         return head;
417 }
418
419 static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
420 {
421         struct hlist_node *head = ct_expect_get_first(seq);
422
423         if (head)
424                 while (pos && (head = ct_expect_get_next(seq, head)))
425                         pos--;
426         return pos ? NULL : head;
427 }
428
/* seq_file ->start: take the conntrack lock for the whole dump and
 * position the iterator at *pos. */
static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock_bh(&nf_conntrack_lock);
	return ct_expect_get_idx(seq, *pos);
}

/* seq_file ->next: advance to the following expectation. */
static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return ct_expect_get_next(seq, v);
}

/* seq_file ->stop: release the lock taken in exp_seq_start(). */
static void exp_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock_bh(&nf_conntrack_lock);
}
445
446 static int exp_seq_show(struct seq_file *s, void *v)
447 {
448         struct nf_conntrack_expect *expect;
449         struct hlist_node *n = v;
450
451         expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
452
453         if (expect->timeout.function)
454                 seq_printf(s, "%ld ", timer_pending(&expect->timeout)
455                            ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
456         else
457                 seq_printf(s, "- ");
458         seq_printf(s, "l3proto = %u proto=%u ",
459                    expect->tuple.src.l3num,
460                    expect->tuple.dst.protonum);
461         print_tuple(s, &expect->tuple,
462                     __nf_ct_l3proto_find(expect->tuple.src.l3num),
463                     __nf_ct_l4proto_find(expect->tuple.src.l3num,
464                                        expect->tuple.dst.protonum));
465         return seq_putc(s, '\n');
466 }
467
/* seq_file iteration callbacks for /proc/net/nf_conntrack_expect. */
static const struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};
474
/* open(): attach the seq ops and allocate per-reader iterator state. */
static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &exp_seq_ops,
			sizeof(struct ct_expect_iter_state));
}
480
/* File operations for the proc entry; seq_release_private also frees
 * the iterator state allocated in exp_open(). */
static const struct file_operations exp_file_ops = {
	.owner   = THIS_MODULE,
	.open    = exp_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
488 #endif /* CONFIG_PROC_FS */
489
490 static int __init exp_proc_init(void)
491 {
492 #ifdef CONFIG_PROC_FS
493         struct proc_dir_entry *proc;
494
495         proc = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, &exp_file_ops);
496         if (!proc)
497                 return -ENOMEM;
498 #endif /* CONFIG_PROC_FS */
499         return 0;
500 }
501
/* Remove the proc entry created by exp_proc_init(). */
static void exp_proc_remove(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(&init_net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}
508
/* Expose the expectation hash size as module parameter "expect_hashsize". */
module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
510
511 int __init nf_conntrack_expect_init(void)
512 {
513         int err = -ENOMEM;
514
515         if (!nf_ct_expect_hsize) {
516                 nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
517                 if (!nf_ct_expect_hsize)
518                         nf_ct_expect_hsize = 1;
519         }
520         nf_ct_expect_max = nf_ct_expect_hsize * 4;
521
522         nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
523                                                   &nf_ct_expect_vmalloc);
524         if (nf_ct_expect_hash == NULL)
525                 goto err1;
526
527         nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
528                                         sizeof(struct nf_conntrack_expect),
529                                         0, 0, NULL);
530         if (!nf_ct_expect_cachep)
531                 goto err2;
532
533         err = exp_proc_init();
534         if (err < 0)
535                 goto err3;
536
537         return 0;
538
539 err3:
540         nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
541                              nf_ct_expect_hsize);
542 err2:
543         kmem_cache_destroy(nf_ct_expect_cachep);
544 err1:
545         return err;
546 }
547
/* Module exit: remove the proc entry first, then free the slab cache
 * and the hash table allocated in nf_conntrack_expect_init(). */
void nf_conntrack_expect_fini(void)
{
	exp_proc_remove();
	kmem_cache_destroy(nf_ct_expect_cachep);
	nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
			     nf_ct_expect_hsize);
}