]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - net/xfrm/xfrm_policy.c
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[linux-2.6-omap-h63xx.git] / net / xfrm / xfrm_policy.c
1 /*
2  * xfrm_policy.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      Kazunori MIYAZAWA @USAGI
10  *      YOSHIFUJI Hideaki
11  *              Split up af-specific portion
12  *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
13  *
14  */
15
16 #include <linux/err.h>
17 #include <linux/slab.h>
18 #include <linux/kmod.h>
19 #include <linux/list.h>
20 #include <linux/spinlock.h>
21 #include <linux/workqueue.h>
22 #include <linux/notifier.h>
23 #include <linux/netdevice.h>
24 #include <linux/netfilter.h>
25 #include <linux/module.h>
26 #include <linux/cache.h>
27 #include <linux/audit.h>
28 #include <net/dst.h>
29 #include <net/xfrm.h>
30 #include <net/ip.h>
31 #ifdef CONFIG_XFRM_STATISTICS
32 #include <net/snmp.h>
33 #endif
34
35 #include "xfrm_hash.h"
36
37 DEFINE_MUTEX(xfrm_cfg_mutex);
38 EXPORT_SYMBOL(xfrm_cfg_mutex);
39
40 static DEFINE_RWLOCK(xfrm_policy_lock);
41
42 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
43 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
44
45 static struct kmem_cache *xfrm_dst_cache __read_mostly;
46
47 static HLIST_HEAD(xfrm_policy_gc_list);
48 static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
49
50 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
51 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
52 static void xfrm_init_pmtu(struct dst_entry *dst);
53
54 static inline int
55 __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
56 {
57         return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
58                 addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
59                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
60                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
61                 (fl->proto == sel->proto || !sel->proto) &&
62                 (fl->oif == sel->ifindex || !sel->ifindex);
63 }
64
65 static inline int
66 __xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
67 {
68         return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
69                 addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
70                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
71                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
72                 (fl->proto == sel->proto || !sel->proto) &&
73                 (fl->oif == sel->ifindex || !sel->ifindex);
74 }
75
76 int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
77                     unsigned short family)
78 {
79         switch (family) {
80         case AF_INET:
81                 return __xfrm4_selector_match(sel, fl);
82         case AF_INET6:
83                 return __xfrm6_selector_match(sel, fl);
84         }
85         return 0;
86 }
87
88 static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
89                                                   xfrm_address_t *saddr,
90                                                   xfrm_address_t *daddr,
91                                                   int family)
92 {
93         struct xfrm_policy_afinfo *afinfo;
94         struct dst_entry *dst;
95
96         afinfo = xfrm_policy_get_afinfo(family);
97         if (unlikely(afinfo == NULL))
98                 return ERR_PTR(-EAFNOSUPPORT);
99
100         dst = afinfo->dst_lookup(net, tos, saddr, daddr);
101
102         xfrm_policy_put_afinfo(afinfo);
103
104         return dst;
105 }
106
107 static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
108                                                 xfrm_address_t *prev_saddr,
109                                                 xfrm_address_t *prev_daddr,
110                                                 int family)
111 {
112         struct net *net = xs_net(x);
113         xfrm_address_t *saddr = &x->props.saddr;
114         xfrm_address_t *daddr = &x->id.daddr;
115         struct dst_entry *dst;
116
117         if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
118                 saddr = x->coaddr;
119                 daddr = prev_daddr;
120         }
121         if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
122                 saddr = prev_saddr;
123                 daddr = x->coaddr;
124         }
125
126         dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family);
127
128         if (!IS_ERR(dst)) {
129                 if (prev_saddr != saddr)
130                         memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
131                 if (prev_daddr != daddr)
132                         memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
133         }
134
135         return dst;
136 }
137
138 static inline unsigned long make_jiffies(long secs)
139 {
140         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
141                 return MAX_SCHEDULE_TIMEOUT-1;
142         else
143                 return secs*HZ;
144 }
145
146 static void xfrm_policy_timer(unsigned long data)
147 {
148         struct xfrm_policy *xp = (struct xfrm_policy*)data;
149         unsigned long now = get_seconds();
150         long next = LONG_MAX;
151         int warn = 0;
152         int dir;
153
154         read_lock(&xp->lock);
155
156         if (xp->walk.dead)
157                 goto out;
158
159         dir = xfrm_policy_id2dir(xp->index);
160
161         if (xp->lft.hard_add_expires_seconds) {
162                 long tmo = xp->lft.hard_add_expires_seconds +
163                         xp->curlft.add_time - now;
164                 if (tmo <= 0)
165                         goto expired;
166                 if (tmo < next)
167                         next = tmo;
168         }
169         if (xp->lft.hard_use_expires_seconds) {
170                 long tmo = xp->lft.hard_use_expires_seconds +
171                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
172                 if (tmo <= 0)
173                         goto expired;
174                 if (tmo < next)
175                         next = tmo;
176         }
177         if (xp->lft.soft_add_expires_seconds) {
178                 long tmo = xp->lft.soft_add_expires_seconds +
179                         xp->curlft.add_time - now;
180                 if (tmo <= 0) {
181                         warn = 1;
182                         tmo = XFRM_KM_TIMEOUT;
183                 }
184                 if (tmo < next)
185                         next = tmo;
186         }
187         if (xp->lft.soft_use_expires_seconds) {
188                 long tmo = xp->lft.soft_use_expires_seconds +
189                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
190                 if (tmo <= 0) {
191                         warn = 1;
192                         tmo = XFRM_KM_TIMEOUT;
193                 }
194                 if (tmo < next)
195                         next = tmo;
196         }
197
198         if (warn)
199                 km_policy_expired(xp, dir, 0, 0);
200         if (next != LONG_MAX &&
201             !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
202                 xfrm_pol_hold(xp);
203
204 out:
205         read_unlock(&xp->lock);
206         xfrm_pol_put(xp);
207         return;
208
209 expired:
210         read_unlock(&xp->lock);
211         if (!xfrm_policy_delete(xp, dir))
212                 km_policy_expired(xp, dir, 1, 0);
213         xfrm_pol_put(xp);
214 }
215
216
217 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
218  * SPD calls.
219  */
220
221 struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
222 {
223         struct xfrm_policy *policy;
224
225         policy = kzalloc(sizeof(struct xfrm_policy), gfp);
226
227         if (policy) {
228                 write_pnet(&policy->xp_net, net);
229                 INIT_LIST_HEAD(&policy->walk.all);
230                 INIT_HLIST_NODE(&policy->bydst);
231                 INIT_HLIST_NODE(&policy->byidx);
232                 rwlock_init(&policy->lock);
233                 atomic_set(&policy->refcnt, 1);
234                 setup_timer(&policy->timer, xfrm_policy_timer,
235                                 (unsigned long)policy);
236         }
237         return policy;
238 }
239 EXPORT_SYMBOL(xfrm_policy_alloc);
240
241 /* Destroy xfrm_policy: descendant resources must be released to this moment. */
242
243 void xfrm_policy_destroy(struct xfrm_policy *policy)
244 {
245         BUG_ON(!policy->walk.dead);
246
247         BUG_ON(policy->bundles);
248
249         if (del_timer(&policy->timer))
250                 BUG();
251
252         security_xfrm_policy_free(policy->security);
253         kfree(policy);
254 }
255 EXPORT_SYMBOL(xfrm_policy_destroy);
256
257 static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
258 {
259         struct dst_entry *dst;
260
261         while ((dst = policy->bundles) != NULL) {
262                 policy->bundles = dst->next;
263                 dst_free(dst);
264         }
265
266         if (del_timer(&policy->timer))
267                 atomic_dec(&policy->refcnt);
268
269         if (atomic_read(&policy->refcnt) > 1)
270                 flow_cache_flush();
271
272         xfrm_pol_put(policy);
273 }
274
275 static void xfrm_policy_gc_task(struct work_struct *work)
276 {
277         struct xfrm_policy *policy;
278         struct hlist_node *entry, *tmp;
279         struct hlist_head gc_list;
280
281         spin_lock_bh(&xfrm_policy_gc_lock);
282         gc_list.first = xfrm_policy_gc_list.first;
283         INIT_HLIST_HEAD(&xfrm_policy_gc_list);
284         spin_unlock_bh(&xfrm_policy_gc_lock);
285
286         hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
287                 xfrm_policy_gc_kill(policy);
288 }
289 static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
290
291 /* Rule must be locked. Release descentant resources, announce
292  * entry dead. The rule must be unlinked from lists to the moment.
293  */
294
295 static void xfrm_policy_kill(struct xfrm_policy *policy)
296 {
297         int dead;
298
299         write_lock_bh(&policy->lock);
300         dead = policy->walk.dead;
301         policy->walk.dead = 1;
302         write_unlock_bh(&policy->lock);
303
304         if (unlikely(dead)) {
305                 WARN_ON(1);
306                 return;
307         }
308
309         spin_lock_bh(&xfrm_policy_gc_lock);
310         hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
311         spin_unlock_bh(&xfrm_policy_gc_lock);
312
313         schedule_work(&xfrm_policy_gc_work);
314 }
315
316 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
317
318 static inline unsigned int idx_hash(struct net *net, u32 index)
319 {
320         return __idx_hash(index, net->xfrm.policy_idx_hmask);
321 }
322
323 static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selector *sel, unsigned short family, int dir)
324 {
325         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
326         unsigned int hash = __sel_hash(sel, family, hmask);
327
328         return (hash == hmask + 1 ?
329                 &net->xfrm.policy_inexact[dir] :
330                 net->xfrm.policy_bydst[dir].table + hash);
331 }
332
333 static struct hlist_head *policy_hash_direct(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
334 {
335         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
336         unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
337
338         return net->xfrm.policy_bydst[dir].table + hash;
339 }
340
341 static void xfrm_dst_hash_transfer(struct hlist_head *list,
342                                    struct hlist_head *ndsttable,
343                                    unsigned int nhashmask)
344 {
345         struct hlist_node *entry, *tmp, *entry0 = NULL;
346         struct xfrm_policy *pol;
347         unsigned int h0 = 0;
348
349 redo:
350         hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
351                 unsigned int h;
352
353                 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
354                                 pol->family, nhashmask);
355                 if (!entry0) {
356                         hlist_del(entry);
357                         hlist_add_head(&pol->bydst, ndsttable+h);
358                         h0 = h;
359                 } else {
360                         if (h != h0)
361                                 continue;
362                         hlist_del(entry);
363                         hlist_add_after(entry0, &pol->bydst);
364                 }
365                 entry0 = entry;
366         }
367         if (!hlist_empty(list)) {
368                 entry0 = NULL;
369                 goto redo;
370         }
371 }
372
373 static void xfrm_idx_hash_transfer(struct hlist_head *list,
374                                    struct hlist_head *nidxtable,
375                                    unsigned int nhashmask)
376 {
377         struct hlist_node *entry, *tmp;
378         struct xfrm_policy *pol;
379
380         hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
381                 unsigned int h;
382
383                 h = __idx_hash(pol->index, nhashmask);
384                 hlist_add_head(&pol->byidx, nidxtable+h);
385         }
386 }
387
/*
 * Mask for a table twice the size of the one described by @old_hmask:
 * (old_hmask + 1) buckets become 2 * (old_hmask + 1), minus one.
 */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	unsigned int doubled = (old_hmask + 1) << 1;

	return doubled - 1;
}
392
393 static void xfrm_bydst_resize(struct net *net, int dir)
394 {
395         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
396         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
397         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
398         struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
399         struct hlist_head *ndst = xfrm_hash_alloc(nsize);
400         int i;
401
402         if (!ndst)
403                 return;
404
405         write_lock_bh(&xfrm_policy_lock);
406
407         for (i = hmask; i >= 0; i--)
408                 xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
409
410         net->xfrm.policy_bydst[dir].table = ndst;
411         net->xfrm.policy_bydst[dir].hmask = nhashmask;
412
413         write_unlock_bh(&xfrm_policy_lock);
414
415         xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
416 }
417
418 static void xfrm_byidx_resize(struct net *net, int total)
419 {
420         unsigned int hmask = net->xfrm.policy_idx_hmask;
421         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
422         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
423         struct hlist_head *oidx = net->xfrm.policy_byidx;
424         struct hlist_head *nidx = xfrm_hash_alloc(nsize);
425         int i;
426
427         if (!nidx)
428                 return;
429
430         write_lock_bh(&xfrm_policy_lock);
431
432         for (i = hmask; i >= 0; i--)
433                 xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
434
435         net->xfrm.policy_byidx = nidx;
436         net->xfrm.policy_idx_hmask = nhashmask;
437
438         write_unlock_bh(&xfrm_policy_lock);
439
440         xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
441 }
442
443 static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
444 {
445         unsigned int cnt = net->xfrm.policy_count[dir];
446         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
447
448         if (total)
449                 *total += cnt;
450
451         if ((hmask + 1) < xfrm_policy_hashmax &&
452             cnt > hmask)
453                 return 1;
454
455         return 0;
456 }
457
458 static inline int xfrm_byidx_should_resize(struct net *net, int total)
459 {
460         unsigned int hmask = net->xfrm.policy_idx_hmask;
461
462         if ((hmask + 1) < xfrm_policy_hashmax &&
463             total > hmask)
464                 return 1;
465
466         return 0;
467 }
468
469 void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
470 {
471         read_lock_bh(&xfrm_policy_lock);
472         si->incnt = init_net.xfrm.policy_count[XFRM_POLICY_IN];
473         si->outcnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT];
474         si->fwdcnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD];
475         si->inscnt = init_net.xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
476         si->outscnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
477         si->fwdscnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
478         si->spdhcnt = init_net.xfrm.policy_idx_hmask;
479         si->spdhmcnt = xfrm_policy_hashmax;
480         read_unlock_bh(&xfrm_policy_lock);
481 }
482 EXPORT_SYMBOL(xfrm_spd_getinfo);
483
484 static DEFINE_MUTEX(hash_resize_mutex);
485 static void xfrm_hash_resize(struct work_struct *work)
486 {
487         struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
488         int dir, total;
489
490         mutex_lock(&hash_resize_mutex);
491
492         total = 0;
493         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
494                 if (xfrm_bydst_should_resize(net, dir, &total))
495                         xfrm_bydst_resize(net, dir);
496         }
497         if (xfrm_byidx_should_resize(net, total))
498                 xfrm_byidx_resize(net, total);
499
500         mutex_unlock(&hash_resize_mutex);
501 }
502
503 /* Generate new index... KAME seems to generate them ordered by cost
504  * of an absolute inpredictability of ordering of rules. This will not pass. */
505 static u32 xfrm_gen_index(struct net *net, int dir)
506 {
507         static u32 idx_generator;
508
509         for (;;) {
510                 struct hlist_node *entry;
511                 struct hlist_head *list;
512                 struct xfrm_policy *p;
513                 u32 idx;
514                 int found;
515
516                 idx = (idx_generator | dir);
517                 idx_generator += 8;
518                 if (idx == 0)
519                         idx = 8;
520                 list = net->xfrm.policy_byidx + idx_hash(net, idx);
521                 found = 0;
522                 hlist_for_each_entry(p, entry, list, byidx) {
523                         if (p->index == idx) {
524                                 found = 1;
525                                 break;
526                         }
527                 }
528                 if (!found)
529                         return idx;
530         }
531 }
532
533 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
534 {
535         u32 *p1 = (u32 *) s1;
536         u32 *p2 = (u32 *) s2;
537         int len = sizeof(struct xfrm_selector) / sizeof(u32);
538         int i;
539
540         for (i = 0; i < len; i++) {
541                 if (p1[i] != p2[i])
542                         return 1;
543         }
544
545         return 0;
546 }
547
548 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
549 {
550         struct net *net = xp_net(policy);
551         struct xfrm_policy *pol;
552         struct xfrm_policy *delpol;
553         struct hlist_head *chain;
554         struct hlist_node *entry, *newpos;
555         struct dst_entry *gc_list;
556
557         write_lock_bh(&xfrm_policy_lock);
558         chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
559         delpol = NULL;
560         newpos = NULL;
561         hlist_for_each_entry(pol, entry, chain, bydst) {
562                 if (pol->type == policy->type &&
563                     !selector_cmp(&pol->selector, &policy->selector) &&
564                     xfrm_sec_ctx_match(pol->security, policy->security) &&
565                     !WARN_ON(delpol)) {
566                         if (excl) {
567                                 write_unlock_bh(&xfrm_policy_lock);
568                                 return -EEXIST;
569                         }
570                         delpol = pol;
571                         if (policy->priority > pol->priority)
572                                 continue;
573                 } else if (policy->priority >= pol->priority) {
574                         newpos = &pol->bydst;
575                         continue;
576                 }
577                 if (delpol)
578                         break;
579         }
580         if (newpos)
581                 hlist_add_after(newpos, &policy->bydst);
582         else
583                 hlist_add_head(&policy->bydst, chain);
584         xfrm_pol_hold(policy);
585         net->xfrm.policy_count[dir]++;
586         atomic_inc(&flow_cache_genid);
587         if (delpol) {
588                 hlist_del(&delpol->bydst);
589                 hlist_del(&delpol->byidx);
590                 list_del(&delpol->walk.all);
591                 net->xfrm.policy_count[dir]--;
592         }
593         policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir);
594         hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
595         policy->curlft.add_time = get_seconds();
596         policy->curlft.use_time = 0;
597         if (!mod_timer(&policy->timer, jiffies + HZ))
598                 xfrm_pol_hold(policy);
599         list_add(&policy->walk.all, &net->xfrm.policy_all);
600         write_unlock_bh(&xfrm_policy_lock);
601
602         if (delpol)
603                 xfrm_policy_kill(delpol);
604         else if (xfrm_bydst_should_resize(net, dir, NULL))
605                 schedule_work(&net->xfrm.policy_hash_work);
606
607         read_lock_bh(&xfrm_policy_lock);
608         gc_list = NULL;
609         entry = &policy->bydst;
610         hlist_for_each_entry_continue(policy, entry, bydst) {
611                 struct dst_entry *dst;
612
613                 write_lock(&policy->lock);
614                 dst = policy->bundles;
615                 if (dst) {
616                         struct dst_entry *tail = dst;
617                         while (tail->next)
618                                 tail = tail->next;
619                         tail->next = gc_list;
620                         gc_list = dst;
621
622                         policy->bundles = NULL;
623                 }
624                 write_unlock(&policy->lock);
625         }
626         read_unlock_bh(&xfrm_policy_lock);
627
628         while (gc_list) {
629                 struct dst_entry *dst = gc_list;
630
631                 gc_list = dst->next;
632                 dst_free(dst);
633         }
634
635         return 0;
636 }
637 EXPORT_SYMBOL(xfrm_policy_insert);
638
639 struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir,
640                                           struct xfrm_selector *sel,
641                                           struct xfrm_sec_ctx *ctx, int delete,
642                                           int *err)
643 {
644         struct xfrm_policy *pol, *ret;
645         struct hlist_head *chain;
646         struct hlist_node *entry;
647
648         *err = 0;
649         write_lock_bh(&xfrm_policy_lock);
650         chain = policy_hash_bysel(net, sel, sel->family, dir);
651         ret = NULL;
652         hlist_for_each_entry(pol, entry, chain, bydst) {
653                 if (pol->type == type &&
654                     !selector_cmp(sel, &pol->selector) &&
655                     xfrm_sec_ctx_match(ctx, pol->security)) {
656                         xfrm_pol_hold(pol);
657                         if (delete) {
658                                 *err = security_xfrm_policy_delete(
659                                                                 pol->security);
660                                 if (*err) {
661                                         write_unlock_bh(&xfrm_policy_lock);
662                                         return pol;
663                                 }
664                                 hlist_del(&pol->bydst);
665                                 hlist_del(&pol->byidx);
666                                 list_del(&pol->walk.all);
667                                 net->xfrm.policy_count[dir]--;
668                         }
669                         ret = pol;
670                         break;
671                 }
672         }
673         write_unlock_bh(&xfrm_policy_lock);
674
675         if (ret && delete) {
676                 atomic_inc(&flow_cache_genid);
677                 xfrm_policy_kill(ret);
678         }
679         return ret;
680 }
681 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
682
683 struct xfrm_policy *xfrm_policy_byid(struct net *net, u8 type, int dir, u32 id,
684                                      int delete, int *err)
685 {
686         struct xfrm_policy *pol, *ret;
687         struct hlist_head *chain;
688         struct hlist_node *entry;
689
690         *err = -ENOENT;
691         if (xfrm_policy_id2dir(id) != dir)
692                 return NULL;
693
694         *err = 0;
695         write_lock_bh(&xfrm_policy_lock);
696         chain = net->xfrm.policy_byidx + idx_hash(net, id);
697         ret = NULL;
698         hlist_for_each_entry(pol, entry, chain, byidx) {
699                 if (pol->type == type && pol->index == id) {
700                         xfrm_pol_hold(pol);
701                         if (delete) {
702                                 *err = security_xfrm_policy_delete(
703                                                                 pol->security);
704                                 if (*err) {
705                                         write_unlock_bh(&xfrm_policy_lock);
706                                         return pol;
707                                 }
708                                 hlist_del(&pol->bydst);
709                                 hlist_del(&pol->byidx);
710                                 list_del(&pol->walk.all);
711                                 net->xfrm.policy_count[dir]--;
712                         }
713                         ret = pol;
714                         break;
715                 }
716         }
717         write_unlock_bh(&xfrm_policy_lock);
718
719         if (ret && delete) {
720                 atomic_inc(&flow_cache_genid);
721                 xfrm_policy_kill(ret);
722         }
723         return ret;
724 }
725 EXPORT_SYMBOL(xfrm_policy_byid);
726
727 #ifdef CONFIG_SECURITY_NETWORK_XFRM
728 static inline int
729 xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
730 {
731         int dir, err = 0;
732
733         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
734                 struct xfrm_policy *pol;
735                 struct hlist_node *entry;
736                 int i;
737
738                 hlist_for_each_entry(pol, entry,
739                                      &net->xfrm.policy_inexact[dir], bydst) {
740                         if (pol->type != type)
741                                 continue;
742                         err = security_xfrm_policy_delete(pol->security);
743                         if (err) {
744                                 xfrm_audit_policy_delete(pol, 0,
745                                                          audit_info->loginuid,
746                                                          audit_info->sessionid,
747                                                          audit_info->secid);
748                                 return err;
749                         }
750                 }
751                 for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
752                         hlist_for_each_entry(pol, entry,
753                                              net->xfrm.policy_bydst[dir].table + i,
754                                              bydst) {
755                                 if (pol->type != type)
756                                         continue;
757                                 err = security_xfrm_policy_delete(
758                                                                 pol->security);
759                                 if (err) {
760                                         xfrm_audit_policy_delete(pol, 0,
761                                                         audit_info->loginuid,
762                                                         audit_info->sessionid,
763                                                         audit_info->secid);
764                                         return err;
765                                 }
766                         }
767                 }
768         }
769         return err;
770 }
771 #else
772 static inline int
773 xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
774 {
775         return 0;
776 }
777 #endif
778
779 int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
780 {
781         int dir, err = 0;
782
783         write_lock_bh(&xfrm_policy_lock);
784
785         err = xfrm_policy_flush_secctx_check(net, type, audit_info);
786         if (err)
787                 goto out;
788
789         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
790                 struct xfrm_policy *pol;
791                 struct hlist_node *entry;
792                 int i, killed;
793
794                 killed = 0;
795         again1:
796                 hlist_for_each_entry(pol, entry,
797                                      &net->xfrm.policy_inexact[dir], bydst) {
798                         if (pol->type != type)
799                                 continue;
800                         hlist_del(&pol->bydst);
801                         hlist_del(&pol->byidx);
802                         list_del(&pol->walk.all);
803                         write_unlock_bh(&xfrm_policy_lock);
804
805                         xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
806                                                  audit_info->sessionid,
807                                                  audit_info->secid);
808
809                         xfrm_policy_kill(pol);
810                         killed++;
811
812                         write_lock_bh(&xfrm_policy_lock);
813                         goto again1;
814                 }
815
816                 for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
817         again2:
818                         hlist_for_each_entry(pol, entry,
819                                              net->xfrm.policy_bydst[dir].table + i,
820                                              bydst) {
821                                 if (pol->type != type)
822                                         continue;
823                                 hlist_del(&pol->bydst);
824                                 hlist_del(&pol->byidx);
825                                 list_del(&pol->walk.all);
826                                 write_unlock_bh(&xfrm_policy_lock);
827
828                                 xfrm_audit_policy_delete(pol, 1,
829                                                          audit_info->loginuid,
830                                                          audit_info->sessionid,
831                                                          audit_info->secid);
832                                 xfrm_policy_kill(pol);
833                                 killed++;
834
835                                 write_lock_bh(&xfrm_policy_lock);
836                                 goto again2;
837                         }
838                 }
839
840                 net->xfrm.policy_count[dir] -= killed;
841         }
842         atomic_inc(&flow_cache_genid);
843 out:
844         write_unlock_bh(&xfrm_policy_lock);
845         return err;
846 }
847 EXPORT_SYMBOL(xfrm_policy_flush);
848
/*
 * Walk net->xfrm.policy_all and call func() on every live policy whose type
 * matches walk->type (every type when walk->type is XFRM_POLICY_TYPE_ANY).
 *
 * func() receives the policy, the direction derived from its index, the
 * current walk->seq and the caller's data pointer. The walk is resumable:
 * when func() returns non-zero the walker's own list node is parked in the
 * list at the entry being processed, and that value is returned so a later
 * call can pick up from there.
 *
 * Returns -EINVAL for an out-of-range walk->type, the non-zero value returned
 * by func(), -ENOENT if the end of the list is reached with walk->seq still 0,
 * and 0 otherwise.
 */
849 int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
850                      int (*func)(struct xfrm_policy *, int, int, void*),
851                      void *data)
852 {
853         struct xfrm_policy *pol;
854         struct xfrm_policy_walk_entry *x;
855         int error = 0;
856
857         if (walk->type >= XFRM_POLICY_TYPE_MAX &&
858             walk->type != XFRM_POLICY_TYPE_ANY)
859                 return -EINVAL;
860
        /* Walker is unlinked but has already reported entries: the
         * previous pass ran to completion, nothing more to do.
         */
861         if (list_empty(&walk->walk.all) && walk->seq != 0)
862                 return 0;
863
864         write_lock_bh(&xfrm_policy_lock);
        /* Fresh walk: start at the head of the list. Resumed walk: start
         * at the walker's own node, which was parked on an earlier pass.
         */
865         if (list_empty(&walk->walk.all))
866                 x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
867         else
868                 x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
869         list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
                /* Dead entries are skipped; this includes walker nodes,
                 * which xfrm_policy_walk_init() marks dead.
                 */
870                 if (x->dead)
871                         continue;
872                 pol = container_of(x, struct xfrm_policy, walk);
873                 if (walk->type != XFRM_POLICY_TYPE_ANY &&
874                     walk->type != pol->type)
875                         continue;
876                 error = func(pol, xfrm_policy_id2dir(pol->index),
877                              walk->seq, data);
878                 if (error) {
                        /* Remember our position relative to this entry
                         * so the next call resumes here.
                         */
879                         list_move_tail(&walk->walk.all, &x->all);
880                         goto out;
881                 }
882                 walk->seq++;
883         }
884         if (walk->seq == 0) {
885                 error = -ENOENT;
886                 goto out;
887         }
        /* Full traversal finished: unlink the walker and leave its node
         * self-linked so list_empty() reports it as detached.
         */
888         list_del_init(&walk->walk.all);
889 out:
890         write_unlock_bh(&xfrm_policy_lock);
891         return error;
892 }
893 EXPORT_SYMBOL(xfrm_policy_walk);
894
895 void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
896 {
897         INIT_LIST_HEAD(&walk->walk.all);
898         walk->walk.dead = 1;
899         walk->type = type;
900         walk->seq = 0;
901 }
902 EXPORT_SYMBOL(xfrm_policy_walk_init);
903
904 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk)
905 {
906         if (list_empty(&walk->walk.all))
907                 return;
908
909         write_lock_bh(&xfrm_policy_lock);
910         list_del(&walk->walk.all);
911         write_unlock_bh(&xfrm_policy_lock);
912 }
913 EXPORT_SYMBOL(xfrm_policy_walk_done);
914
915 /*
916  * Find policy to apply to this flow.
917  *
918  * Returns 0 if policy found, else an -errno.
919  */
920 static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
921                              u8 type, u16 family, int dir)
922 {
923         struct xfrm_selector *sel = &pol->selector;
924         int match, ret = -ESRCH;
925
926         if (pol->family != family ||
927             pol->type != type)
928                 return ret;
929
930         match = xfrm_selector_match(sel, fl, family);
931         if (match)
932                 ret = security_xfrm_policy_lookup(pol->security, fl->secid,
933                                                   dir);
934
935         return ret;
936 }
937
/*
 * Look up the policy of the given @type that applies to flow @fl in
 * direction @dir.
 *
 * The hash chain selected by policy_hash_direct() is searched first; the
 * first match there is remembered together with its priority. The
 * per-direction inexact chain is then searched, and its first match with a
 * numerically lower priority replaces the earlier result.
 *
 * Returns the policy with a reference taken via xfrm_pol_hold(), NULL if the
 * flow addresses cannot be obtained or nothing matches, or ERR_PTR(err) when
 * xfrm_policy_match() fails with anything other than -ESRCH.
 */
938 static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
939                                                      struct flowi *fl,
940                                                      u16 family, u8 dir)
941 {
942         int err;
943         struct xfrm_policy *pol, *ret;
944         xfrm_address_t *daddr, *saddr;
945         struct hlist_node *entry;
946         struct hlist_head *chain;
        /* Largest possible value: with no hashed match, any inexact match
         * whose priority is below ~0U is accepted.
         */
947         u32 priority = ~0U;
948
949         daddr = xfrm_flowi_daddr(fl, family);
950         saddr = xfrm_flowi_saddr(fl, family);
951         if (unlikely(!daddr || !saddr))
952                 return NULL;
953
954         read_lock_bh(&xfrm_policy_lock);
        /* Pass 1: chain hashed on the flow's addresses. */
955         chain = policy_hash_direct(net, daddr, saddr, family, dir);
956         ret = NULL;
957         hlist_for_each_entry(pol, entry, chain, bydst) {
958                 err = xfrm_policy_match(pol, fl, type, family, dir);
959                 if (err) {
                        /* -ESRCH just means "not this one"; any other
                         * error aborts the whole lookup.
                         */
960                         if (err == -ESRCH)
961                                 continue;
962                         else {
963                                 ret = ERR_PTR(err);
964                                 goto fail;
965                         }
966                 } else {
967                         ret = pol;
968                         priority = ret->priority;
969                         break;
970                 }
971         }
        /* Pass 2: inexact chain; only a strictly lower priority value
         * overrides the result of pass 1.
         */
972         chain = &net->xfrm.policy_inexact[dir];
973         hlist_for_each_entry(pol, entry, chain, bydst) {
974                 err = xfrm_policy_match(pol, fl, type, family, dir);
975                 if (err) {
976                         if (err == -ESRCH)
977                                 continue;
978                         else {
979                                 ret = ERR_PTR(err);
980                                 goto fail;
981                         }
982                 } else if (pol->priority < priority) {
983                         ret = pol;
984                         break;
985                 }
986         }
        /* Take the reference while still under the lock. The error path
         * jumps past this, so an ERR_PTR is never "held".
         */
987         if (ret)
988                 xfrm_pol_hold(ret);
989 fail:
990         read_unlock_bh(&xfrm_policy_lock);
991
992         return ret;
993 }
994
995 static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
996                               u8 dir, void **objp, atomic_t **obj_refp)
997 {
998         struct xfrm_policy *pol;
999         int err = 0;
1000
1001 #ifdef CONFIG_XFRM_SUB_POLICY
1002         pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
1003         if (IS_ERR(pol)) {
1004                 err = PTR_ERR(pol);
1005                 pol = NULL;
1006         }
1007         if (pol || err)
1008                 goto end;
1009 #endif
1010         pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
1011         if (IS_ERR(pol)) {
1012                 err = PTR_ERR(pol);
1013                 pol = NULL;
1014         }
1015 #ifdef CONFIG_XFRM_SUB_POLICY
1016 end:
1017 #endif
1018         if ((*objp = (void *) pol) != NULL)
1019                 *obj_refp = &pol->refcnt;
1020         return err;
1021 }
1022
1023 static inline int policy_to_flow_dir(int dir)
1024 {
1025         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1026             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1027             XFRM_POLICY_FWD == FLOW_DIR_FWD)
1028                 return dir;
1029         switch (dir) {
1030         default:
1031         case XFRM_POLICY_IN:
1032                 return FLOW_DIR_IN;
1033         case XFRM_POLICY_OUT:
1034                 return FLOW_DIR_OUT;
1035         case XFRM_POLICY_FWD:
1036                 return FLOW_DIR_FWD;
1037         }
1038 }
1039
1040 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
1041 {
1042         struct xfrm_policy *pol;
1043
1044         read_lock_bh(&xfrm_policy_lock);
1045         if ((pol = sk->sk_policy[dir]) != NULL) {
1046                 int match = xfrm_selector_match(&pol->selector, fl,
1047                                                 sk->sk_family);
1048                 int err = 0;
1049
1050                 if (match) {
1051                         err = security_xfrm_policy_lookup(pol->security,
1052                                                       fl->secid,
1053                                                       policy_to_flow_dir(dir));
1054                         if (!err)
1055                                 xfrm_pol_hold(pol);
1056                         else if (err == -ESRCH)
1057                                 pol = NULL;
1058                         else
1059                                 pol = ERR_PTR(err);
1060                 } else
1061                         pol = NULL;
1062         }
1063         read_unlock_bh(&xfrm_policy_lock);
1064         return pol;
1065 }
1066
1067 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
1068 {
1069         struct net *net = xp_net(pol);
1070         struct hlist_head *chain = policy_hash_bysel(net, &pol->selector,
1071                                                      pol->family, dir);
1072
1073         list_add(&pol->walk.all, &net->xfrm.policy_all);
1074         hlist_add_head(&pol->bydst, chain);
1075         hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index));
1076         net->xfrm.policy_count[dir]++;
1077         xfrm_pol_hold(pol);
1078
1079         if (xfrm_bydst_should_resize(net, dir, NULL))
1080                 schedule_work(&net->xfrm.policy_hash_work);
1081 }
1082
1083 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
1084                                                 int dir)
1085 {
1086         struct net *net = xp_net(pol);
1087
1088         if (hlist_unhashed(&pol->bydst))
1089                 return NULL;
1090
1091         hlist_del(&pol->bydst);
1092         hlist_del(&pol->byidx);
1093         list_del(&pol->walk.all);
1094         net->xfrm.policy_count[dir]--;
1095
1096         return pol;
1097 }
1098
1099 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1100 {
1101         write_lock_bh(&xfrm_policy_lock);
1102         pol = __xfrm_policy_unlink(pol, dir);
1103         write_unlock_bh(&xfrm_policy_lock);
1104         if (pol) {
1105                 if (dir < XFRM_POLICY_MAX)
1106                         atomic_inc(&flow_cache_genid);
1107                 xfrm_policy_kill(pol);
1108                 return 0;
1109         }
1110         return -ENOENT;
1111 }
1112 EXPORT_SYMBOL(xfrm_policy_delete);
1113
1114 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1115 {
1116         struct net *net = xp_net(pol);
1117         struct xfrm_policy *old_pol;
1118
1119 #ifdef CONFIG_XFRM_SUB_POLICY
1120         if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
1121                 return -EINVAL;
1122 #endif
1123
1124         write_lock_bh(&xfrm_policy_lock);
1125         old_pol = sk->sk_policy[dir];
1126         sk->sk_policy[dir] = pol;
1127         if (pol) {
1128                 pol->curlft.add_time = get_seconds();
1129                 pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir);
1130                 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
1131         }
1132         if (old_pol)
1133                 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
1134         write_unlock_bh(&xfrm_policy_lock);
1135
1136         if (old_pol) {
1137                 xfrm_policy_kill(old_pol);
1138         }
1139         return 0;
1140 }
1141
1142 static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
1143 {
1144         struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
1145
1146         if (newp) {
1147                 newp->selector = old->selector;
1148                 if (security_xfrm_policy_clone(old->security,
1149                                                &newp->security)) {
1150                         kfree(newp);
1151                         return NULL;  /* ENOMEM */
1152                 }
1153                 newp->lft = old->lft;
1154                 newp->curlft = old->curlft;
1155                 newp->action = old->action;
1156                 newp->flags = old->flags;
1157                 newp->xfrm_nr = old->xfrm_nr;
1158                 newp->index = old->index;
1159                 newp->type = old->type;
1160                 memcpy(newp->xfrm_vec, old->xfrm_vec,
1161                        newp->xfrm_nr*sizeof(struct xfrm_tmpl));
1162                 write_lock_bh(&xfrm_policy_lock);
1163                 __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
1164                 write_unlock_bh(&xfrm_policy_lock);
1165                 xfrm_pol_put(newp);
1166         }
1167         return newp;
1168 }
1169
1170 int __xfrm_sk_clone_policy(struct sock *sk)
1171 {
1172         struct xfrm_policy *p0 = sk->sk_policy[0],
1173                            *p1 = sk->sk_policy[1];
1174
1175         sk->sk_policy[0] = sk->sk_policy[1] = NULL;
1176         if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
1177                 return -ENOMEM;
1178         if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
1179                 return -ENOMEM;
1180         return 0;
1181 }
1182
1183 static int
1184 xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
1185                unsigned short family)
1186 {
1187         int err;
1188         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1189
1190         if (unlikely(afinfo == NULL))
1191                 return -EINVAL;
1192         err = afinfo->get_saddr(net, local, remote);
1193         xfrm_policy_put_afinfo(afinfo);
1194         return err;
1195 }
1196
1197 /* Resolve list of templates for the flow, given policy. */
1198
/*
 * Resolve every template of @policy to a state for flow @fl and store the
 * resulting states in @xfrm.
 *
 * Returns the number of states stored, or a negative errno. On failure every
 * state already stored by this call is released with xfrm_state_put().
 */
1199 static int
1200 xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
1201                       struct xfrm_state **xfrm,
1202                       unsigned short family)
1203 {
1204         struct net *net = xp_net(policy);
1205         int nx;
1206         int i, error;
1207         xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
1208         xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1209         xfrm_address_t tmp;
1210
1211         for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
1212                 struct xfrm_state *x;
1213                 xfrm_address_t *remote = daddr;
1214                 xfrm_address_t *local  = saddr;
1215                 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
1216
                /* Tunnel and BEET templates carry their own endpoints and
                 * address family; a wildcard local address is filled in by
                 * asking the address-family backend.
                 */
1217                 if (tmpl->mode == XFRM_MODE_TUNNEL ||
1218                     tmpl->mode == XFRM_MODE_BEET) {
1219                         remote = &tmpl->id.daddr;
1220                         local = &tmpl->saddr;
1221                         family = tmpl->encap_family;
1222                         if (xfrm_addr_any(local, family)) {
1223                                 error = xfrm_get_saddr(net, &tmp, remote, family);
1224                                 if (error)
1225                                         goto fail;
1226                                 local = &tmp;
1227                         }
1228                 }
1229
1230                 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
1231
                /* Usable state: keep it, and let the following templates
                 * start from this template's endpoints.
                 */
1232                 if (x && x->km.state == XFRM_STATE_VALID) {
1233                         xfrm[nx++] = x;
1234                         daddr = remote;
1235                         saddr = local;
1236                         continue;
1237                 }
                /* A state exists but is not valid: drop our reference and
                 * report -EINVAL for an error state, -EAGAIN otherwise.
                 */
1238                 if (x) {
1239                         error = (x->km.state == XFRM_STATE_ERROR ?
1240                                  -EINVAL : -EAGAIN);
1241                         xfrm_state_put(x);
1242                 }
1243                 else if (error == -ESRCH)
1244                         error = -EAGAIN;
1245
                /* Optional templates may be skipped; anything else fails
                 * the whole resolution.
                 */
1246                 if (!tmpl->optional)
1247                         goto fail;
1248         }
1249         return nx;
1250
1251 fail:
        /* Release the states collected so far, newest first. */
1252         for (nx--; nx>=0; nx--)
1253                 xfrm_state_put(xfrm[nx]);
1254         return error;
1255 }
1256
1257 static int
1258 xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
1259                   struct xfrm_state **xfrm,
1260                   unsigned short family)
1261 {
1262         struct xfrm_state *tp[XFRM_MAX_DEPTH];
1263         struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1264         int cnx = 0;
1265         int error;
1266         int ret;
1267         int i;
1268
1269         for (i = 0; i < npols; i++) {
1270                 if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
1271                         error = -ENOBUFS;
1272                         goto fail;
1273                 }
1274
1275                 ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1276                 if (ret < 0) {
1277                         error = ret;
1278                         goto fail;
1279                 } else
1280                         cnx += ret;
1281         }
1282
1283         /* found states are sorted for outbound processing */
1284         if (npols > 1)
1285                 xfrm_state_sort(xfrm, tpp, cnx, family);
1286
1287         return cnx;
1288
1289  fail:
1290         for (cnx--; cnx>=0; cnx--)
1291                 xfrm_state_put(tpp[cnx]);
1292         return error;
1293
1294 }
1295
1296 /* Check that the bundle accepts the flow and its components are
1297  * still valid.
1298  */
1299
1300 static struct dst_entry *
1301 xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
1302 {
1303         struct dst_entry *x;
1304         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1305         if (unlikely(afinfo == NULL))
1306                 return ERR_PTR(-EINVAL);
1307         x = afinfo->find_bundle(fl, policy);
1308         xfrm_policy_put_afinfo(afinfo);
1309         return x;
1310 }
1311
1312 static inline int xfrm_get_tos(struct flowi *fl, int family)
1313 {
1314         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1315         int tos;
1316
1317         if (!afinfo)
1318                 return -EINVAL;
1319
1320         tos = afinfo->get_tos(fl);
1321
1322         xfrm_policy_put_afinfo(afinfo);
1323
1324         return tos;
1325 }
1326
1327 static inline struct xfrm_dst *xfrm_alloc_dst(int family)
1328 {
1329         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1330         struct xfrm_dst *xdst;
1331
1332         if (!afinfo)
1333                 return ERR_PTR(-EINVAL);
1334
1335         xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);
1336
1337         xfrm_policy_put_afinfo(afinfo);
1338
1339         return xdst;
1340 }
1341
1342 static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1343                                  int nfheader_len)
1344 {
1345         struct xfrm_policy_afinfo *afinfo =
1346                 xfrm_policy_get_afinfo(dst->ops->family);
1347         int err;
1348
1349         if (!afinfo)
1350                 return -EINVAL;
1351
1352         err = afinfo->init_path(path, dst, nfheader_len);
1353
1354         xfrm_policy_put_afinfo(afinfo);
1355
1356         return err;
1357 }
1358
1359 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
1360 {
1361         struct xfrm_policy_afinfo *afinfo =
1362                 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1363         int err;
1364
1365         if (!afinfo)
1366                 return -EINVAL;
1367
1368         err = afinfo->fill_dst(xdst, dev);
1369
1370         xfrm_policy_put_afinfo(afinfo);
1371
1372         return err;
1373 }
1374
1375 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
1376  * all the metrics... In short, bundle a bundle.
1377  */
1378
/*
 * Build one xfrm_dst per state in @xfrm[0..nx-1], chain them through ->child
 * and terminate the chain with the final route.
 *
 * Returns the first entry of the chain, or ERR_PTR(err) on failure.
 * NOTE(review): the code dereferences dst_prev after the loop, so it relies
 * on nx > 0; the caller visible in this file returns early when nx == 0.
 */
1379 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1380                                             struct xfrm_state **xfrm, int nx,
1381                                             struct flowi *fl,
1382                                             struct dst_entry *dst)
1383 {
1384         unsigned long now = jiffies;
1385         struct net_device *dev;
1386         struct dst_entry *dst_prev = NULL;
1387         struct dst_entry *dst0 = NULL;
1388         int i = 0;
1389         int err;
1390         int header_len = 0;
1391         int nfheader_len = 0;
1392         int trailer_len = 0;
1393         int tos;
1394         int family = policy->selector.family;
1395         xfrm_address_t saddr, daddr;
1396
1397         xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1398
        /* A negative tos is an error code from xfrm_get_tos(). */
1399         tos = xfrm_get_tos(fl, family);
1400         err = tos;
1401         if (tos < 0)
1402                 goto put_states;
1403
        /* Reference for the first entry's ->route pointer. */
1404         dst_hold(dst);
1405
1406         for (; i < nx; i++) {
1407                 struct xfrm_dst *xdst = xfrm_alloc_dst(family);
1408                 struct dst_entry *dst1 = &xdst->u.dst;
1409
1410                 err = PTR_ERR(xdst);
1411                 if (IS_ERR(xdst)) {
1412                         dst_release(dst);
1413                         goto put_states;
1414                 }
1415
                /* First entry becomes the head of the bundle; later ones
                 * hang off the previous entry and are flagged DST_NOHASH.
                 */
1416                 if (!dst_prev)
1417                         dst0 = dst1;
1418                 else {
1419                         dst_prev->child = dst_clone(dst1);
1420                         dst1->flags |= DST_NOHASH;
1421                 }
1422
1423                 xdst->route = dst;
1424                 memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));
1425
                /* Non-transport modes switch to the state's family and look
                 * up a fresh route for the next level; transport mode keeps
                 * the current route and takes another reference on it.
                 */
1426                 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1427                         family = xfrm[i]->props.family;
1428                         dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
1429                                               family);
1430                         err = PTR_ERR(dst);
1431                         if (IS_ERR(dst))
1432                                 goto put_states;
1433                 } else
1434                         dst_hold(dst);
1435
1436                 dst1->xfrm = xfrm[i];
1437                 xdst->genid = xfrm[i]->genid;
1438
1439                 dst1->obsolete = -1;
1440                 dst1->flags |= DST_HOST;
1441                 dst1->lastuse = now;
1442
1443                 dst1->input = dst_discard;
1444                 dst1->output = xfrm[i]->outer_mode->afinfo->output;
1445
1446                 dst1->next = dst_prev;
1447                 dst_prev = dst1;
1448
                /* Running totals; nfheader_len only counts states whose
                 * type has XFRM_TYPE_NON_FRAGMENT set.
                 */
1449                 header_len += xfrm[i]->props.header_len;
1450                 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1451                         nfheader_len += xfrm[i]->props.header_len;
1452                 trailer_len += xfrm[i]->props.trailer_len;
1453         }
1454
        /* Terminate the chain with the final route and record it as the
         * bundle's path.
         */
1455         dst_prev->child = dst;
1456         dst0->path = dst;
1457
1458         err = -ENODEV;
1459         dev = dst->dev;
1460         if (!dev)
1461                 goto free_dst;
1462
1463         /* Copy neighbour for reachability confirmation */
1464         dst0->neighbour = neigh_clone(dst->neighbour);
1465
        /* NOTE(review): the return value of xfrm_init_path() is ignored. */
1466         xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1467         xfrm_init_pmtu(dst_prev);
1468
        /* Second pass, head to tail: each entry records the header and
         * trailer space still needed from its level downwards.
         */
1469         for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1470                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1471
1472                 err = xfrm_fill_dst(xdst, dev);
1473                 if (err)
1474                         goto free_dst;
1475
1476                 dst_prev->header_len = header_len;
1477                 dst_prev->trailer_len = trailer_len;
1478                 header_len -= xdst->u.dst.xfrm->props.header_len;
1479                 trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1480         }
1481
1482 out:
1483         return dst0;
1484
1485 put_states:
        /* Release the states from index i onwards, i.e. those the loop did
         * not finish attaching to a dst entry.
         */
1486         for (; i < nx; i++)
1487                 xfrm_state_put(xfrm[i]);
1488 free_dst:
1489         if (dst0)
1490                 dst_free(dst0);
1491         dst0 = ERR_PTR(err);
1492         goto out;
1493 }
1494
1495 static int inline
1496 xfrm_dst_alloc_copy(void **target, void *src, int size)
1497 {
1498         if (!*target) {
1499                 *target = kmalloc(size, GFP_ATOMIC);
1500                 if (!*target)
1501                         return -ENOMEM;
1502         }
1503         memcpy(*target, src, size);
1504         return 0;
1505 }
1506
/*
 * With CONFIG_XFRM_SUB_POLICY, store a copy of selector @sel in the
 * bundle's ->partner buffer and return the result of that copy.
 * Without it this does nothing and returns 0.
 */
static inline int
xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
{
        int rc = 0;
#ifdef CONFIG_XFRM_SUB_POLICY
        struct xfrm_dst *bundle = (struct xfrm_dst *)dst;

        rc = xfrm_dst_alloc_copy((void **)&(bundle->partner),
                                 sel, sizeof(*sel));
#endif
        return rc;
}
1518
/*
 * With CONFIG_XFRM_SUB_POLICY, store a copy of flow @fl in the bundle's
 * ->origin buffer and return the result of that copy.
 * Without it this does nothing and returns 0.
 */
static inline int
xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
{
        int rc = 0;
#ifdef CONFIG_XFRM_SUB_POLICY
        struct xfrm_dst *bundle = (struct xfrm_dst *)dst;

        rc = xfrm_dst_alloc_copy((void **)&(bundle->origin), fl, sizeof(*fl));
#endif
        return rc;
}
1529
1530 static int stale_bundle(struct dst_entry *dst);
1531
1532 /* Main function: finds/creates a bundle for given flow.
1533  *
1534  * At the moment we eat a raw IP route. Mostly to speed up lookups
1535  * on interfaces with disabled IPsec.
1536  */
1537 int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
1538                   struct sock *sk, int flags)
1539 {
1540         struct xfrm_policy *policy;
1541         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1542         int npols;
1543         int pol_dead;
1544         int xfrm_nr;
1545         int pi;
1546         struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1547         struct dst_entry *dst, *dst_orig = *dst_p;
1548         int nx = 0;
1549         int err;
1550         u32 genid;
1551         u16 family;
1552         u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
1553
1554 restart:
1555         genid = atomic_read(&flow_cache_genid);
1556         policy = NULL;
1557         for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
1558                 pols[pi] = NULL;
1559         npols = 0;
1560         pol_dead = 0;
1561         xfrm_nr = 0;
1562
1563         if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
1564                 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
1565                 err = PTR_ERR(policy);
1566                 if (IS_ERR(policy)) {
1567                         XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1568                         goto dropdst;
1569                 }
1570         }
1571
1572         if (!policy) {
1573                 /* To accelerate a bit...  */
1574                 if ((dst_orig->flags & DST_NOXFRM) ||
1575                     !net->xfrm.policy_count[XFRM_POLICY_OUT])
1576                         goto nopol;
1577
1578                 policy = flow_cache_lookup(net, fl, dst_orig->ops->family,
1579                                            dir, xfrm_policy_lookup);
1580                 err = PTR_ERR(policy);
1581                 if (IS_ERR(policy)) {
1582                         XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1583                         goto dropdst;
1584                 }
1585         }
1586
1587         if (!policy)
1588                 goto nopol;
1589
1590         family = dst_orig->ops->family;
1591         pols[0] = policy;
1592         npols ++;
1593         xfrm_nr += pols[0]->xfrm_nr;
1594
1595         err = -ENOENT;
1596         if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
1597                 goto error;
1598
1599         policy->curlft.use_time = get_seconds();
1600
1601         switch (policy->action) {
1602         default:
1603         case XFRM_POLICY_BLOCK:
1604                 /* Prohibit the flow */
1605                 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
1606                 err = -EPERM;
1607                 goto error;
1608
1609         case XFRM_POLICY_ALLOW:
1610 #ifndef CONFIG_XFRM_SUB_POLICY
1611                 if (policy->xfrm_nr == 0) {
1612                         /* Flow passes not transformed. */
1613                         xfrm_pol_put(policy);
1614                         return 0;
1615                 }
1616 #endif
1617
1618                 /* Try to find matching bundle.
1619                  *
1620                  * LATER: help from flow cache. It is optional, this
1621                  * is required only for output policy.
1622                  */
1623                 dst = xfrm_find_bundle(fl, policy, family);
1624                 if (IS_ERR(dst)) {
1625                         XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1626                         err = PTR_ERR(dst);
1627                         goto error;
1628                 }
1629
1630                 if (dst)
1631                         break;
1632
1633 #ifdef CONFIG_XFRM_SUB_POLICY
1634                 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1635                         pols[1] = xfrm_policy_lookup_bytype(net,
1636                                                             XFRM_POLICY_TYPE_MAIN,
1637                                                             fl, family,
1638                                                             XFRM_POLICY_OUT);
1639                         if (pols[1]) {
1640                                 if (IS_ERR(pols[1])) {
1641                                         XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1642                                         err = PTR_ERR(pols[1]);
1643                                         goto error;
1644                                 }
1645                                 if (pols[1]->action == XFRM_POLICY_BLOCK) {
1646                                         XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
1647                                         err = -EPERM;
1648                                         goto error;
1649                                 }
1650                                 npols ++;
1651                                 xfrm_nr += pols[1]->xfrm_nr;
1652                         }
1653                 }
1654
1655                 /*
1656                  * Because neither flowi nor bundle information knows about
1657                  * transformation template size. On more than one policy usage
1658                  * we can realize whether all of them is bypass or not after
1659                  * they are searched. See above not-transformed bypass
1660                  * is surrounded by non-sub policy configuration, too.
1661                  */
1662                 if (xfrm_nr == 0) {
1663                         /* Flow passes not transformed. */
1664                         xfrm_pols_put(pols, npols);
1665                         return 0;
1666                 }
1667
1668 #endif
1669                 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1670
1671                 if (unlikely(nx<0)) {
1672                         err = nx;
1673                         if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) {
1674                                 /* EREMOTE tells the caller to generate
1675                                  * a one-shot blackhole route.
1676                                  */
1677                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
1678                                 xfrm_pol_put(policy);
1679                                 return -EREMOTE;
1680                         }
1681                         if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
1682                                 DECLARE_WAITQUEUE(wait, current);
1683
1684                                 add_wait_queue(&net->xfrm.km_waitq, &wait);
1685                                 set_current_state(TASK_INTERRUPTIBLE);
1686                                 schedule();
1687                                 set_current_state(TASK_RUNNING);
1688                                 remove_wait_queue(&net->xfrm.km_waitq, &wait);
1689
1690                                 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1691
1692                                 if (nx == -EAGAIN && signal_pending(current)) {
1693                                         XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
1694                                         err = -ERESTART;
1695                                         goto error;
1696                                 }
1697                                 if (nx == -EAGAIN ||
1698                                     genid != atomic_read(&flow_cache_genid)) {
1699                                         xfrm_pols_put(pols, npols);
1700                                         goto restart;
1701                                 }
1702                                 err = nx;
1703                         }
1704                         if (err < 0) {
1705                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
1706                                 goto error;
1707                         }
1708                 }
1709                 if (nx == 0) {
1710                         /* Flow passes not transformed. */
1711                         xfrm_pols_put(pols, npols);
1712                         return 0;
1713                 }
1714
1715                 dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
1716                 err = PTR_ERR(dst);
1717                 if (IS_ERR(dst)) {
1718                         XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1719                         goto error;
1720                 }
1721
1722                 for (pi = 0; pi < npols; pi++) {
1723                         read_lock_bh(&pols[pi]->lock);
1724                         pol_dead |= pols[pi]->walk.dead;
1725                         read_unlock_bh(&pols[pi]->lock);
1726                 }
1727
1728                 write_lock_bh(&policy->lock);
1729                 if (unlikely(pol_dead || stale_bundle(dst))) {
1730                         /* Wow! While we worked on resolving, this
1731                          * policy has gone. Retry. It is not paranoia,
1732                          * we just cannot enlist new bundle to dead object.
1733                          * We can't enlist stable bundles either.
1734                          */
1735                         write_unlock_bh(&policy->lock);
1736                         dst_free(dst);
1737
1738                         if (pol_dead)
1739                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD);
1740                         else
1741                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1742                         err = -EHOSTUNREACH;
1743                         goto error;
1744                 }
1745
1746                 if (npols > 1)
1747                         err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1748                 else
1749                         err = xfrm_dst_update_origin(dst, fl);
1750                 if (unlikely(err)) {
1751                         write_unlock_bh(&policy->lock);
1752                         dst_free(dst);
1753                         XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1754                         goto error;
1755                 }
1756
1757                 dst->next = policy->bundles;
1758                 policy->bundles = dst;
1759                 dst_hold(dst);
1760                 write_unlock_bh(&policy->lock);
1761         }
1762         *dst_p = dst;
1763         dst_release(dst_orig);
1764         xfrm_pols_put(pols, npols);
1765         return 0;
1766
1767 error:
1768         xfrm_pols_put(pols, npols);
1769 dropdst:
1770         dst_release(dst_orig);
1771         *dst_p = NULL;
1772         return err;
1773
1774 nopol:
1775         err = -ENOENT;
1776         if (flags & XFRM_LOOKUP_ICMP)
1777                 goto dropdst;
1778         return 0;
1779 }
1780 EXPORT_SYMBOL(__xfrm_lookup);
1781
1782 int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
1783                 struct sock *sk, int flags)
1784 {
1785         int err = __xfrm_lookup(net, dst_p, fl, sk, flags);
1786
1787         if (err == -EREMOTE) {
1788                 dst_release(*dst_p);
1789                 *dst_p = NULL;
1790                 err = -EAGAIN;
1791         }
1792
1793         return err;
1794 }
1795 EXPORT_SYMBOL(xfrm_lookup);
1796
1797 static inline int
1798 xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
1799 {
1800         struct xfrm_state *x;
1801
1802         if (!skb->sp || idx < 0 || idx >= skb->sp->len)
1803                 return 0;
1804         x = skb->sp->xvec[idx];
1805         if (!x->type->reject)
1806                 return 0;
1807         return x->type->reject(x, skb, fl);
1808 }
1809
1810 /* When skb is transformed back to its "native" form, we have to
1811  * check policy restrictions. At the moment we make this in maximally
1812  * stupid way. Shame on me. :-) Of course, connected sockets must
1813  * have policy cached at them.
1814  */
1815
1816 static inline int
1817 xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
1818               unsigned short family)
1819 {
1820         if (xfrm_state_kern(x))
1821                 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
1822         return  x->id.proto == tmpl->id.proto &&
1823                 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
1824                 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
1825                 x->props.mode == tmpl->mode &&
1826                 (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
1827                  !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
1828                 !(x->props.mode != XFRM_MODE_TRANSPORT &&
1829                   xfrm_state_addr_cmp(tmpl, x, family));
1830 }
1831
1832 /*
1833  * 0 or more than 0 is returned when validation is succeeded (either bypass
1834  * because of optional transport mode, or next index of the mathced secpath
1835  * state with the template.
1836  * -1 is returned when no matching template is found.
1837  * Otherwise "-2 - errored_index" is returned.
1838  */
1839 static inline int
1840 xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
1841                unsigned short family)
1842 {
1843         int idx = start;
1844
1845         if (tmpl->optional) {
1846                 if (tmpl->mode == XFRM_MODE_TRANSPORT)
1847                         return start;
1848         } else
1849                 start = -1;
1850         for (; idx < sp->len; idx++) {
1851                 if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
1852                         return ++idx;
1853                 if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
1854                         if (start == -1)
1855                                 start = -2-idx;
1856                         break;
1857                 }
1858         }
1859         return start;
1860 }
1861
1862 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
1863                           unsigned int family, int reverse)
1864 {
1865         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1866         int err;
1867
1868         if (unlikely(afinfo == NULL))
1869                 return -EAFNOSUPPORT;
1870
1871         afinfo->decode_session(skb, fl, reverse);
1872         err = security_xfrm_decode_session(skb, &fl->secid);
1873         xfrm_policy_put_afinfo(afinfo);
1874         return err;
1875 }
1876 EXPORT_SYMBOL(__xfrm_decode_session);
1877
1878 static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
1879 {
1880         for (; k < sp->len; k++) {
1881                 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
1882                         *idxp = k;
1883                         return 1;
1884                 }
1885         }
1886
1887         return 0;
1888 }
1889
1890 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1891                         unsigned short family)
1892 {
1893         struct net *net = dev_net(skb->dev);
1894         struct xfrm_policy *pol;
1895         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1896         int npols = 0;
1897         int xfrm_nr;
1898         int pi;
1899         int reverse;
1900         struct flowi fl;
1901         u8 fl_dir;
1902         int xerr_idx = -1;
1903
1904         reverse = dir & ~XFRM_POLICY_MASK;
1905         dir &= XFRM_POLICY_MASK;
1906         fl_dir = policy_to_flow_dir(dir);
1907
1908         if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
1909                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
1910                 return 0;
1911         }
1912
1913         nf_nat_decode_session(skb, &fl, family);
1914
1915         /* First, check used SA against their selectors. */
1916         if (skb->sp) {
1917                 int i;
1918
1919                 for (i=skb->sp->len-1; i>=0; i--) {
1920                         struct xfrm_state *x = skb->sp->xvec[i];
1921                         if (!xfrm_selector_match(&x->sel, &fl, family)) {
1922                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
1923                                 return 0;
1924                         }
1925                 }
1926         }
1927
1928         pol = NULL;
1929         if (sk && sk->sk_policy[dir]) {
1930                 pol = xfrm_sk_policy_lookup(sk, dir, &fl);
1931                 if (IS_ERR(pol)) {
1932                         XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
1933                         return 0;
1934                 }
1935         }
1936
1937         if (!pol)
1938                 pol = flow_cache_lookup(net, &fl, family, fl_dir,
1939                                         xfrm_policy_lookup);
1940
1941         if (IS_ERR(pol)) {
1942                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
1943                 return 0;
1944         }
1945
1946         if (!pol) {
1947                 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
1948                         xfrm_secpath_reject(xerr_idx, skb, &fl);
1949                         XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
1950                         return 0;
1951                 }
1952                 return 1;
1953         }
1954
1955         pol->curlft.use_time = get_seconds();
1956
1957         pols[0] = pol;
1958         npols ++;
1959 #ifdef CONFIG_XFRM_SUB_POLICY
1960         if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1961                 pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
1962                                                     &fl, family,
1963                                                     XFRM_POLICY_IN);
1964                 if (pols[1]) {
1965                         if (IS_ERR(pols[1])) {
1966                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
1967                                 return 0;
1968                         }
1969                         pols[1]->curlft.use_time = get_seconds();
1970                         npols ++;
1971                 }
1972         }
1973 #endif
1974
1975         if (pol->action == XFRM_POLICY_ALLOW) {
1976                 struct sec_path *sp;
1977                 static struct sec_path dummy;
1978                 struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
1979                 struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
1980                 struct xfrm_tmpl **tpp = tp;
1981                 int ti = 0;
1982                 int i, k;
1983
1984                 if ((sp = skb->sp) == NULL)
1985                         sp = &dummy;
1986
1987                 for (pi = 0; pi < npols; pi++) {
1988                         if (pols[pi] != pol &&
1989                             pols[pi]->action != XFRM_POLICY_ALLOW) {
1990                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
1991                                 goto reject;
1992                         }
1993                         if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
1994                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
1995                                 goto reject_error;
1996                         }
1997                         for (i = 0; i < pols[pi]->xfrm_nr; i++)
1998                                 tpp[ti++] = &pols[pi]->xfrm_vec[i];
1999                 }
2000                 xfrm_nr = ti;
2001                 if (npols > 1) {
2002                         xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
2003                         tpp = stp;
2004                 }
2005
2006                 /* For each tunnel xfrm, find the first matching tmpl.
2007                  * For each tmpl before that, find corresponding xfrm.
2008                  * Order is _important_. Later we will implement
2009                  * some barriers, but at the moment barriers
2010                  * are implied between each two transformations.
2011                  */
2012                 for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
2013                         k = xfrm_policy_ok(tpp[i], sp, k, family);
2014                         if (k < 0) {
2015                                 if (k < -1)
2016                                         /* "-2 - errored_index" returned */
2017                                         xerr_idx = -(2+k);
2018                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
2019                                 goto reject;
2020                         }
2021                 }
2022
2023                 if (secpath_has_nontransport(sp, k, &xerr_idx)) {
2024                         XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
2025                         goto reject;
2026                 }
2027
2028                 xfrm_pols_put(pols, npols);
2029                 return 1;
2030         }
2031         XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2032
2033 reject:
2034         xfrm_secpath_reject(xerr_idx, skb, &fl);
2035 reject_error:
2036         xfrm_pols_put(pols, npols);
2037         return 0;
2038 }
2039 EXPORT_SYMBOL(__xfrm_policy_check);
2040
2041 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2042 {
2043         struct net *net = dev_net(skb->dev);
2044         struct flowi fl;
2045
2046         if (xfrm_decode_session(skb, &fl, family) < 0) {
2047                 /* XXX: we should have something like FWDHDRERROR here. */
2048                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
2049                 return 0;
2050         }
2051
2052         return xfrm_lookup(net, &skb->dst, &fl, NULL, 0) == 0;
2053 }
2054 EXPORT_SYMBOL(__xfrm_route_forward);
2055
2056 /* Optimize later using cookies and generation ids. */
2057
2058 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2059 {
2060         /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
2061          * to "-1" to force all XFRM destinations to get validated by
2062          * dst_ops->check on every use.  We do this because when a
2063          * normal route referenced by an XFRM dst is obsoleted we do
2064          * not go looking around for all parent referencing XFRM dsts
2065          * so that we can invalidate them.  It is just too much work.
2066          * Instead we make the checks here on every use.  For example:
2067          *
2068          *      XFRM dst A --> IPv4 dst X
2069          *
2070          * X is the "xdst->route" of A (X is also the "dst->path" of A
2071          * in this example).  If X is marked obsolete, "A" will not
2072          * notice.  That's what we are validating here via the
2073          * stale_bundle() check.
2074          *
2075          * When a policy's bundle is pruned, we dst_free() the XFRM
2076          * dst which causes it's ->obsolete field to be set to a
2077          * positive non-zero integer.  If an XFRM dst has been pruned
2078          * like this, we want to force a new route lookup.
2079          */
2080         if (dst->obsolete < 0 && !stale_bundle(dst))
2081                 return dst;
2082
2083         return NULL;
2084 }
2085
2086 static int stale_bundle(struct dst_entry *dst)
2087 {
2088         return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
2089 }
2090
2091 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2092 {
2093         while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2094                 dst->dev = dev_net(dev)->loopback_dev;
2095                 dev_hold(dst->dev);
2096                 dev_put(dev);
2097         }
2098 }
2099 EXPORT_SYMBOL(xfrm_dst_ifdown);
2100
/* Default dst_ops->link_failure hook for XFRM dsts; intentionally empty. */
static void xfrm_link_failure(struct sk_buff *skb)
{
        /* Impossible. Such a dst must be popped before it reaches the
         * point of failure.
         */
}
2106
2107 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2108 {
2109         if (dst) {
2110                 if (dst->obsolete) {
2111                         dst_release(dst);
2112                         dst = NULL;
2113                 }
2114         }
2115         return dst;
2116 }
2117
2118 static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
2119 {
2120         struct dst_entry *dst, **dstp;
2121
2122         write_lock(&pol->lock);
2123         dstp = &pol->bundles;
2124         while ((dst=*dstp) != NULL) {
2125                 if (func(dst)) {
2126                         *dstp = dst->next;
2127                         dst->next = *gc_list_p;
2128                         *gc_list_p = dst;
2129                 } else {
2130                         dstp = &dst->next;
2131                 }
2132         }
2133         write_unlock(&pol->lock);
2134 }
2135
2136 static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *))
2137 {
2138         struct dst_entry *gc_list = NULL;
2139         int dir;
2140
2141         read_lock_bh(&xfrm_policy_lock);
2142         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2143                 struct xfrm_policy *pol;
2144                 struct hlist_node *entry;
2145                 struct hlist_head *table;
2146                 int i;
2147
2148                 hlist_for_each_entry(pol, entry,
2149                                      &net->xfrm.policy_inexact[dir], bydst)
2150                         prune_one_bundle(pol, func, &gc_list);
2151
2152                 table = net->xfrm.policy_bydst[dir].table;
2153                 for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
2154                         hlist_for_each_entry(pol, entry, table + i, bydst)
2155                                 prune_one_bundle(pol, func, &gc_list);
2156                 }
2157         }
2158         read_unlock_bh(&xfrm_policy_lock);
2159
2160         while (gc_list) {
2161                 struct dst_entry *dst = gc_list;
2162                 gc_list = dst->next;
2163                 dst_free(dst);
2164         }
2165 }
2166
2167 static int unused_bundle(struct dst_entry *dst)
2168 {
2169         return !atomic_read(&dst->__refcnt);
2170 }
2171
2172 static void __xfrm_garbage_collect(struct net *net)
2173 {
2174         xfrm_prune_bundles(net, unused_bundle);
2175 }
2176
2177 static int xfrm_flush_bundles(struct net *net)
2178 {
2179         xfrm_prune_bundles(net, stale_bundle);
2180         return 0;
2181 }
2182
2183 static void xfrm_init_pmtu(struct dst_entry *dst)
2184 {
2185         do {
2186                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2187                 u32 pmtu, route_mtu_cached;
2188
2189                 pmtu = dst_mtu(dst->child);
2190                 xdst->child_mtu_cached = pmtu;
2191
2192                 pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2193
2194                 route_mtu_cached = dst_mtu(xdst->route);
2195                 xdst->route_mtu_cached = route_mtu_cached;
2196
2197                 if (pmtu > route_mtu_cached)
2198                         pmtu = route_mtu_cached;
2199
2200                 dst->metrics[RTAX_MTU-1] = pmtu;
2201         } while ((dst = dst->next));
2202 }
2203
2204 /* Check that the bundle accepts the flow and its components are
2205  * still valid.
2206  */
2207
2208 int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2209                 struct flowi *fl, int family, int strict)
2210 {
2211         struct dst_entry *dst = &first->u.dst;
2212         struct xfrm_dst *last;
2213         u32 mtu;
2214
2215         if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2216             (dst->dev && !netif_running(dst->dev)))
2217                 return 0;
2218 #ifdef CONFIG_XFRM_SUB_POLICY
2219         if (fl) {
2220                 if (first->origin && !flow_cache_uli_match(first->origin, fl))
2221                         return 0;
2222                 if (first->partner &&
2223                     !xfrm_selector_match(first->partner, fl, family))
2224                         return 0;
2225         }
2226 #endif
2227
2228         last = NULL;
2229
2230         do {
2231                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2232
2233                 if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
2234                         return 0;
2235                 if (fl && pol &&
2236                     !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
2237                         return 0;
2238                 if (dst->xfrm->km.state != XFRM_STATE_VALID)
2239                         return 0;
2240                 if (xdst->genid != dst->xfrm->genid)
2241                         return 0;
2242
2243                 if (strict && fl &&
2244                     !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2245                     !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
2246                         return 0;
2247
2248                 mtu = dst_mtu(dst->child);
2249                 if (xdst->child_mtu_cached != mtu) {
2250                         last = xdst;
2251                         xdst->child_mtu_cached = mtu;
2252                 }
2253
2254                 if (!dst_check(xdst->route, xdst->route_cookie))
2255                         return 0;
2256                 mtu = dst_mtu(xdst->route);
2257                 if (xdst->route_mtu_cached != mtu) {
2258                         last = xdst;
2259                         xdst->route_mtu_cached = mtu;
2260                 }
2261
2262                 dst = dst->child;
2263         } while (dst->xfrm);
2264
2265         if (likely(!last))
2266                 return 1;
2267
2268         mtu = last->child_mtu_cached;
2269         for (;;) {
2270                 dst = &last->u.dst;
2271
2272                 mtu = xfrm_state_mtu(dst->xfrm, mtu);
2273                 if (mtu > last->route_mtu_cached)
2274                         mtu = last->route_mtu_cached;
2275                 dst->metrics[RTAX_MTU-1] = mtu;
2276
2277                 if (last == first)
2278                         break;
2279
2280                 last = (struct xfrm_dst *)last->u.dst.next;
2281                 last->child_mtu_cached = mtu;
2282         }
2283
2284         return 1;
2285 }
2286
2287 EXPORT_SYMBOL(xfrm_bundle_ok);
2288
2289 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2290 {
2291         int err = 0;
2292         if (unlikely(afinfo == NULL))
2293                 return -EINVAL;
2294         if (unlikely(afinfo->family >= NPROTO))
2295                 return -EAFNOSUPPORT;
2296         write_lock_bh(&xfrm_policy_afinfo_lock);
2297         if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2298                 err = -ENOBUFS;
2299         else {
2300                 struct dst_ops *dst_ops = afinfo->dst_ops;
2301                 if (likely(dst_ops->kmem_cachep == NULL))
2302                         dst_ops->kmem_cachep = xfrm_dst_cache;
2303                 if (likely(dst_ops->check == NULL))
2304                         dst_ops->check = xfrm_dst_check;
2305                 if (likely(dst_ops->negative_advice == NULL))
2306                         dst_ops->negative_advice = xfrm_negative_advice;
2307                 if (likely(dst_ops->link_failure == NULL))
2308                         dst_ops->link_failure = xfrm_link_failure;
2309                 if (likely(afinfo->garbage_collect == NULL))
2310                         afinfo->garbage_collect = __xfrm_garbage_collect;
2311                 xfrm_policy_afinfo[afinfo->family] = afinfo;
2312         }
2313         write_unlock_bh(&xfrm_policy_afinfo_lock);
2314         return err;
2315 }
2316 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
2317
2318 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2319 {
2320         int err = 0;
2321         if (unlikely(afinfo == NULL))
2322                 return -EINVAL;
2323         if (unlikely(afinfo->family >= NPROTO))
2324                 return -EAFNOSUPPORT;
2325         write_lock_bh(&xfrm_policy_afinfo_lock);
2326         if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2327                 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2328                         err = -EINVAL;
2329                 else {
2330                         struct dst_ops *dst_ops = afinfo->dst_ops;
2331                         xfrm_policy_afinfo[afinfo->family] = NULL;
2332                         dst_ops->kmem_cachep = NULL;
2333                         dst_ops->check = NULL;
2334                         dst_ops->negative_advice = NULL;
2335                         dst_ops->link_failure = NULL;
2336                         afinfo->garbage_collect = NULL;
2337                 }
2338         }
2339         write_unlock_bh(&xfrm_policy_afinfo_lock);
2340         return err;
2341 }
2342 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2343
2344 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
2345 {
2346         struct xfrm_policy_afinfo *afinfo;
2347         if (unlikely(family >= NPROTO))
2348                 return NULL;
2349         read_lock(&xfrm_policy_afinfo_lock);
2350         afinfo = xfrm_policy_afinfo[family];
2351         if (unlikely(!afinfo))
2352                 read_unlock(&xfrm_policy_afinfo_lock);
2353         return afinfo;
2354 }
2355
2356 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
2357 {
2358         read_unlock(&xfrm_policy_afinfo_lock);
2359 }
2360
2361 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2362 {
2363         struct net_device *dev = ptr;
2364
2365         switch (event) {
2366         case NETDEV_DOWN:
2367                 xfrm_flush_bundles(dev_net(dev));
2368         }
2369         return NOTIFY_DONE;
2370 }
2371
2372 static struct notifier_block xfrm_dev_notifier = {
2373         .notifier_call  = xfrm_dev_event,
2374 };
2375
2376 #ifdef CONFIG_XFRM_STATISTICS
2377 static int __net_init xfrm_statistics_init(struct net *net)
2378 {
2379         int rv;
2380
2381         if (snmp_mib_init((void **)net->mib.xfrm_statistics,
2382                           sizeof(struct linux_xfrm_mib)) < 0)
2383                 return -ENOMEM;
2384         rv = xfrm_proc_init(net);
2385         if (rv < 0)
2386                 snmp_mib_free((void **)net->mib.xfrm_statistics);
2387         return rv;
2388 }
2389
2390 static void xfrm_statistics_fini(struct net *net)
2391 {
2392         xfrm_proc_fini(net);
2393         snmp_mib_free((void **)net->mib.xfrm_statistics);
2394 }
2395 #else
2396 static int __net_init xfrm_statistics_init(struct net *net)
2397 {
2398         return 0;
2399 }
2400
2401 static void xfrm_statistics_fini(struct net *net)
2402 {
2403 }
2404 #endif
2405
2406 static int __net_init xfrm_policy_init(struct net *net)
2407 {
2408         unsigned int hmask, sz;
2409         int dir;
2410
2411         if (net_eq(net, &init_net))
2412                 xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2413                                            sizeof(struct xfrm_dst),
2414                                            0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2415                                            NULL);
2416
2417         hmask = 8 - 1;
2418         sz = (hmask+1) * sizeof(struct hlist_head);
2419
2420         net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
2421         if (!net->xfrm.policy_byidx)
2422                 goto out_byidx;
2423         net->xfrm.policy_idx_hmask = hmask;
2424
2425         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2426                 struct xfrm_policy_hash *htab;
2427
2428                 net->xfrm.policy_count[dir] = 0;
2429                 INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
2430
2431                 htab = &net->xfrm.policy_bydst[dir];
2432                 htab->table = xfrm_hash_alloc(sz);
2433                 if (!htab->table)
2434                         goto out_bydst;
2435                 htab->hmask = hmask;
2436         }
2437
2438         INIT_LIST_HEAD(&net->xfrm.policy_all);
2439         INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
2440         if (net_eq(net, &init_net))
2441                 register_netdevice_notifier(&xfrm_dev_notifier);
2442         return 0;
2443
2444 out_bydst:
2445         for (dir--; dir >= 0; dir--) {
2446                 struct xfrm_policy_hash *htab;
2447
2448                 htab = &net->xfrm.policy_bydst[dir];
2449                 xfrm_hash_free(htab->table, sz);
2450         }
2451         xfrm_hash_free(net->xfrm.policy_byidx, sz);
2452 out_byidx:
2453         return -ENOMEM;
2454 }
2455
2456 static void xfrm_policy_fini(struct net *net)
2457 {
2458         struct xfrm_audit audit_info;
2459         unsigned int sz;
2460         int dir;
2461
2462         flush_work(&net->xfrm.policy_hash_work);
2463 #ifdef CONFIG_XFRM_SUB_POLICY
2464         audit_info.loginuid = -1;
2465         audit_info.sessionid = -1;
2466         audit_info.secid = 0;
2467         xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
2468 #endif
2469         audit_info.loginuid = -1;
2470         audit_info.sessionid = -1;
2471         audit_info.secid = 0;
2472         xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
2473         flush_work(&xfrm_policy_gc_work);
2474
2475         WARN_ON(!list_empty(&net->xfrm.policy_all));
2476
2477         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2478                 struct xfrm_policy_hash *htab;
2479
2480                 WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
2481
2482                 htab = &net->xfrm.policy_bydst[dir];
2483                 sz = (htab->hmask + 1);
2484                 WARN_ON(!hlist_empty(htab->table));
2485                 xfrm_hash_free(htab->table, sz);
2486         }
2487
2488         sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
2489         WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
2490         xfrm_hash_free(net->xfrm.policy_byidx, sz);
2491 }
2492
2493 static int __net_init xfrm_net_init(struct net *net)
2494 {
2495         int rv;
2496
2497         rv = xfrm_statistics_init(net);
2498         if (rv < 0)
2499                 goto out_statistics;
2500         rv = xfrm_state_init(net);
2501         if (rv < 0)
2502                 goto out_state;
2503         rv = xfrm_policy_init(net);
2504         if (rv < 0)
2505                 goto out_policy;
2506         rv = xfrm_sysctl_init(net);
2507         if (rv < 0)
2508                 goto out_sysctl;
2509         return 0;
2510
2511 out_sysctl:
2512         xfrm_policy_fini(net);
2513 out_policy:
2514         xfrm_state_fini(net);
2515 out_state:
2516         xfrm_statistics_fini(net);
2517 out_statistics:
2518         return rv;
2519 }
2520
2521 static void __net_exit xfrm_net_exit(struct net *net)
2522 {
2523         xfrm_sysctl_fini(net);
2524         xfrm_policy_fini(net);
2525         xfrm_state_fini(net);
2526         xfrm_statistics_fini(net);
2527 }
2528
2529 static struct pernet_operations __net_initdata xfrm_net_ops = {
2530         .init = xfrm_net_init,
2531         .exit = xfrm_net_exit,
2532 };
2533
2534 void __init xfrm_init(void)
2535 {
2536         register_pernet_subsys(&xfrm_net_ops);
2537         xfrm_input_init();
2538 }
2539
2540 #ifdef CONFIG_AUDITSYSCALL
2541 static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2542                                          struct audit_buffer *audit_buf)
2543 {
2544         struct xfrm_sec_ctx *ctx = xp->security;
2545         struct xfrm_selector *sel = &xp->selector;
2546
2547         if (ctx)
2548                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2549                                  ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2550
2551         switch(sel->family) {
2552         case AF_INET:
2553                 audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
2554                 if (sel->prefixlen_s != 32)
2555                         audit_log_format(audit_buf, " src_prefixlen=%d",
2556                                          sel->prefixlen_s);
2557                 audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
2558                 if (sel->prefixlen_d != 32)
2559                         audit_log_format(audit_buf, " dst_prefixlen=%d",
2560                                          sel->prefixlen_d);
2561                 break;
2562         case AF_INET6:
2563                 audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
2564                 if (sel->prefixlen_s != 128)
2565                         audit_log_format(audit_buf, " src_prefixlen=%d",
2566                                          sel->prefixlen_s);
2567                 audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
2568                 if (sel->prefixlen_d != 128)
2569                         audit_log_format(audit_buf, " dst_prefixlen=%d",
2570                                          sel->prefixlen_d);
2571                 break;
2572         }
2573 }
2574
2575 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
2576                            uid_t auid, u32 sessionid, u32 secid)
2577 {
2578         struct audit_buffer *audit_buf;
2579
2580         audit_buf = xfrm_audit_start("SPD-add");
2581         if (audit_buf == NULL)
2582                 return;
2583         xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2584         audit_log_format(audit_buf, " res=%u", result);
2585         xfrm_audit_common_policyinfo(xp, audit_buf);
2586         audit_log_end(audit_buf);
2587 }
2588 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2589
2590 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2591                               uid_t auid, u32 sessionid, u32 secid)
2592 {
2593         struct audit_buffer *audit_buf;
2594
2595         audit_buf = xfrm_audit_start("SPD-delete");
2596         if (audit_buf == NULL)
2597                 return;
2598         xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2599         audit_log_format(audit_buf, " res=%u", result);
2600         xfrm_audit_common_policyinfo(xp, audit_buf);
2601         audit_log_end(audit_buf);
2602 }
2603 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2604 #endif
2605
2606 #ifdef CONFIG_XFRM_MIGRATE
2607 static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2608                                        struct xfrm_selector *sel_tgt)
2609 {
2610         if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2611                 if (sel_tgt->family == sel_cmp->family &&
2612                     xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2613                                   sel_cmp->family) == 0 &&
2614                     xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2615                                   sel_cmp->family) == 0 &&
2616                     sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2617                     sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2618                         return 1;
2619                 }
2620         } else {
2621                 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2622                         return 1;
2623                 }
2624         }
2625         return 0;
2626 }
2627
2628 static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,
2629                                                      u8 dir, u8 type)
2630 {
2631         struct xfrm_policy *pol, *ret = NULL;
2632         struct hlist_node *entry;
2633         struct hlist_head *chain;
2634         u32 priority = ~0U;
2635
2636         read_lock_bh(&xfrm_policy_lock);
2637         chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir);
2638         hlist_for_each_entry(pol, entry, chain, bydst) {
2639                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2640                     pol->type == type) {
2641                         ret = pol;
2642                         priority = ret->priority;
2643                         break;
2644                 }
2645         }
2646         chain = &init_net.xfrm.policy_inexact[dir];
2647         hlist_for_each_entry(pol, entry, chain, bydst) {
2648                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2649                     pol->type == type &&
2650                     pol->priority < priority) {
2651                         ret = pol;
2652                         break;
2653                 }
2654         }
2655
2656         if (ret)
2657                 xfrm_pol_hold(ret);
2658
2659         read_unlock_bh(&xfrm_policy_lock);
2660
2661         return ret;
2662 }
2663
2664 static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
2665 {
2666         int match = 0;
2667
2668         if (t->mode == m->mode && t->id.proto == m->proto &&
2669             (m->reqid == 0 || t->reqid == m->reqid)) {
2670                 switch (t->mode) {
2671                 case XFRM_MODE_TUNNEL:
2672                 case XFRM_MODE_BEET:
2673                         if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2674                                           m->old_family) == 0 &&
2675                             xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2676                                           m->old_family) == 0) {
2677                                 match = 1;
2678                         }
2679                         break;
2680                 case XFRM_MODE_TRANSPORT:
2681                         /* in case of transport mode, template does not store
2682                            any IP addresses, hence we just compare mode and
2683                            protocol */
2684                         match = 1;
2685                         break;
2686                 default:
2687                         break;
2688                 }
2689         }
2690         return match;
2691 }
2692
2693 /* update endpoint address(es) of template(s) */
2694 static int xfrm_policy_migrate(struct xfrm_policy *pol,
2695                                struct xfrm_migrate *m, int num_migrate)
2696 {
2697         struct xfrm_migrate *mp;
2698         struct dst_entry *dst;
2699         int i, j, n = 0;
2700
2701         write_lock_bh(&pol->lock);
2702         if (unlikely(pol->walk.dead)) {
2703                 /* target policy has been deleted */
2704                 write_unlock_bh(&pol->lock);
2705                 return -ENOENT;
2706         }
2707
2708         for (i = 0; i < pol->xfrm_nr; i++) {
2709                 for (j = 0, mp = m; j < num_migrate; j++, mp++) {
2710                         if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
2711                                 continue;
2712                         n++;
2713                         if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
2714                             pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
2715                                 continue;
2716                         /* update endpoints */
2717                         memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
2718                                sizeof(pol->xfrm_vec[i].id.daddr));
2719                         memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
2720                                sizeof(pol->xfrm_vec[i].saddr));
2721                         pol->xfrm_vec[i].encap_family = mp->new_family;
2722                         /* flush bundles */
2723                         while ((dst = pol->bundles) != NULL) {
2724                                 pol->bundles = dst->next;
2725                                 dst_free(dst);
2726                         }
2727                 }
2728         }
2729
2730         write_unlock_bh(&pol->lock);
2731
2732         if (!n)
2733                 return -ENODATA;
2734
2735         return 0;
2736 }
2737
2738 static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2739 {
2740         int i, j;
2741
2742         if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2743                 return -EINVAL;
2744
2745         for (i = 0; i < num_migrate; i++) {
2746                 if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2747                                    m[i].old_family) == 0) &&
2748                     (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2749                                    m[i].old_family) == 0))
2750                         return -EINVAL;
2751                 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2752                     xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2753                         return -EINVAL;
2754
2755                 /* check if there is any duplicated entry */
2756                 for (j = i + 1; j < num_migrate; j++) {
2757                         if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2758                                     sizeof(m[i].old_daddr)) &&
2759                             !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2760                                     sizeof(m[i].old_saddr)) &&
2761                             m[i].proto == m[j].proto &&
2762                             m[i].mode == m[j].mode &&
2763                             m[i].reqid == m[j].reqid &&
2764                             m[i].old_family == m[j].old_family)
2765                                 return -EINVAL;
2766                 }
2767         }
2768
2769         return 0;
2770 }
2771
2772 int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
2773                  struct xfrm_migrate *m, int num_migrate,
2774                  struct xfrm_kmaddress *k)
2775 {
2776         int i, err, nx_cur = 0, nx_new = 0;
2777         struct xfrm_policy *pol = NULL;
2778         struct xfrm_state *x, *xc;
2779         struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
2780         struct xfrm_state *x_new[XFRM_MAX_DEPTH];
2781         struct xfrm_migrate *mp;
2782
2783         if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
2784                 goto out;
2785
2786         /* Stage 1 - find policy */
2787         if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
2788                 err = -ENOENT;
2789                 goto out;
2790         }
2791
2792         /* Stage 2 - find and update state(s) */
2793         for (i = 0, mp = m; i < num_migrate; i++, mp++) {
2794                 if ((x = xfrm_migrate_state_find(mp))) {
2795                         x_cur[nx_cur] = x;
2796                         nx_cur++;
2797                         if ((xc = xfrm_state_migrate(x, mp))) {
2798                                 x_new[nx_new] = xc;
2799                                 nx_new++;
2800                         } else {
2801                                 err = -ENODATA;
2802                                 goto restore_state;
2803                         }
2804                 }
2805         }
2806
2807         /* Stage 3 - update policy */
2808         if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
2809                 goto restore_state;
2810
2811         /* Stage 4 - delete old state(s) */
2812         if (nx_cur) {
2813                 xfrm_states_put(x_cur, nx_cur);
2814                 xfrm_states_delete(x_cur, nx_cur);
2815         }
2816
2817         /* Stage 5 - announce */
2818         km_migrate(sel, dir, type, m, num_migrate, k);
2819
2820         xfrm_pol_put(pol);
2821
2822         return 0;
2823 out:
2824         return err;
2825
2826 restore_state:
2827         if (pol)
2828                 xfrm_pol_put(pol);
2829         if (nx_cur)
2830                 xfrm_states_put(x_cur, nx_cur);
2831         if (nx_new)
2832                 xfrm_states_delete(x_new, nx_new);
2833
2834         return err;
2835 }
2836 EXPORT_SYMBOL(xfrm_migrate);
2837 #endif