3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/module.h>
27 #include <linux/errno.h>
28 #include <linux/types.h>
29 #include <linux/socket.h>
30 #include <linux/sockios.h>
31 #include <linux/net.h>
32 #include <linux/jiffies.h>
34 #include <linux/in6.h>
35 #include <linux/netdevice.h>
36 #include <linux/init.h>
37 #include <linux/jhash.h>
38 #include <linux/ipsec.h>
39 #include <linux/times.h>
41 #include <linux/ipv6.h>
42 #include <linux/icmpv6.h>
43 #include <linux/random.h>
46 #include <net/ndisc.h>
47 #include <net/inet6_hashtables.h>
48 #include <net/inet6_connection_sock.h>
50 #include <net/transp_v6.h>
51 #include <net/addrconf.h>
52 #include <net/ip6_route.h>
53 #include <net/ip6_checksum.h>
54 #include <net/inet_ecn.h>
55 #include <net/protocol.h>
58 #include <net/dsfield.h>
59 #include <net/timewait_sock.h>
60 #include <net/netdma.h>
61 #include <net/inet_common.h>
63 #include <asm/uaccess.h>
65 #include <linux/proc_fs.h>
66 #include <linux/seq_file.h>
68 #include <linux/crypto.h>
69 #include <linux/scatterlist.h>
/*
 * Forward declarations of handlers and ops tables that are referenced
 * before they are defined.  tcp_v6_send_reset and tcp_v6_reqsk_send_ack
 * are defined further down in this listing; the definitions of the
 * remaining names are presumably beyond the visible part of the file.
 * The two tcp_sock_af_ops tables follow a CONFIG_TCP_MD5SIG conditional.
 * The last two lines are a tcp_v6_md5_do_lookup signature whose
 * continuation is not shown here.
 */
71 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
75 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
77 static struct inet_connection_sock_af_ops ipv6_mapped;
78 static struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
84 struct in6_addr *addr)
/*
 * tcp_v6_hash - hash callback for a TCP/IPv6 socket.
 *
 * Only acts on sockets that are not in TCP_CLOSE.  Sockets whose
 * icsk_af_ops is the ipv6_mapped table take a separate branch; what each
 * branch calls is not visible in this listing.
 */
90 static void tcp_v6_hash(struct sock *sk)
92 if (sk->sk_state != TCP_CLOSE) {
93 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
/*
 * tcp_v6_check - fold a partial sum into a final TCP-over-IPv6 checksum.
 *
 * Thin wrapper around csum_ipv6_magic() with the protocol fixed to
 * IPPROTO_TCP.  saddr, daddr and len are passed straight through, and
 * base is the already accumulated partial checksum.  th is not used in
 * the visible return expression.
 */
103 static __inline__ __sum16 tcp_v6_check(struct tcphdr *th, int len,
104 struct in6_addr *saddr,
105 struct in6_addr *daddr,
108 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/*
 * tcp_v6_init_sequence - pick the initial sequence number used when
 * answering the segment in skb.
 *
 * Delegates to secure_tcpv6_sequence_number().  Note the argument order:
 * the destination address of the received packet is passed first and its
 * source address second, followed by port values taken from the TCP
 * header (only the source port argument is visible in this listing).
 */
111 static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
113 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
114 ipv6_hdr(skb)->saddr.s6_addr32,
116 tcp_hdr(skb)->source);
/*
 * tcp_v6_connect - start an active open on a TCP/IPv6 socket.
 *
 * @sk:    socket being connected
 * @uaddr: destination, interpreted as struct sockaddr_in6
 *
 * Steps visible in this listing (several statements, including most
 * error returns and labels, are not shown):
 *  - addr_len is checked against SIN6_LEN_RFC2133 and the family must be
 *    AF_INET6, otherwise -EAFNOSUPPORT is returned.
 *  - The flow label is taken from sin6_flowinfo; when a label is set it
 *    is looked up with fl6_sock_lookup() and the destination address in
 *    usin is overwritten with the one recorded for that label.
 *  - An all-zero destination is rewritten to the loopback address by
 *    setting the last address byte to 1.
 *  - Multicast destinations are tested for (the outcome is not shown).
 *  - For link-local destinations a non-zero sin6_scope_id is compared
 *    against, and then stored into, sk_bound_dev_if; a bound interface
 *    is required.
 *  - Cached timestamp state (ts_recent, ts_recent_stamp) is cleared when
 *    the destination differs from the previously stored np->daddr.
 *  - IPv4-mapped destinations: the socket is switched to the ipv6_mapped
 *    ops and tcp_v4_do_rcv backlog handler, then tcp_v4_connect() is
 *    called with an equivalent sockaddr_in.  The ipv6_specific ops and
 *    saved icsk_ext_hdr_len are restored afterwards, presumably only on
 *    failure (the condition is not visible), and np->saddr and
 *    np->rcv_saddr are set to mapped addresses.
 *  - Native IPv6: a flow is built from the socket state, rewritten to
 *    the first hop when a source route option is present, and resolved
 *    through ip6_dst_lookup() and __xfrm_lookup() with XFRM_LOOKUP_WAIT;
 *    ip6_dst_blackhole() is called from inside the xfrm error branch.
 *  - The route is cached with __ip6_dst_store(), mss_clamp is derived
 *    from IPV6_MIN_MTU, the socket moves to TCP_SYN_SENT, is hashed with
 *    inet6_hash_connect(), receives an initial write_seq from
 *    secure_tcpv6_sequence_number() and finally tcp_connect() is called.
 *  - The trailing statements put the socket back to TCP_CLOSE and clear
 *    sk_route_caps; this looks like the failure path.
 *
 * Side effect to be aware of: usin (caller memory) may be modified.
 */
119 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
122 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
123 struct inet_sock *inet = inet_sk(sk);
124 struct inet_connection_sock *icsk = inet_csk(sk);
125 struct ipv6_pinfo *np = inet6_sk(sk);
126 struct tcp_sock *tp = tcp_sk(sk);
127 struct in6_addr *saddr = NULL, *final_p = NULL, final;
129 struct dst_entry *dst;
133 if (addr_len < SIN6_LEN_RFC2133)
136 if (usin->sin6_family != AF_INET6)
137 return(-EAFNOSUPPORT);
139 memset(&fl, 0, sizeof(fl));
142 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
143 IP6_ECN_flow_init(fl.fl6_flowlabel);
144 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
145 struct ip6_flowlabel *flowlabel;
146 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
147 if (flowlabel == NULL)
149 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
150 fl6_sock_release(flowlabel);
155 * connect() to INADDR_ANY means loopback (BSD'ism).
158 if(ipv6_addr_any(&usin->sin6_addr))
159 usin->sin6_addr.s6_addr[15] = 0x1;
161 addr_type = ipv6_addr_type(&usin->sin6_addr);
163 if(addr_type & IPV6_ADDR_MULTICAST)
166 if (addr_type&IPV6_ADDR_LINKLOCAL) {
167 if (addr_len >= sizeof(struct sockaddr_in6) &&
168 usin->sin6_scope_id) {
169 /* If interface is set while binding, indices
172 if (sk->sk_bound_dev_if &&
173 sk->sk_bound_dev_if != usin->sin6_scope_id)
176 sk->sk_bound_dev_if = usin->sin6_scope_id;
179 /* Connect to link-local address requires an interface */
180 if (!sk->sk_bound_dev_if)
184 if (tp->rx_opt.ts_recent_stamp &&
185 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
186 tp->rx_opt.ts_recent = 0;
187 tp->rx_opt.ts_recent_stamp = 0;
191 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
192 np->flow_label = fl.fl6_flowlabel;
198 if (addr_type == IPV6_ADDR_MAPPED) {
199 u32 exthdrlen = icsk->icsk_ext_hdr_len;
200 struct sockaddr_in sin;
202 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
204 if (__ipv6_only_sock(sk))
207 sin.sin_family = AF_INET;
208 sin.sin_port = usin->sin6_port;
209 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
211 icsk->icsk_af_ops = &ipv6_mapped;
212 sk->sk_backlog_rcv = tcp_v4_do_rcv;
213 #ifdef CONFIG_TCP_MD5SIG
214 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
217 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
220 icsk->icsk_ext_hdr_len = exthdrlen;
221 icsk->icsk_af_ops = &ipv6_specific;
222 sk->sk_backlog_rcv = tcp_v6_do_rcv;
223 #ifdef CONFIG_TCP_MD5SIG
224 tp->af_specific = &tcp_sock_ipv6_specific;
228 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
230 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
237 if (!ipv6_addr_any(&np->rcv_saddr))
238 saddr = &np->rcv_saddr;
240 fl.proto = IPPROTO_TCP;
241 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
242 ipv6_addr_copy(&fl.fl6_src,
243 (saddr ? saddr : &np->saddr));
244 fl.oif = sk->sk_bound_dev_if;
245 fl.fl_ip_dport = usin->sin6_port;
246 fl.fl_ip_sport = inet->sport;
248 if (np->opt && np->opt->srcrt) {
249 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
250 ipv6_addr_copy(&final, &fl.fl6_dst);
251 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
255 security_sk_classify_flow(sk, &fl);
257 err = ip6_dst_lookup(sk, &dst, &fl);
261 ipv6_addr_copy(&fl.fl6_dst, final_p);
263 if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
265 err = ip6_dst_blackhole(sk, &dst, &fl);
272 ipv6_addr_copy(&np->rcv_saddr, saddr);
275 /* set the source address */
276 ipv6_addr_copy(&np->saddr, saddr);
277 inet->rcv_saddr = LOOPBACK4_IPV6;
279 sk->sk_gso_type = SKB_GSO_TCPV6;
280 __ip6_dst_store(sk, dst, NULL, NULL);
282 icsk->icsk_ext_hdr_len = 0;
284 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
287 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
289 inet->dport = usin->sin6_port;
291 tcp_set_state(sk, TCP_SYN_SENT);
292 err = inet6_hash_connect(&tcp_death_row, sk);
297 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
302 err = tcp_connect(sk);
309 tcp_set_state(sk, TCP_CLOSE);
313 sk->sk_route_caps = 0;
/*
 * tcp_v6_err - ICMPv6 error handler for TCP.
 *
 * @skb:    ICMPv6 message; skb->data points at the embedded IPv6 header
 *          of the offending packet and skb->data + offset at its TCP
 *          header
 * @type, @code: ICMPv6 type and code
 * @info:   extra ICMPv6 information
 *
 * Visible behaviour (some statements, labels and returns are not part of
 * this listing):
 *  - The owning socket is found with inet6_lookup() using the addresses
 *    and ports of the embedded headers; an ICMPv6 statistic is bumped
 *    (presumably when no socket is found).
 *  - TIME_WAIT sockets are only released with inet_twsk_put().
 *  - LINUX_MIB_LOCKDROPPEDICMPS is counted when the socket is owned by
 *    user context; closed sockets are tested for.
 *  - Unless the socket is listening, the embedded sequence number must
 *    lie between snd_una and snd_nxt, otherwise
 *    LINUX_MIB_OUTOFWINDOWICMPS is counted.
 *  - ICMPV6_PKT_TOOBIG: tested again for user ownership and for the
 *    LISTEN and CLOSE states.  The cached route is validated with
 *    __sk_dst_check(); a flow built from the socket addresses and ports
 *    is resolved with ip6_dst_lookup() and xfrm_lookup(), presumably
 *    only when no cached route is valid, and failures are recorded in
 *    sk_err_soft.  If the cached path MTU cookie exceeds the route MTU
 *    the MSS is resynchronised and tcp_simple_retransmit() is called.
 *  - Other types are converted with icmpv6_err_convert() and handled per
 *    socket state: a pending request matching the embedded addresses is
 *    dropped when the sequence number equals its snt_isn; for the
 *    SYN_RECV case label (and possibly others not shown) the error is
 *    reported immediately when the socket is not owned by user context,
 *    otherwise it is stored in sk_err_soft.  The same report-or-store
 *    choice is finally made based on np->recverr.
 */
317 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
318 int type, int code, int offset, __be32 info)
320 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
321 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
322 struct ipv6_pinfo *np;
327 struct net *net = dev_net(skb->dev);
329 sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
330 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
333 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
338 if (sk->sk_state == TCP_TIME_WAIT) {
339 inet_twsk_put(inet_twsk(sk));
344 if (sock_owned_by_user(sk))
345 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
347 if (sk->sk_state == TCP_CLOSE)
351 seq = ntohl(th->seq);
352 if (sk->sk_state != TCP_LISTEN &&
353 !between(seq, tp->snd_una, tp->snd_nxt)) {
354 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
360 if (type == ICMPV6_PKT_TOOBIG) {
361 struct dst_entry *dst = NULL;
363 if (sock_owned_by_user(sk))
365 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
368 /* icmp should have updated the destination cache entry */
369 dst = __sk_dst_check(sk, np->dst_cookie);
372 struct inet_sock *inet = inet_sk(sk);
375 /* BUGGG_FUTURE: Again, it is not clear how
376 to handle rthdr case. Ignore this complexity
379 memset(&fl, 0, sizeof(fl));
380 fl.proto = IPPROTO_TCP;
381 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
382 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
383 fl.oif = sk->sk_bound_dev_if;
384 fl.fl_ip_dport = inet->dport;
385 fl.fl_ip_sport = inet->sport;
386 security_skb_classify_flow(skb, &fl);
388 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
389 sk->sk_err_soft = -err;
393 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
394 sk->sk_err_soft = -err;
401 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
402 tcp_sync_mss(sk, dst_mtu(dst));
403 tcp_simple_retransmit(sk);
404 } /* else let the usual retransmit timer handle it */
409 icmpv6_err_convert(type, code, &err);
411 /* Might be for a request_sock */
412 switch (sk->sk_state) {
413 struct request_sock *req, **prev;
415 if (sock_owned_by_user(sk))
418 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
419 &hdr->saddr, inet6_iif(skb));
423 /* ICMPs are not backlogged, hence we cannot get
424 * an established socket here.
426 WARN_ON(req->sk != NULL);
428 if (seq != tcp_rsk(req)->snt_isn) {
429 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
433 inet_csk_reqsk_queue_drop(sk, req, prev);
437 case TCP_SYN_RECV: /* Cannot happen.
438 It can, if SYNs are crossed. --ANK */
439 if (!sock_owned_by_user(sk)) {
441 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
445 sk->sk_err_soft = err;
449 if (!sock_owned_by_user(sk) && np->recverr) {
451 sk->sk_error_report(sk);
453 sk->sk_err_soft = err;
/*
 * tcp_v6_send_synack - build and transmit a SYN-ACK for a pending
 * connection request.
 *
 * @sk:  listening socket
 * @req: request being answered; addresses come from its
 *       inet6_request_sock part
 *
 * A flow is built from the request addresses and ports (flow label 0),
 * rewritten to the first hop when a source route option is in use and
 * resolved with ip6_dst_lookup() and xfrm_lookup().  The segment comes
 * from tcp_make_synack(); its checksum is computed over the whole skb
 * with tcp_v6_check() and it is sent with ip6_xmit(), whose result is
 * normalised by net_xmit_eval().
 *
 * An opt that is set and differs from np->opt is released with
 * sock_kfree_s() before leaving.  How opt gets assigned is not visible
 * in this listing.
 */
461 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
463 struct inet6_request_sock *treq = inet6_rsk(req);
464 struct ipv6_pinfo *np = inet6_sk(sk);
465 struct sk_buff * skb;
466 struct ipv6_txoptions *opt = NULL;
467 struct in6_addr * final_p = NULL, final;
469 struct dst_entry *dst;
472 memset(&fl, 0, sizeof(fl));
473 fl.proto = IPPROTO_TCP;
474 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
475 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
476 fl.fl6_flowlabel = 0;
478 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
479 fl.fl_ip_sport = inet_sk(sk)->sport;
480 security_req_classify_flow(req, &fl);
483 if (opt && opt->srcrt) {
484 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
485 ipv6_addr_copy(&final, &fl.fl6_dst);
486 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
490 err = ip6_dst_lookup(sk, &dst, &fl);
494 ipv6_addr_copy(&fl.fl6_dst, final_p);
495 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
498 skb = tcp_make_synack(sk, dst, req);
500 struct tcphdr *th = tcp_hdr(skb);
502 th->check = tcp_v6_check(th, skb->len,
503 &treq->loc_addr, &treq->rmt_addr,
504 csum_partial((char *)th, skb->len, skb->csum));
506 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
507 err = ip6_xmit(sk, skb, &fl, opt, 0);
508 err = net_xmit_eval(err);
512 if (opt && opt != np->opt)
513 sock_kfree_s(sk, opt, opt->tot_len);
/*
 * syn_flood_warning - log a possible SYN flood on the destination port
 * of skb.
 *
 * With CONFIG_SYN_COOKIES and sysctl_tcp_syncookies set the message says
 * that cookies are being sent; the other message says the request is
 * being dropped.  The port is printed in host byte order.
 */
518 static inline void syn_flood_warning(struct sk_buff *skb)
520 #ifdef CONFIG_SYN_COOKIES
521 if (sysctl_tcp_syncookies)
523 "TCPv6: Possible SYN flooding on port %d. "
524 "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
528 "TCPv6: Possible SYN flooding on port %d. "
529 "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
/*
 * tcp_v6_reqsk_destructor - request_sock destructor for TCP/IPv6.
 *
 * Releases the skb saved in pktopts, if one is attached to the request.
 */
532 static void tcp_v6_reqsk_destructor(struct request_sock *req)
534 if (inet6_rsk(req)->pktopts)
535 kfree_skb(inet6_rsk(req)->pktopts);
538 #ifdef CONFIG_TCP_MD5SIG
/*
 * tcp_v6_md5_do_lookup - find the MD5 signature key configured on sk for
 * the peer address addr.
 *
 * Performs a linear scan over the keys6 array of tp->md5sig_info and
 * returns a pointer to the base key of the first entry whose address
 * equals addr.  The early test covers a missing md5sig_info and an empty
 * table; the value returned in those cases and on a miss is not visible
 * in this listing (presumably NULL, since callers test the result
 * against NULL).
 */
539 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
540 struct in6_addr *addr)
542 struct tcp_sock *tp = tcp_sk(sk);
547 if (!tp->md5sig_info || !tp->md5sig_info->entries6)
550 for (i = 0; i < tp->md5sig_info->entries6; i++) {
551 if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr))
552 return &tp->md5sig_info->keys6[i].base;
/*
 * tcp_v6_md5_lookup - look up the MD5 key on sk that matches the
 * destination address (daddr) of addr_sk.
 */
557 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
558 struct sock *addr_sk)
560 return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
/*
 * tcp_v6_reqsk_md5_lookup - look up the MD5 key on sk that matches the
 * remote address (rmt_addr) recorded in the connection request req.
 */
563 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
564 struct request_sock *req)
566 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
/*
 * tcp_v6_md5_do_add - add or replace the MD5 key for an IPv6 peer.
 *
 * @sk:        socket owning the key table
 * @peer:      peer address the key applies to
 * @newkey:    key material; the pointer itself is stored in the table
 *             (it is not copied), so ownership passes to the table
 * @newkeylen: key length in bytes
 *
 * If a key for peer already exists that entry is updated in place
 * (only the keylen update is visible in this listing).  Otherwise:
 *  - md5sig_info is allocated zeroed with GFP_ATOMIC when missing and
 *    the GSO bits are cleared from sk_route_caps,
 *  - tcp_alloc_md5sig_pool() must succeed,
 *  - when the array is full (alloced6 == entries6) a new array with room
 *    for exactly one more entry is allocated, the old entries are moved
 *    over and the old array is freed,
 *  - the new entry is written at index entries6 and the count is bumped.
 *
 * Return values and the cleanup on allocation failure are only partly
 * visible here; tcp_free_md5sig_pool() is called on the path where the
 * array allocation is handled.
 */
569 static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
570 char *newkey, u8 newkeylen)
572 /* Add key to the list */
573 struct tcp_md5sig_key *key;
574 struct tcp_sock *tp = tcp_sk(sk);
575 struct tcp6_md5sig_key *keys;
577 key = tcp_v6_md5_do_lookup(sk, peer);
579 /* modify existing entry - just update that one */
582 key->keylen = newkeylen;
584 /* reallocate new list if current one is full. */
585 if (!tp->md5sig_info) {
586 tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
587 if (!tp->md5sig_info) {
591 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
593 if (tcp_alloc_md5sig_pool() == NULL) {
597 if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
598 keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) *
599 (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);
602 tcp_free_md5sig_pool();
607 if (tp->md5sig_info->entries6)
608 memmove(keys, tp->md5sig_info->keys6,
609 (sizeof (tp->md5sig_info->keys6[0]) *
610 tp->md5sig_info->entries6));
612 kfree(tp->md5sig_info->keys6);
613 tp->md5sig_info->keys6 = keys;
614 tp->md5sig_info->alloced6++;
617 ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr,
619 tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
620 tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;
622 tp->md5sig_info->entries6++;
/*
 * tcp_v6_md5_add_func - add an MD5 key on sk for the peer given by the
 * destination address of addr_sk.
 *
 * Forwards to tcp_v6_md5_do_add(); the remaining arguments of that call
 * are not visible in this listing.
 */
627 static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk,
628 u8 *newkey, __u8 newkeylen)
630 return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr,
/*
 * tcp_v6_md5_do_del - remove the MD5 key configured for an IPv6 peer.
 *
 * Scans keys6 for an entry whose address equals peer.  On a match the
 * key material is freed and entries6 is decremented; if that was the
 * last entry the array itself is freed and alloced6 reset, otherwise the
 * entries behind the removed slot are shifted down by one with
 * memmove().  tcp_free_md5sig_pool() is called for the removed key.
 *
 * tp->md5sig_info is dereferenced without a visible NULL check, so the
 * caller has to guarantee it is set (tcp_v6_parse_md5_keys tests it
 * before calling).  Return values are not visible in this listing.
 */
634 static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer)
636 struct tcp_sock *tp = tcp_sk(sk);
639 for (i = 0; i < tp->md5sig_info->entries6; i++) {
640 if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) {
642 kfree(tp->md5sig_info->keys6[i].base.key);
643 tp->md5sig_info->entries6--;
645 if (tp->md5sig_info->entries6 == 0) {
646 kfree(tp->md5sig_info->keys6);
647 tp->md5sig_info->keys6 = NULL;
648 tp->md5sig_info->alloced6 = 0;
650 /* shrink the database */
651 if (tp->md5sig_info->entries6 != i)
652 memmove(&tp->md5sig_info->keys6[i],
653 &tp->md5sig_info->keys6[i+1],
654 (tp->md5sig_info->entries6 - i)
655 * sizeof (tp->md5sig_info->keys6[0]));
657 tcp_free_md5sig_pool();
/*
 * tcp_v6_clear_md5_list - drop every MD5 key held by sk.
 *
 * Handles the IPv6 table (keys6) and the IPv4 table (keys4) the same
 * way: when a table has entries, each key buffer is freed, the count is
 * zeroed and tcp_free_md5sig_pool() is called once.  The visible lines
 * then free the array and reset the pointer and allocation counter; the
 * condition guarding those lines is not visible in this listing.
 *
 * tp->md5sig_info is dereferenced without a visible NULL check.
 */
664 static void tcp_v6_clear_md5_list (struct sock *sk)
666 struct tcp_sock *tp = tcp_sk(sk);
669 if (tp->md5sig_info->entries6) {
670 for (i = 0; i < tp->md5sig_info->entries6; i++)
671 kfree(tp->md5sig_info->keys6[i].base.key);
672 tp->md5sig_info->entries6 = 0;
673 tcp_free_md5sig_pool();
676 kfree(tp->md5sig_info->keys6);
677 tp->md5sig_info->keys6 = NULL;
678 tp->md5sig_info->alloced6 = 0;
680 if (tp->md5sig_info->entries4) {
681 for (i = 0; i < tp->md5sig_info->entries4; i++)
682 kfree(tp->md5sig_info->keys4[i].base.key);
683 tp->md5sig_info->entries4 = 0;
684 tcp_free_md5sig_pool();
687 kfree(tp->md5sig_info->keys4);
688 tp->md5sig_info->keys4 = NULL;
689 tp->md5sig_info->alloced4 = 0;
/*
 * tcp_v6_parse_md5_keys - handle an MD5 key request passed from user
 * space as a struct tcp_md5sig.
 *
 * @sk:     socket to configure
 * @optval: user pointer to the struct tcp_md5sig
 *
 * Validation visible here: optlen must cover the whole structure, the
 * copy from user space must succeed and the embedded address must be
 * AF_INET6.  The error codes returned are not part of this listing.
 *
 * A key length of zero requests deletion: nothing can be deleted when
 * md5sig_info is missing; IPv4-mapped addresses are routed to
 * tcp_v4_md5_do_del() using the last 32 bits of the address, everything
 * else to tcp_v6_md5_do_del().
 *
 * Otherwise the key length is checked against TCP_MD5SIG_MAXKEYLEN,
 * md5sig_info is allocated with GFP_KERNEL when missing (also clearing
 * the GSO bits in sk_route_caps), the key bytes are duplicated with
 * kmemdup() and handed to tcp_v4_md5_do_add() or tcp_v6_md5_do_add()
 * depending on whether the address is IPv4-mapped.
 */
692 static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
695 struct tcp_md5sig cmd;
696 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
699 if (optlen < sizeof(cmd))
702 if (copy_from_user(&cmd, optval, sizeof(cmd)))
705 if (sin6->sin6_family != AF_INET6)
708 if (!cmd.tcpm_keylen) {
709 if (!tcp_sk(sk)->md5sig_info)
711 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
712 return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]);
713 return tcp_v6_md5_do_del(sk, &sin6->sin6_addr);
716 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
719 if (!tcp_sk(sk)->md5sig_info) {
720 struct tcp_sock *tp = tcp_sk(sk);
721 struct tcp_md5sig_info *p;
723 p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
728 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
731 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
734 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
735 return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3],
736 newkey, cmd.tcpm_keylen);
738 return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
/*
 * tcp_v6_md5_hash_pseudoheader - feed the IPv6 pseudo-header into the
 * running MD5 computation of hp.
 *
 * @hp:     MD5 pool entry; its md5_blk.ip6 scratch area is overwritten
 * @daddr:  destination address to place in the pseudo-header
 * @saddr:  source address to place in the pseudo-header
 * @nbytes: length value to place in the pseudo-header
 *
 * Note that daddr comes before saddr in the parameter list.  Protocol
 * and length are stored as 32-bit big-endian values.
 *
 * Returns the result of crypto_hash_update().
 */
741 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
742 struct in6_addr *daddr,
743 struct in6_addr *saddr, int nbytes)
745 struct tcp6_pseudohdr *bp;
746 struct scatterlist sg;
748 bp = &hp->md5_blk.ip6;
749 /* 1. TCP pseudo-header (RFC2460) */
750 ipv6_addr_copy(&bp->saddr, saddr);
751 ipv6_addr_copy(&bp->daddr, daddr);
752 bp->protocol = cpu_to_be32(IPPROTO_TCP);
753 bp->len = cpu_to_be32(nbytes);
755 sg_init_one(&sg, bp, sizeof(*bp));
756 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
/*
 * tcp_v6_md5_hash_hdr - compute the MD5 signature of a header-only
 * segment.
 *
 * @md5_hash: output buffer; 16 bytes of it are zeroed by the final
 *            memset, which looks like the failure path
 * @key:      signing key
 * @daddr, @saddr: addresses for the pseudo-header (daddr first)
 *
 * The digest covers, in order: the pseudo-header with a length of
 * th->doff << 2, the TCP header, and the key.  An MD5 pool entry is
 * taken with tcp_get_md5sig_pool() and released with
 * tcp_put_md5sig_pool() on both exit paths that hold it.  Labels and
 * return values are not visible in this listing.
 */
759 static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
760 struct in6_addr *daddr, struct in6_addr *saddr,
763 struct tcp_md5sig_pool *hp;
764 struct hash_desc *desc;
766 hp = tcp_get_md5sig_pool();
768 goto clear_hash_noput;
769 desc = &hp->md5_desc;
771 if (crypto_hash_init(desc))
773 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
775 if (tcp_md5_hash_header(hp, th))
777 if (tcp_md5_hash_key(hp, key))
779 if (crypto_hash_final(desc, md5_hash))
782 tcp_put_md5sig_pool();
786 tcp_put_md5sig_pool();
788 memset(md5_hash, 0, 16);
/*
 * tcp_v6_md5_hash_skb - compute the MD5 signature of a full segment.
 *
 * @md5_hash: output buffer; 16 bytes of it are zeroed by the final
 *            memset, which looks like the failure path
 * @key:      signing key
 * @sk, @req: optional sources for the addresses
 *
 * The addresses come from one of three places: the socket (saddr and
 * daddr of sk), the request (loc_addr and rmt_addr of req) or the IPv6
 * header of skb.  The conditions selecting between them are not visible
 * in this listing; presumably sk is preferred, then req, then the
 * packet.
 *
 * The digest covers, in order: the pseudo-header with length skb->len,
 * the TCP header, the skb data starting at offset th->doff << 2, and the
 * key.
 */
792 static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
793 struct sock *sk, struct request_sock *req,
796 struct in6_addr *saddr, *daddr;
797 struct tcp_md5sig_pool *hp;
798 struct hash_desc *desc;
799 struct tcphdr *th = tcp_hdr(skb);
802 saddr = &inet6_sk(sk)->saddr;
803 daddr = &inet6_sk(sk)->daddr;
805 saddr = &inet6_rsk(req)->loc_addr;
806 daddr = &inet6_rsk(req)->rmt_addr;
808 struct ipv6hdr *ip6h = ipv6_hdr(skb);
809 saddr = &ip6h->saddr;
810 daddr = &ip6h->daddr;
813 hp = tcp_get_md5sig_pool();
815 goto clear_hash_noput;
816 desc = &hp->md5_desc;
818 if (crypto_hash_init(desc))
821 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
823 if (tcp_md5_hash_header(hp, th))
825 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
827 if (tcp_md5_hash_key(hp, key))
829 if (crypto_hash_final(desc, md5_hash))
832 tcp_put_md5sig_pool();
836 tcp_put_md5sig_pool();
838 memset(md5_hash, 0, 16);
/*
 * tcp_v6_inbound_md5_hash - verify the MD5 signature option of a
 * received segment.
 *
 * The expected key is looked up on sk by the source address of the
 * packet and the option is located with tcp_parse_md5sig_option().
 * Four cases follow:
 *  - no key and no option: handled first (outcome not visible here),
 *  - key but no option: LINUX_MIB_TCPMD5NOTFOUND is counted,
 *  - option but no key: LINUX_MIB_TCPMD5UNEXPECTED is counted,
 *  - both: the hash is recomputed with tcp_v6_md5_hash_skb() and the
 *    16 bytes are compared; a computation failure or mismatch is logged,
 *    rate limited, with both endpoints.
 *
 * Return values are not visible in this listing.
 */
842 static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
844 __u8 *hash_location = NULL;
845 struct tcp_md5sig_key *hash_expected;
846 struct ipv6hdr *ip6h = ipv6_hdr(skb);
847 struct tcphdr *th = tcp_hdr(skb);
851 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
852 hash_location = tcp_parse_md5sig_option(th);
854 /* We've parsed the options - do we have a hash? */
855 if (!hash_expected && !hash_location)
858 if (hash_expected && !hash_location) {
859 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
863 if (!hash_expected && hash_location) {
864 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
868 /* check the signature */
869 genhash = tcp_v6_md5_hash_skb(newhash,
873 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
874 if (net_ratelimit()) {
875 printk(KERN_INFO "MD5 Hash %s for "
876 "(" NIP6_FMT ", %u)->"
877 "(" NIP6_FMT ", %u)\n",
878 genhash ? "failed" : "mismatch",
879 NIP6(ip6h->saddr), ntohs(th->source),
880 NIP6(ip6h->daddr), ntohs(th->dest));
/*
 * Request-sock operations for TCP over IPv6: object size of a
 * tcp6_request_sock plus the SYN-ACK retransmit, ACK, destructor and
 * reset handlers defined in this file.  Any further initialisers are not
 * visible in this listing.
 */
888 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
890 .obj_size = sizeof(struct tcp6_request_sock),
891 .rtx_syn_ack = tcp_v6_send_synack,
892 .send_ack = tcp_v6_reqsk_send_ack,
893 .destructor = tcp_v6_reqsk_destructor,
894 .send_reset = tcp_v6_send_reset
897 #ifdef CONFIG_TCP_MD5SIG
/*
 * TCP-specific request-sock operations for IPv6; the visible member
 * wires the MD5 key lookup for pending requests.
 */
898 static struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
899 .md5_lookup = tcp_v6_reqsk_md5_lookup,
/*
 * Timewait-sock operations for TCP over IPv6: object size of a
 * tcp6_timewait_sock, with the uniqueness check and destructor taken
 * from the generic tcp_twsk_* helpers.
 */
903 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
904 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
905 .twsk_unique = tcp_twsk_unique,
906 .twsk_destructor= tcp_twsk_destructor,
/*
 * tcp_v6_send_check - fill in the TCP checksum of an outgoing segment.
 *
 * @sk:  sending socket; np->saddr and np->daddr form the pseudo-header
 * @len: length used in the pseudo-header
 * @skb: segment being sent
 *
 * For CHECKSUM_PARTIAL only the complemented pseudo-header sum is stored
 * and csum_start and csum_offset are set to locate the check field, so
 * the sum can be completed later.  In the other branch the checksum is
 * computed here from a csum_partial() over the TCP header (th->doff << 2
 * bytes); the seed passed to csum_partial() is not visible in this
 * listing.
 */
909 static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
911 struct ipv6_pinfo *np = inet6_sk(sk);
912 struct tcphdr *th = tcp_hdr(skb);
914 if (skb->ip_summed == CHECKSUM_PARTIAL) {
915 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
916 skb->csum_start = skb_transport_header(skb) - skb->head;
917 skb->csum_offset = offsetof(struct tcphdr, check);
919 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
920 csum_partial((char *)th, th->doff<<2,
/*
 * tcp_v6_gso_send_check - prepare the checksum fields of a GSO segment.
 *
 * Makes sure a full TCP header can be pulled, then stores the
 * complemented pseudo-header sum (addresses from the IPv6 header, length
 * skb->len) in th->check, records where the check field lives and marks
 * the skb CHECKSUM_PARTIAL.  Return values are not visible in this
 * listing.
 */
925 static int tcp_v6_gso_send_check(struct sk_buff *skb)
927 struct ipv6hdr *ipv6h;
930 if (!pskb_may_pull(skb, sizeof(*th)))
933 ipv6h = ipv6_hdr(skb);
937 th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
939 skb->csum_start = skb_transport_header(skb) - skb->head;
940 skb->csum_offset = offsetof(struct tcphdr, check);
941 skb->ip_summed = CHECKSUM_PARTIAL;
/*
 * tcp_v6_send_reset - answer the segment in skb with a RST.
 *
 * @sk:  socket the segment was matched to; used for the MD5 key lookup
 * @skb: offending segment
 *
 * Visible behaviour (several statements are not part of this listing):
 *  - ipv6_unicast_destination(skb) is tested first.
 *  - With CONFIG_TCP_MD5SIG a key is looked up by the destination
 *    address of the packet, and tot_len grows by TCPOLEN_MD5SIG_ALIGNED
 *    (presumably only when a key was found).
 *  - A fresh skb is allocated and a TCP header is built with source and
 *    destination ports swapped.  seq is taken from the incoming ack_seq;
 *    ack_seq is the incoming seq advanced by the SYN and FIN flags and
 *    the payload length.
 *  - The reply flow swaps the addresses of the packet, is routed through
 *    the per-namespace control socket (net->ipv6.tcp_sk) and xfrm, and
 *    on success is sent with ip6_xmit(); TCP_MIB_OUTSEGS and
 *    TCP_MIB_OUTRSTS are counted.
 *
 * NOTE(review): the checksum below is computed over sizeof(*t1) bytes
 * both in csum_partial() and in csum_ipv6_magic(), although tot_len can
 * include the MD5 option.  tcp_v6_send_ack uses tot_len at the
 * equivalent places.  Verify whether signed resets carry a valid
 * checksum.
 *
 * NOTE(review): tcp_v6_md5_hash_hdr takes (daddr, saddr).  Here the
 * packet daddr is passed first and the packet saddr second, which is the
 * opposite of what tcp_v6_send_ack passes for a reply in the same
 * direction.  Confirm which order is intended.
 */
945 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
947 struct tcphdr *th = tcp_hdr(skb), *t1;
948 struct sk_buff *buff;
950 struct net *net = dev_net(skb->dst->dev);
951 struct sock *ctl_sk = net->ipv6.tcp_sk;
952 unsigned int tot_len = sizeof(*th);
953 #ifdef CONFIG_TCP_MD5SIG
954 struct tcp_md5sig_key *key;
960 if (!ipv6_unicast_destination(skb))
963 #ifdef CONFIG_TCP_MD5SIG
965 key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
970 tot_len += TCPOLEN_MD5SIG_ALIGNED;
974 * We need to grab some memory, and put together an RST,
975 * and then put it into the queue to be sent.
978 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
983 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
985 t1 = (struct tcphdr *) skb_push(buff, tot_len);
987 /* Swap the send and the receive. */
988 memset(t1, 0, sizeof(*t1));
989 t1->dest = th->source;
990 t1->source = th->dest;
991 t1->doff = tot_len / 4;
995 t1->seq = th->ack_seq;
998 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
999 + skb->len - (th->doff<<2));
1002 #ifdef CONFIG_TCP_MD5SIG
1004 __be32 *opt = (__be32*)(t1 + 1);
1005 opt[0] = htonl((TCPOPT_NOP << 24) |
1006 (TCPOPT_NOP << 16) |
1007 (TCPOPT_MD5SIG << 8) |
1009 tcp_v6_md5_hash_hdr((__u8 *)&opt[1], key,
1010 &ipv6_hdr(skb)->daddr,
1011 &ipv6_hdr(skb)->saddr, t1);
1015 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1017 memset(&fl, 0, sizeof(fl));
1018 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
1019 ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
1021 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1022 sizeof(*t1), IPPROTO_TCP,
1025 fl.proto = IPPROTO_TCP;
1026 fl.oif = inet6_iif(skb);
1027 fl.fl_ip_dport = t1->dest;
1028 fl.fl_ip_sport = t1->source;
1029 security_skb_classify_flow(skb, &fl);
1031 /* Pass a socket to ip6_dst_lookup either it is for RST
1032 * Underlying function will use this to retrieve the network
1035 if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
1037 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
1038 ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
1039 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
1040 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
/*
 * tcp_v6_send_ack - send a stand-alone ACK in reply to skb without a
 * full socket.
 *
 * @skb: segment being answered; ports and addresses are swapped
 * @seq: sequence number to send
 * @ack: acknowledgment number to send
 * @win: window value, stored after htons()
 * @ts:  timestamp value; its use is only partly visible in this listing
 * @key: MD5 key, or presumably NULL when the reply is unsigned
 *
 * tot_len starts at the bare TCP header and grows by the aligned
 * timestamp and MD5 option sizes (the guarding conditions are not
 * shown).  Options are written right behind the header through topt:
 * first the timestamp option, then the MD5 option whose digest comes
 * from tcp_v6_md5_hash_hdr() with the packet saddr passed as daddr and
 * the packet daddr passed as saddr.
 *
 * The checksum covers tot_len bytes.  The reply is routed through the
 * per-namespace control socket and xfrm, sent with ip6_xmit(), and
 * TCP_MIB_OUTSEGS is counted.
 */
1048 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
1049 struct tcp_md5sig_key *key)
1051 struct tcphdr *th = tcp_hdr(skb), *t1;
1052 struct sk_buff *buff;
1054 struct net *net = dev_net(skb->dst->dev);
1055 struct sock *ctl_sk = net->ipv6.tcp_sk;
1056 unsigned int tot_len = sizeof(struct tcphdr);
1060 tot_len += TCPOLEN_TSTAMP_ALIGNED;
1061 #ifdef CONFIG_TCP_MD5SIG
1063 tot_len += TCPOLEN_MD5SIG_ALIGNED;
1066 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1071 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1073 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1075 /* Swap the send and the receive. */
1076 memset(t1, 0, sizeof(*t1));
1077 t1->dest = th->source;
1078 t1->source = th->dest;
1079 t1->doff = tot_len/4;
1080 t1->seq = htonl(seq);
1081 t1->ack_seq = htonl(ack);
1083 t1->window = htons(win);
1085 topt = (__be32 *)(t1 + 1);
1088 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1089 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1090 *topt++ = htonl(tcp_time_stamp);
1094 #ifdef CONFIG_TCP_MD5SIG
1096 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1097 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
1098 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
1099 &ipv6_hdr(skb)->saddr,
1100 &ipv6_hdr(skb)->daddr, t1);
1104 buff->csum = csum_partial((char *)t1, tot_len, 0);
1106 memset(&fl, 0, sizeof(fl));
1107 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
1108 ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
1110 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1111 tot_len, IPPROTO_TCP,
1114 fl.proto = IPPROTO_TCP;
1115 fl.oif = inet6_iif(skb);
1116 fl.fl_ip_dport = t1->dest;
1117 fl.fl_ip_sport = t1->source;
1118 security_skb_classify_flow(skb, &fl);
1120 if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
1121 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
1122 ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
1123 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
/*
 * tcp_v6_timewait_ack - ACK a segment that hit a TIME_WAIT socket.
 *
 * Sends the ACK via tcp_v6_send_ack() using the sequence state saved in
 * the timewait socket.  The advertised window is tw_rcv_wnd shifted
 * right by the saved window scale, and the MD5 key comes from
 * tcp_twsk_md5_key().  Anything done after the send is not visible in
 * this listing.
 */
1131 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1133 struct inet_timewait_sock *tw = inet_twsk(sk);
1134 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1136 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1137 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1138 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw));
/*
 * tcp_v6_reqsk_send_ack - ACK a segment on behalf of a pending request.
 *
 * Sends snt_isn + 1 as sequence number and rcv_isn + 1 as acknowledgment
 * together with the window and timestamp recorded in the request.
 *
 * NOTE(review): the MD5 key is looked up by the destination address of
 * the received packet, whereas tcp_v6_inbound_md5_hash looks keys up by
 * the packet source address.  Confirm that daddr is the intended lookup
 * key here.
 */
1143 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
1144 struct request_sock *req)
1146 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
1147 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr));
/*
 * tcp_v6_hnd_req - find the socket that should process a segment which
 * arrived on the listening socket sk.
 *
 * Lookup order visible here:
 *  1. A pending request matching the source port, addresses and incoming
 *     interface; the result of tcp_check_req() is returned (presumably
 *     only when a request was found).
 *  2. An established socket via __inet6_lookup_established().  A hit is
 *     tested for TIME_WAIT; the timewait reference is dropped with
 *     inet_twsk_put().  What is returned in either case is not visible
 *     in this listing.
 *  3. With CONFIG_SYN_COOKIES, a pure ACK (no RST, no SYN) is validated
 *     with cookie_v6_check(), whose result replaces sk.
 */
1151 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1153 struct request_sock *req, **prev;
1154 const struct tcphdr *th = tcp_hdr(skb);
1157 /* Find possible connection requests. */
1158 req = inet6_csk_search_req(sk, &prev, th->source,
1159 &ipv6_hdr(skb)->saddr,
1160 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
1162 return tcp_check_req(sk, skb, req, prev);
1164 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
1165 &ipv6_hdr(skb)->saddr, th->source,
1166 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
1169 if (nsk->sk_state != TCP_TIME_WAIT) {
1173 inet_twsk_put(inet_twsk(nsk));
1177 #ifdef CONFIG_SYN_COOKIES
1178 if (!th->rst && !th->syn && th->ack)
1179 sk = cookie_v6_check(sk, skb);
/*
 * tcp_v6_conn_request - handle an incoming SYN on a listening socket.
 *
 * Visible behaviour (several statements, labels and returns are not part
 * of this listing):
 *  - IPv4 packets (ETH_P_IP) are delegated to tcp_v4_conn_request().
 *  - ipv6_unicast_destination(skb) is tested.
 *  - When the request queue is full and no ISN was supplied in
 *    TCP_SKB_CB(skb)->when, a rate-limited flood warning is logged and,
 *    with CONFIG_SYN_COOKIES, sysctl_tcp_syncookies is consulted.
 *  - The accept queue is tested: full with more than one young request.
 *  - A request is allocated with inet6_reqsk_alloc(), TCP options are
 *    parsed with an MSS clamp derived from IPV6_MIN_MTU and the user
 *    MSS; in cookie mode options are cleared unless a timestamp was
 *    seen.
 *  - Remote and local addresses are copied from the packet into the
 *    request.
 *  - Cookie mode takes the ISN from cookie_v6_init_sequence().  The
 *    other visible path keeps a reference to the SYN skb in pktopts when
 *    packet options are wanted, records the incoming interface for
 *    link-local peers on unbound sockets, and takes the ISN from
 *    tcp_v6_init_sequence().
 *  - The SYN-ACK is sent with tcp_v6_send_synack() and the request is
 *    hashed with a TCP_TIMEOUT_INIT timeout.
 *  - The final visible return is 0, meaning no reset is sent.
 */
1184 /* FIXME: this is substantially similar to the ipv4 code.
1185 * Can some kind of merge be done? -- erics
1187 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1189 struct inet6_request_sock *treq;
1190 struct ipv6_pinfo *np = inet6_sk(sk);
1191 struct tcp_options_received tmp_opt;
1192 struct tcp_sock *tp = tcp_sk(sk);
1193 struct request_sock *req = NULL;
1194 __u32 isn = TCP_SKB_CB(skb)->when;
1195 #ifdef CONFIG_SYN_COOKIES
1196 int want_cookie = 0;
1198 #define want_cookie 0
1201 if (skb->protocol == htons(ETH_P_IP))
1202 return tcp_v4_conn_request(sk, skb);
1204 if (!ipv6_unicast_destination(skb))
1207 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1208 if (net_ratelimit())
1209 syn_flood_warning(skb);
1210 #ifdef CONFIG_SYN_COOKIES
1211 if (sysctl_tcp_syncookies)
1218 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1221 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
1225 #ifdef CONFIG_TCP_MD5SIG
1226 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
1229 tcp_clear_options(&tmp_opt);
1230 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1231 tmp_opt.user_mss = tp->rx_opt.user_mss;
1233 tcp_parse_options(skb, &tmp_opt, 0);
1235 if (want_cookie && !tmp_opt.saw_tstamp)
1236 tcp_clear_options(&tmp_opt);
1238 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1239 tcp_openreq_init(req, &tmp_opt, skb);
1241 treq = inet6_rsk(req);
1242 ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
1243 ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
1245 TCP_ECN_create_request(req, tcp_hdr(skb));
1248 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1249 req->cookie_ts = tmp_opt.tstamp_ok;
1251 if (ipv6_opt_accepted(sk, skb) ||
1252 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1253 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1254 atomic_inc(&skb->users);
1255 treq->pktopts = skb;
1257 treq->iif = sk->sk_bound_dev_if;
1259 /* So that link locals have meaning */
1260 if (!sk->sk_bound_dev_if &&
1261 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1262 treq->iif = inet6_iif(skb);
1264 isn = tcp_v6_init_sequence(skb);
1267 tcp_rsk(req)->snt_isn = isn;
1269 security_inet_conn_request(sk, skb, req);
1271 if (tcp_v6_send_synack(sk, req))
1275 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1283 return 0; /* don't send reset */
1286 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1287 struct request_sock *req,
1288 struct dst_entry *dst)
1290 struct inet6_request_sock *treq;
1291 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1292 struct tcp6_sock *newtcp6sk;
1293 struct inet_sock *newinet;
1294 struct tcp_sock *newtp;
1296 struct ipv6_txoptions *opt;
1297 #ifdef CONFIG_TCP_MD5SIG
1298 struct tcp_md5sig_key *key;
1301 if (skb->protocol == htons(ETH_P_IP)) {
1306 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1311 newtcp6sk = (struct tcp6_sock *)newsk;
1312 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1314 newinet = inet_sk(newsk);
1315 newnp = inet6_sk(newsk);
1316 newtp = tcp_sk(newsk);
1318 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1320 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1323 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1326 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1328 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1329 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1330 #ifdef CONFIG_TCP_MD5SIG
1331 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1334 newnp->pktoptions = NULL;
1336 newnp->mcast_oif = inet6_iif(skb);
1337 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1340 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1341 * here, tcp_create_openreq_child now does this for us, see the comment in
1342 * that function for the gory details. -acme
1345 /* It is tricky place. Until this moment IPv4 tcp
1346 worked with IPv6 icsk.icsk_af_ops.
1349 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1354 treq = inet6_rsk(req);
1357 if (sk_acceptq_is_full(sk))
1361 struct in6_addr *final_p = NULL, final;
1364 memset(&fl, 0, sizeof(fl));
1365 fl.proto = IPPROTO_TCP;
1366 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1367 if (opt && opt->srcrt) {
1368 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1369 ipv6_addr_copy(&final, &fl.fl6_dst);
1370 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1373 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1374 fl.oif = sk->sk_bound_dev_if;
1375 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1376 fl.fl_ip_sport = inet_sk(sk)->sport;
1377 security_req_classify_flow(req, &fl);
1379 if (ip6_dst_lookup(sk, &dst, &fl))
1383 ipv6_addr_copy(&fl.fl6_dst, final_p);
1385 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1389 newsk = tcp_create_openreq_child(sk, req, skb);
1394 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1395 * count here, tcp_create_openreq_child now does this for us, see the
1396 * comment in that function for the gory details. -acme
1399 newsk->sk_gso_type = SKB_GSO_TCPV6;
1400 __ip6_dst_store(newsk, dst, NULL, NULL);
1402 newtcp6sk = (struct tcp6_sock *)newsk;
1403 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1405 newtp = tcp_sk(newsk);
1406 newinet = inet_sk(newsk);
1407 newnp = inet6_sk(newsk);
1409 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1411 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1412 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1413 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1414 newsk->sk_bound_dev_if = treq->iif;
1416 /* Now IPv6 options...
1418 First: no IPv4 options.
1420 newinet->opt = NULL;
1421 newnp->ipv6_fl_list = NULL;
1424 newnp->rxopt.all = np->rxopt.all;
1426 /* Clone pktoptions received with SYN */
1427 newnp->pktoptions = NULL;
1428 if (treq->pktopts != NULL) {
1429 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1430 kfree_skb(treq->pktopts);
1431 treq->pktopts = NULL;
1432 if (newnp->pktoptions)
1433 skb_set_owner_r(newnp->pktoptions, newsk);
1436 newnp->mcast_oif = inet6_iif(skb);
1437 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1439 /* Clone native IPv6 options from listening socket (if any)
1441 Yes, keeping reference count would be much more clever,
1442 but we do one more thing there: reattach optmem
1446 newnp->opt = ipv6_dup_options(newsk, opt);
1448 sock_kfree_s(sk, opt, opt->tot_len);
1451 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1453 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1454 newnp->opt->opt_flen);
1456 tcp_mtup_init(newsk);
1457 tcp_sync_mss(newsk, dst_mtu(dst));
1458 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1459 tcp_initialize_rcv_mss(newsk);
1461 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1463 #ifdef CONFIG_TCP_MD5SIG
1464 /* Copy over the MD5 key from the original socket */
1465 if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
1466 /* We're using one, so create a matching key
1467 * on the newsk structure. If we fail to get
1468 * memory, then we end up not copying the key
1471 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1473 tcp_v6_md5_do_add(newsk, &inet6_sk(sk)->daddr,
1474 newkey, key->keylen);
1478 __inet6_hash(newsk);
1479 __inet_inherit_port(sk, newsk);
1484 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1486 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1487 if (opt && opt != np->opt)
1488 sock_kfree_s(sk, opt, opt->tot_len);
/*
 * Prepare receive checksum state for a TCP segment carried over IPv6.
 *
 * If skb->ip_summed is CHECKSUM_COMPLETE and tcp_v6_check() over skb->csum
 * comes back zero, the skb is marked CHECKSUM_UNNECESSARY.  Further down,
 * skb->csum is loaded with the complemented, unfolded tcp_v6_check() result
 * computed from a zero base sum, and segments of at most 76 bytes are
 * verified on the spot through __skb_checksum_complete().
 *
 * tcp_v6_rcv() calls this only when skb_csum_unnecessary() is false.
 *
 * Returns a __sum16.  NOTE(review): the remaining return statements and the
 * closing braces sit on lines that this listing does not show.
 */
1493 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
1495 if (skb->ip_summed == CHECKSUM_COMPLETE) {
/* A checksum value is already in skb->csum; test it against the addresses. */
1496 if (!tcp_v6_check(tcp_hdr(skb), skb->len, &ipv6_hdr(skb)->saddr,
1497 &ipv6_hdr(skb)->daddr, skb->csum)) {
1498 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Store the complemented address/length sum (zero base) in skb->csum. */
1503 skb->csum = ~csum_unfold(tcp_v6_check(tcp_hdr(skb), skb->len,
1504 &ipv6_hdr(skb)->saddr,
1505 &ipv6_hdr(skb)->daddr, 0));
/* Segments of 76 bytes or fewer are verified right away. */
1507 if (skb->len <= 76) {
1508 return __skb_checksum_complete(skb);
1513 /* The socket must have its spinlock held when we get
1516 * We have a potential double-lock case here, so even when
1517 * doing backlog processing we use the BH locking scheme.
1518 * This is because we cannot sleep with the original spinlock
/*
 * Process one segment for a socket.  Called directly from tcp_v6_rcv() and
 * registered as tcpv6_prot.backlog_rcv.
 *
 * Visible flow:
 *  - IPv4 frames (skb->protocol == ETH_P_IP) are handed to tcp_v4_do_rcv().
 *  - With CONFIG_TCP_MD5SIG the segment is checked by
 *    tcp_v6_inbound_md5_hash(); it is then run through sk_filter().
 *  - A clone of the skb (opt_skb) may be taken for IPV6_PKTOPTIONS; the
 *    condition guarding the clone is not shown.
 *  - TCP_ESTABLISHED sockets go through tcp_rcv_established().
 *  - Otherwise header length and checksum are tested; TCP_LISTEN sockets go
 *    through tcp_v6_hnd_req() and tcp_child_process(), and the remaining
 *    visible path calls tcp_rcv_state_process().
 *  - The tail sends a reset with tcp_v6_send_reset(), frees opt_skb, bumps
 *    TCP_MIB_INERRS, and swaps opt_skb into np->pktoptions with xchg() when
 *    the conditions listed in the final comment hold.
 *
 * NOTE(review): labels, gotos, return statements and some guarding
 * conditions are on lines missing from this listing, so which tail step
 * belongs to which path cannot be confirmed from here.
 */
1521 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1523 struct ipv6_pinfo *np = inet6_sk(sk);
1524 struct tcp_sock *tp;
1525 struct sk_buff *opt_skb = NULL;
1527 /* Imagine: socket is IPv6. IPv4 packet arrives,
1528 goes to IPv4 receive handler and backlogged.
1529 From backlog it always goes here. Kerboom...
1530 Fortunately, tcp_rcv_established and rcv_established
1531 handle them correctly, but it is not case with
1532 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1535 if (skb->protocol == htons(ETH_P_IP))
1536 return tcp_v4_do_rcv(sk, skb);
1538 #ifdef CONFIG_TCP_MD5SIG
1539 if (tcp_v6_inbound_md5_hash (sk, skb))
1543 if (sk_filter(sk, skb))
1547 * socket locking is here for SMP purposes as backlog rcv
1548 * is currently called with bh processing disabled.
1551 /* Do Stevens' IPV6_PKTOPTIONS.
1553 Yes, guys, it is the only place in our code, where we
1554 may make it not affecting IPv4.
1555 The rest of code is protocol independent,
1556 and I do not like idea to uglify IPv4.
1558 Actually, all the idea behind IPV6_PKTOPTIONS
1559 looks not very well thought. For now we latch
1560 options, received in the last packet, enqueued
1561 by tcp. Feel free to propose better solution.
1565 opt_skb = skb_clone(skb, GFP_ATOMIC);
1567 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1568 TCP_CHECK_TIMER(sk);
1569 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1571 TCP_CHECK_TIMER(sk);
1573 goto ipv6_pktoptions;
1577 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1580 if (sk->sk_state == TCP_LISTEN) {
1581 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1586 * Queue it on the new socket if the new socket is active,
1587 * otherwise we just shortcircuit this and continue with
1591 if (tcp_child_process(sk, nsk, skb))
1594 __kfree_skb(opt_skb);
1599 TCP_CHECK_TIMER(sk);
1600 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1602 TCP_CHECK_TIMER(sk);
1604 goto ipv6_pktoptions;
1608 tcp_v6_send_reset(sk, skb);
1611 __kfree_skb(opt_skb);
1615 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1620 /* Do you ask, what is it?
1622 1. skb was enqueued by tcp.
1623 2. skb is added to tail of read queue, rather than out of order.
1624 3. socket is not in passive state.
1625 4. Finally, it really contains options, which user wants to receive.
1628 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1629 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1630 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1631 np->mcast_oif = inet6_iif(opt_skb);
1632 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1633 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1634 if (ipv6_opt_accepted(sk, opt_skb)) {
1635 skb_set_owner_r(opt_skb, sk);
1636 opt_skb = xchg(&np->pktoptions, opt_skb);
1638 __kfree_skb(opt_skb);
1639 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * Receive entry point for TCP over IPv6; installed as tcpv6_protocol.handler.
 *
 * Visible flow:
 *  - tests skb->pkt_type against PACKET_HOST and counts TCP_MIB_INSEGS;
 *  - pulls and sanity-checks the TCP header (doff, full header length) and
 *    calls tcp_v6_checksum_init() unless skb_csum_unnecessary();
 *  - fills TCP_SKB_CB(skb): seq, end_seq, ack_seq, when, flags (taken from
 *    ipv6_get_dsfield()) and sacked;
 *  - looks the socket up with __inet6_lookup_skb();
 *  - on the main path: xfrm6_policy_check(sk, ...), sk_filter(), then under
 *    bh_lock_sock_nested() the segment goes to tcp_v6_do_rcv(),
 *    tcp_prequeue() or sk_add_backlog(), keyed on sock_owned_by_user();
 *  - on a path that passes NULL as the socket: policy check, length and
 *    checksum test, TCP_MIB_INERRS and tcp_v6_send_reset(NULL, skb);
 *  - on the timewait path: policy and checksum tests, each dropping the
 *    timewait reference with inet_twsk_put() on failure, then a switch on
 *    tcp_timewait_state_process() that can look up a listener with
 *    inet6_lookup_listener(), deschedule the timewait socket, or reply with
 *    tcp_v6_timewait_ack().
 *
 * The visible return is "ret ? -1 : 0".  NOTE(review): labels, gotos,
 * else-branches and several statements are on lines missing from this
 * listing; confirm the exact branch structure against the full source.
 */
1648 static int tcp_v6_rcv(struct sk_buff *skb)
1653 struct net *net = dev_net(skb->dev);
1655 if (skb->pkt_type != PACKET_HOST)
1659 * Count it even if it's bad.
1661 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1663 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1668 if (th->doff < sizeof(struct tcphdr)/4)
1670 if (!pskb_may_pull(skb, th->doff*4))
1673 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
1677 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1678 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1679 skb->len - th->doff*4);
1680 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1681 TCP_SKB_CB(skb)->when = 0;
1682 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
1683 TCP_SKB_CB(skb)->sacked = 0;
1685 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1690 if (sk->sk_state == TCP_TIME_WAIT)
1693 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1694 goto discard_and_relse;
1696 if (sk_filter(sk, skb))
1697 goto discard_and_relse;
1701 bh_lock_sock_nested(sk);
1703 if (!sock_owned_by_user(sk)) {
1704 #ifdef CONFIG_NET_DMA
1705 struct tcp_sock *tp = tcp_sk(sk);
1706 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1707 tp->ucopy.dma_chan = get_softnet_dma();
1708 if (tp->ucopy.dma_chan)
1709 ret = tcp_v6_do_rcv(sk, skb);
1713 if (!tcp_prequeue(sk, skb))
1714 ret = tcp_v6_do_rcv(sk, skb);
1717 sk_add_backlog(sk, skb);
1721 return ret ? -1 : 0;
1724 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1727 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1729 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1731 tcp_v6_send_reset(NULL, skb);
1748 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1749 inet_twsk_put(inet_twsk(sk));
1753 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1754 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1755 inet_twsk_put(inet_twsk(sk));
1759 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1764 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1765 &ipv6_hdr(skb)->daddr,
1766 ntohs(th->dest), inet6_iif(skb));
1768 struct inet_timewait_sock *tw = inet_twsk(sk);
1769 inet_twsk_deschedule(tw, &tcp_death_row);
1774 /* Fall through to ACK */
1777 tcp_v6_timewait_ack(sk, skb);
1781 case TCP_TW_SUCCESS:;
/*
 * remember_stamp hook for native IPv6 sockets; installed as
 * ipv6_specific.remember_stamp.  The body's own comment says it is not
 * implemented yet.  NOTE(review): the return statement is on a line this
 * listing does not show.
 */
1786 static int tcp_v6_remember_stamp(struct sock *sk)
1788 /* Alas, not yet... */
/*
 * Address-family operations for TCP sockets that speak native IPv6.
 * tcp_v6_init_sock() installs this table as icsk->icsk_af_ops.  Transmit,
 * checksum, header rebuild and remember_stamp all point at IPv6 routines,
 * and net_header_len is sizeof(struct ipv6hdr).
 */
1792 static struct inet_connection_sock_af_ops ipv6_specific = {
1793 .queue_xmit = inet6_csk_xmit,
1794 .send_check = tcp_v6_send_check,
1795 .rebuild_header = inet6_sk_rebuild_header,
1796 .conn_request = tcp_v6_conn_request,
1797 .syn_recv_sock = tcp_v6_syn_recv_sock,
1798 .remember_stamp = tcp_v6_remember_stamp,
1799 .net_header_len = sizeof(struct ipv6hdr),
1800 .setsockopt = ipv6_setsockopt,
1801 .getsockopt = ipv6_getsockopt,
1802 .addr2sockaddr = inet6_csk_addr2sockaddr,
1803 .sockaddr_len = sizeof(struct sockaddr_in6),
1804 .bind_conflict = inet6_csk_bind_conflict,
1805 #ifdef CONFIG_COMPAT
1806 .compat_setsockopt = compat_ipv6_setsockopt,
1807 .compat_getsockopt = compat_ipv6_getsockopt,
/*
 * MD5 signature operations for native IPv6 TCP sockets (built only with
 * CONFIG_TCP_MD5SIG).  tcp_v6_init_sock() installs this table as
 * tp->af_specific.
 */
1811 #ifdef CONFIG_TCP_MD5SIG
1812 static struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1813 .md5_lookup = tcp_v6_md5_lookup,
1814 .calc_md5_hash = tcp_v6_md5_hash_skb,
1815 .md5_add = tcp_v6_md5_add_func,
1816 .md5_parse = tcp_v6_parse_md5_keys,
1821 * TCP over IPv4 via INET6 API
/*
 * Address-family operations for TCP over IPv4 reached through the INET6
 * socket API.  Transmit, checksum, header rebuild and remember_stamp use
 * the IPv4 routines and net_header_len is sizeof(struct iphdr); connection
 * request handling, socket options, address reporting and bind-conflict
 * checks stay on the IPv6 routines, and sockaddr_len remains
 * sizeof(struct sockaddr_in6).
 */
1824 static struct inet_connection_sock_af_ops ipv6_mapped = {
1825 .queue_xmit = ip_queue_xmit,
1826 .send_check = tcp_v4_send_check,
1827 .rebuild_header = inet_sk_rebuild_header,
1828 .conn_request = tcp_v6_conn_request,
1829 .syn_recv_sock = tcp_v6_syn_recv_sock,
1830 .remember_stamp = tcp_v4_remember_stamp,
1831 .net_header_len = sizeof(struct iphdr),
1832 .setsockopt = ipv6_setsockopt,
1833 .getsockopt = ipv6_getsockopt,
1834 .addr2sockaddr = inet6_csk_addr2sockaddr,
1835 .sockaddr_len = sizeof(struct sockaddr_in6),
1836 .bind_conflict = inet6_csk_bind_conflict,
1837 #ifdef CONFIG_COMPAT
1838 .compat_setsockopt = compat_ipv6_setsockopt,
1839 .compat_getsockopt = compat_ipv6_getsockopt,
/*
 * MD5 signature operations paired with the ipv6_mapped table (built only
 * with CONFIG_TCP_MD5SIG).  Key lookup and hash calculation use the IPv4
 * routines; adding and parsing keys use the IPv6 ones.
 */
1843 #ifdef CONFIG_TCP_MD5SIG
1844 static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1845 .md5_lookup = tcp_v4_md5_lookup,
1846 .calc_md5_hash = tcp_v4_md5_hash_skb,
1847 .md5_add = tcp_v6_md5_add_func,
1848 .md5_parse = tcp_v6_parse_md5_keys,
1852 /* NOTE: A lot of things are set to zero explicitly by the call to
1853 * sk_alloc() so need not be done here.
/*
 * Per-socket initialisation for TCP over IPv6; registered as tcpv6_prot.init.
 *
 * Visible steps:
 *  - initialise the out-of-order queue, the transmit timers and the prequeue;
 *  - set icsk_rto and tp->mdev to TCP_TIMEOUT_INIT;
 *  - set snd_ssthresh to 0x7fffffff, snd_cwnd_clamp to ~0, mss_cache to 536
 *    and reordering to sysctl_tcp_reordering;
 *  - put the socket in TCP_CLOSE;
 *  - install ipv6_specific as icsk_af_ops, tcp_init_congestion_ops as
 *    icsk_ca_ops and tcp_sync_mss as icsk_sync_mss;
 *  - set sk_write_space to sk_stream_write_space and raise
 *    SOCK_USE_WRITE_QUEUE;
 *  - with CONFIG_TCP_MD5SIG, install tcp_sock_ipv6_specific as af_specific;
 *  - take sk_sndbuf and sk_rcvbuf from index 1 of sysctl_tcp_wmem and
 *    sysctl_tcp_rmem;
 *  - increment tcp_sockets_allocated.
 *
 * NOTE(review): the statement announced by the "-DaveM" comment and the
 * return statement are on lines this listing does not show.
 */
1855 static int tcp_v6_init_sock(struct sock *sk)
1857 struct inet_connection_sock *icsk = inet_csk(sk);
1858 struct tcp_sock *tp = tcp_sk(sk);
1860 skb_queue_head_init(&tp->out_of_order_queue);
1861 tcp_init_xmit_timers(sk);
1862 tcp_prequeue_init(tp);
1864 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1865 tp->mdev = TCP_TIMEOUT_INIT;
1867 /* So many TCP implementations out there (incorrectly) count the
1868 * initial SYN frame in their delayed-ACK and congestion control
1869 * algorithms that we must have the following bandaid to talk
1870 * efficiently to them. -DaveM
1874 /* See draft-stevens-tcpca-spec-01 for discussion of the
1875 * initialization of these values.
1877 tp->snd_ssthresh = 0x7fffffff;
1878 tp->snd_cwnd_clamp = ~0;
1879 tp->mss_cache = 536;
1881 tp->reordering = sysctl_tcp_reordering;
1883 sk->sk_state = TCP_CLOSE;
1885 icsk->icsk_af_ops = &ipv6_specific;
1886 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1887 icsk->icsk_sync_mss = tcp_sync_mss;
1888 sk->sk_write_space = sk_stream_write_space;
1889 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1891 #ifdef CONFIG_TCP_MD5SIG
1892 tp->af_specific = &tcp_sock_ipv6_specific;
1895 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1896 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1898 atomic_inc(&tcp_sockets_allocated);
/*
 * Tear down a TCP/IPv6 socket; registered as tcpv6_prot.destroy.
 * With CONFIG_TCP_MD5SIG the MD5 key list is cleared first when
 * md5sig_info is set.  The common TCP teardown (tcp_v4_destroy_sock) then
 * runs, followed by the IPv6 teardown (inet6_destroy_sock).
 */
1903 static void tcp_v6_destroy_sock(struct sock *sk)
1905 #ifdef CONFIG_TCP_MD5SIG
1906 /* Clean up the MD5 key list */
1907 if (tcp_sk(sk)->md5sig_info)
1908 tcp_v6_clear_md5_list(sk);
1910 tcp_v4_destroy_sock(sk);
1911 inet6_destroy_sock(sk);
1914 #ifdef CONFIG_PROC_FS
1915 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * Format one listing line for a pending connection request.
 *
 * @seq: seq_file being written
 * @sk:  socket whose sport is printed as the local port
 * @req: request_sock supplying the local/remote IPv6 addresses, the remote
 *       port and the expiry time
 * @i:   NOTE(review): presumably the entry index for the leading "%4d:";
 *       its use is on a line this listing does not show
 * @uid: NOTE(review): presumably the owner uid column; its use is likewise
 *       not shown
 *
 * The time-to-die is req->expires - jiffies, printed through
 * jiffies_to_clock_t().  Addresses are printed as four raw s6_addr32 words.
 */
1916 static void get_openreq6(struct seq_file *seq,
1917 struct sock *sk, struct request_sock *req, int i, int uid)
1919 int ttd = req->expires - jiffies;
1920 struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1921 struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1927 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1928 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1930 src->s6_addr32[0], src->s6_addr32[1],
1931 src->s6_addr32[2], src->s6_addr32[3],
1932 ntohs(inet_sk(sk)->sport),
1933 dest->s6_addr32[0], dest->s6_addr32[1],
1934 dest->s6_addr32[2], dest->s6_addr32[3],
1935 ntohs(inet_rsk(req)->rmt_port),
1937 0,0, /* could print option size, but that is af dependent. */
1938 1, /* timers active (only the expire timer) */
1939 jiffies_to_clock_t(ttd),
1942 0, /* non standard timer */
1943 0, /* open_requests have no inode */
/*
 * Format one listing line for a TCP/IPv6 socket.
 *
 * @seq: seq_file being written
 * @sp:  socket to describe
 * @i:   NOTE(review): presumably the entry index for the leading "%4d:";
 *       its use is on a line this listing does not show
 *
 * The expiry value is chosen in this order: icsk_timeout when the pending
 * timer is ICSK_TIME_RETRANS or ICSK_TIME_PROBE0, sp->sk_timer.expires when
 * that timer is pending, otherwise the current jiffies.  The receive-queue
 * column is sk_ack_backlog for TCP_LISTEN sockets and
 * rcv_nxt - copied_seq for the rest.  snd_ssthresh values >= 0xFFFF are
 * printed as -1.
 *
 * NOTE(review): the assignment of "dest" and the timer-code variable are
 * on lines this listing does not show.
 */
1947 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1949 struct in6_addr *dest, *src;
1952 unsigned long timer_expires;
1953 struct inet_sock *inet = inet_sk(sp);
1954 struct tcp_sock *tp = tcp_sk(sp);
1955 const struct inet_connection_sock *icsk = inet_csk(sp);
1956 struct ipv6_pinfo *np = inet6_sk(sp);
1959 src = &np->rcv_saddr;
1960 destp = ntohs(inet->dport);
1961 srcp = ntohs(inet->sport);
1963 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1965 timer_expires = icsk->icsk_timeout;
1966 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1968 timer_expires = icsk->icsk_timeout;
1969 } else if (timer_pending(&sp->sk_timer)) {
1971 timer_expires = sp->sk_timer.expires;
1974 timer_expires = jiffies;
1978 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1979 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %lu %lu %u %u %d\n",
1981 src->s6_addr32[0], src->s6_addr32[1],
1982 src->s6_addr32[2], src->s6_addr32[3], srcp,
1983 dest->s6_addr32[0], dest->s6_addr32[1],
1984 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1986 tp->write_seq-tp->snd_una,
1987 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1989 jiffies_to_clock_t(timer_expires - jiffies),
1990 icsk->icsk_retransmits,
1992 icsk->icsk_probes_out,
1994 atomic_read(&sp->sk_refcnt), sp,
1995 jiffies_to_clock_t(icsk->icsk_rto),
1996 jiffies_to_clock_t(icsk->icsk_ack.ato),
1997 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1998 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/*
 * Format one listing line for a timewait socket.
 *
 * @seq: seq_file being written
 * @tw:  timewait socket to describe
 * @i:   NOTE(review): presumably the entry index for the leading "%4d:";
 *       its use is on a line this listing does not show
 *
 * Addresses come from the inet6_timewait_sock (tw_v6_rcv_saddr and
 * tw_v6_daddr), ports from tw_sport and tw_dport.  The state column is
 * tw_substate, the timer code is the constant 3, and the time-to-die is
 * tw->tw_ttd - jiffies printed through jiffies_to_clock_t().
 */
2002 static void get_timewait6_sock(struct seq_file *seq,
2003 struct inet_timewait_sock *tw, int i)
2005 struct in6_addr *dest, *src;
2007 struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
2008 int ttd = tw->tw_ttd - jiffies;
2013 dest = &tw6->tw_v6_daddr;
2014 src = &tw6->tw_v6_rcv_saddr;
2015 destp = ntohs(tw->tw_dport);
2016 srcp = ntohs(tw->tw_sport);
2019 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2020 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2022 src->s6_addr32[0], src->s6_addr32[1],
2023 src->s6_addr32[2], src->s6_addr32[3], srcp,
2024 dest->s6_addr32[0], dest->s6_addr32[1],
2025 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2026 tw->tw_substate, 0, 0,
2027 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2028 atomic_read(&tw->tw_refcnt), tw);
/*
 * seq_file show callback (wired up as tcp6_seq_afinfo.show).
 *
 * When v is SEQ_START_TOKEN the column header is printed.  Otherwise the
 * entry is dispatched on st->state: listening and established entries go to
 * get_tcp6_sock(), open requests to get_openreq6() (with st->syn_wait_sk
 * and st->uid), and timewait entries to get_timewait6_sock().
 *
 * NOTE(review): the assignment of "st", the break statements and the
 * return value are on lines this listing does not show.
 */
2031 static int tcp6_seq_show(struct seq_file *seq, void *v)
2033 struct tcp_iter_state *st;
2035 if (v == SEQ_START_TOKEN) {
2040 "st tx_queue rx_queue tr tm->when retrnsmt"
2041 " uid timeout inode\n");
2046 switch (st->state) {
2047 case TCP_SEQ_STATE_LISTENING:
2048 case TCP_SEQ_STATE_ESTABLISHED:
2049 get_tcp6_sock(seq, v, st->num);
2051 case TCP_SEQ_STATE_OPENREQ:
2052 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2054 case TCP_SEQ_STATE_TIME_WAIT:
2055 get_timewait6_sock(seq, v, st->num);
/*
 * seq_file registration record passed to tcp_proc_register() and
 * tcp_proc_unregister(); its show callback is tcp6_seq_show().
 * NOTE(review): further initialisers are on lines this listing does not show.
 */
2062 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2066 .owner = THIS_MODULE,
2069 .show = tcp6_seq_show,
/*
 * Register the TCPv6 seq_file entry for network namespace @net.
 * Returns whatever tcp_proc_register() returns.
 */
2073 int tcp6_proc_init(struct net *net)
2075 return tcp_proc_register(net, &tcp6_seq_afinfo);
/* Remove the TCPv6 seq_file entry for network namespace @net. */
2078 void tcp6_proc_exit(struct net *net)
2080 tcp_proc_unregister(net, &tcp6_seq_afinfo);
/*
 * Protocol descriptor (struct proto) for TCP over IPv6; referenced by
 * tcpv6_protosw.prot.  Connect, init, destroy, backlog receive and hash use
 * tcp_v6_* routines; the other visible operations reuse tcp_* and inet_*
 * implementations and the shared TCP accounting variables.  Sockets are
 * sized as struct tcp6_sock.
 * NOTE(review): some initialisers are on lines this listing does not show.
 */
2084 struct proto tcpv6_prot = {
2086 .owner = THIS_MODULE,
2088 .connect = tcp_v6_connect,
2089 .disconnect = tcp_disconnect,
2090 .accept = inet_csk_accept,
2092 .init = tcp_v6_init_sock,
2093 .destroy = tcp_v6_destroy_sock,
2094 .shutdown = tcp_shutdown,
2095 .setsockopt = tcp_setsockopt,
2096 .getsockopt = tcp_getsockopt,
2097 .recvmsg = tcp_recvmsg,
2098 .backlog_rcv = tcp_v6_do_rcv,
2099 .hash = tcp_v6_hash,
2100 .unhash = inet_unhash,
2101 .get_port = inet_csk_get_port,
2102 .enter_memory_pressure = tcp_enter_memory_pressure,
2103 .sockets_allocated = &tcp_sockets_allocated,
2104 .memory_allocated = &tcp_memory_allocated,
2105 .memory_pressure = &tcp_memory_pressure,
2106 .orphan_count = &tcp_orphan_count,
2107 .sysctl_mem = sysctl_tcp_mem,
2108 .sysctl_wmem = sysctl_tcp_wmem,
2109 .sysctl_rmem = sysctl_tcp_rmem,
2110 .max_header = MAX_TCP_HEADER,
2111 .obj_size = sizeof(struct tcp6_sock),
2112 .twsk_prot = &tcp6_timewait_sock_ops,
2113 .rsk_prot = &tcp6_request_sock_ops,
2114 .h.hashinfo = &tcp_hashinfo,
2115 #ifdef CONFIG_COMPAT
2116 .compat_setsockopt = compat_tcp_setsockopt,
2117 .compat_getsockopt = compat_tcp_getsockopt,
/*
 * IPv6 protocol hooks for TCP: receive handler, error handler and the GSO
 * callbacks.  tcpv6_init() registers this record for IPPROTO_TCP with
 * inet6_add_protocol(), and tcpv6_exit() removes it.
 */
2121 static struct inet6_protocol tcpv6_protocol = {
2122 .handler = tcp_v6_rcv,
2123 .err_handler = tcp_v6_err,
2124 .gso_send_check = tcp_v6_gso_send_check,
2125 .gso_segment = tcp_tso_segment,
2126 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/*
 * Socket-switch entry tying SOCK_STREAM / IPPROTO_TCP to tcpv6_prot and
 * inet6_stream_ops.  Registered by tcpv6_init() and unregistered by
 * tcpv6_exit().
 * NOTE(review): the rest of the flags expression and some initialisers are
 * on lines this listing does not show.
 */
2129 static struct inet_protosw tcpv6_protosw = {
2130 .type = SOCK_STREAM,
2131 .protocol = IPPROTO_TCP,
2132 .prot = &tcpv6_prot,
2133 .ops = &inet6_stream_ops,
2136 .flags = INET_PROTOSW_PERMANENT |
/*
 * Per-namespace init: create the control socket stored in
 * net->ipv6.tcp_sk (PF_INET6, SOCK_RAW, IPPROTO_TCP).
 * Returns whatever inet_ctl_sock_create() returns.
 */
2140 static int tcpv6_net_init(struct net *net)
2142 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2143 SOCK_RAW, IPPROTO_TCP, net);
/*
 * Per-namespace exit: destroy the control socket held in net->ipv6.tcp_sk,
 * then purge this namespace's AF_INET6 timewait sockets with
 * inet_twsk_purge().
 */
2146 static void tcpv6_net_exit(struct net *net)
2148 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2149 inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6);
/*
 * Per-namespace init/exit callbacks, registered by tcpv6_init() and
 * unregistered by tcpv6_exit().
 */
2152 static struct pernet_operations tcpv6_net_ops = {
2153 .init = tcpv6_net_init,
2154 .exit = tcpv6_net_exit,
/*
 * Bring-up for TCP over IPv6.  Registration order:
 *   1. inet6_add_protocol() for tcpv6_protocol under IPPROTO_TCP,
 *   2. inet6_register_protosw() for tcpv6_protosw,
 *   3. register_pernet_subsys() for tcpv6_net_ops.
 * A failure in step 2 jumps to out_tcpv6_protocol and one in step 3 to
 * out_tcpv6_protosw; the visible unwind calls are inet6_del_protocol() and
 * inet6_unregister_protosw().
 *
 * NOTE(review): the error tests, the label lines and the return statement
 * are on lines this listing does not show.
 */
2157 int __init tcpv6_init(void)
2161 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2165 /* register inet6 protocol */
2166 ret = inet6_register_protosw(&tcpv6_protosw);
2168 goto out_tcpv6_protocol;
2170 ret = register_pernet_subsys(&tcpv6_net_ops);
2172 goto out_tcpv6_protosw;
2177 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2179 inet6_unregister_protosw(&tcpv6_protosw);
/*
 * Tear-down for TCP over IPv6.  The visible calls undo tcpv6_init() in
 * reverse order: per-namespace operations, then the protosw entry, then
 * the IPPROTO_TCP protocol handler.
 */
2183 void tcpv6_exit(void)
2185 unregister_pernet_subsys(&tcpv6_net_ops);
2186 inet6_unregister_protosw(&tcpv6_protosw);
2187 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);