/*
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
26 #include <net/inet_common.h>
27 #include <net/inet_sock.h>
28 #include <net/protocol.h>
32 #include <asm/semaphore.h>
33 #include <linux/spinlock.h>
34 #include <linux/timer.h>
35 #include <linux/delay.h>
36 #include <linux/poll.h>
/* Per-CPU SNMP/MIB counters for DCCP; the two arrays are allocated in
 * dccp_mib_init() below. */
42 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
44 EXPORT_SYMBOL_GPL(dccp_statistics);
/* Sockets closed by userspace but still alive in the kernel; incremented
 * in dccp_close(). */
46 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
48 EXPORT_SYMBOL_GPL(dccp_orphan_count);
50 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
51 .lhash_lock = RW_LOCK_UNLOCKED,
52 .lhash_users = ATOMIC_INIT(0),
53 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
56 EXPORT_SYMBOL_GPL(dccp_hashinfo);
58 static struct net_protocol dccp_protocol = {
59 .handler = dccp_v4_rcv,
60 .err_handler = dccp_v4_err,
64 const char *dccp_packet_name(const int type)
66 static const char *dccp_packet_names[] = {
67 [DCCP_PKT_REQUEST] = "REQUEST",
68 [DCCP_PKT_RESPONSE] = "RESPONSE",
69 [DCCP_PKT_DATA] = "DATA",
70 [DCCP_PKT_ACK] = "ACK",
71 [DCCP_PKT_DATAACK] = "DATAACK",
72 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
73 [DCCP_PKT_CLOSE] = "CLOSE",
74 [DCCP_PKT_RESET] = "RESET",
75 [DCCP_PKT_SYNC] = "SYNC",
76 [DCCP_PKT_SYNCACK] = "SYNCACK",
79 if (type >= DCCP_NR_PKT_TYPES)
82 return dccp_packet_names[type];
85 EXPORT_SYMBOL_GPL(dccp_packet_name);
87 const char *dccp_state_name(const int state)
89 static char *dccp_state_names[] = {
91 [DCCP_REQUESTING] = "REQUESTING",
92 [DCCP_PARTOPEN] = "PARTOPEN",
93 [DCCP_LISTEN] = "LISTEN",
94 [DCCP_RESPOND] = "RESPOND",
95 [DCCP_CLOSING] = "CLOSING",
96 [DCCP_TIME_WAIT] = "TIME_WAIT",
97 [DCCP_CLOSED] = "CLOSED",
100 if (state >= DCCP_MAX_STATES)
101 return "INVALID STATE!";
103 return dccp_state_names[state];
106 EXPORT_SYMBOL_GPL(dccp_state_name);
108 void dccp_hash(struct sock *sk)
110 inet_hash(&dccp_hashinfo, sk);
113 EXPORT_SYMBOL_GPL(dccp_hash);
115 void dccp_unhash(struct sock *sk)
117 inet_unhash(&dccp_hashinfo, sk);
120 EXPORT_SYMBOL_GPL(dccp_unhash);
/*
 * dccp_init_sock  -  initialise a freshly allocated DCCP socket.
 *
 * NOTE(review): this copy of the file is truncated — the leading numbers
 * are original line numbers, and several lines (braces, error returns)
 * are missing.  Comments below describe only what the visible code shows.
 */
122 int dccp_init_sock(struct sock *sk)
124 struct dccp_sock *dp = dccp_sk(sk);
125 struct inet_connection_sock *icsk = inet_csk(sk);
/* First caller is the kernel control socket; the flag starts at 1 and is
 * cleared below, so later sockets take the feature-negotiation path. */
126 static int dccp_ctl_socket_init = 1;
128 dccp_options_init(&dp->dccps_options);
129 do_gettimeofday(&dp->dccps_epoch);
132 * FIXME: We're hardcoding the CCID, and doing this at this point makes
133 * the listening (master) sock get CCID control blocks, which is not
134 * necessary, but for now, to not mess with the test userspace apps,
135 * lets leave it here, later the real solution is to do this in a
136 * setsockopt(CCIDs-I-want/accept). -acme
138 if (likely(!dccp_ctl_socket_init)) {
139 int rc = dccp_feat_init(sk);
/* Optionally allocate the receive ack vector. */
144 if (dp->dccps_options.dccpo_send_ack_vector) {
145 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
146 if (dp->dccps_hc_rx_ackvec == NULL)
/* Instantiate half-connection CCIDs for both directions. */
149 dp->dccps_hc_rx_ccid =
150 ccid_hc_rx_new(dp->dccps_options.dccpo_rx_ccid,
152 dp->dccps_hc_tx_ccid =
153 ccid_hc_tx_new(dp->dccps_options.dccpo_tx_ccid,
/* On partial failure, undo both CCIDs and the ack vector. */
155 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
156 dp->dccps_hc_tx_ccid == NULL)) {
157 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
158 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
159 if (dp->dccps_options.dccpo_send_ack_vector) {
160 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
161 dp->dccps_hc_rx_ackvec = NULL;
163 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
167 /* control socket doesn't need feat nego */
168 INIT_LIST_HEAD(&dp->dccps_options.dccpo_pending);
169 INIT_LIST_HEAD(&dp->dccps_options.dccpo_conf);
170 dccp_ctl_socket_init = 0;
/* Common initialisation for every socket, control or not. */
173 dccp_init_xmit_timers(sk);
174 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
175 sk->sk_state = DCCP_CLOSED;
176 sk->sk_write_space = dccp_write_space;
177 icsk->icsk_sync_mss = dccp_sync_mss;
178 dp->dccps_mss_cache = 536;
179 dp->dccps_role = DCCP_ROLE_UNDEFINED;
180 dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
181 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
186 EXPORT_SYMBOL_GPL(dccp_init_sock);
/*
 * dccp_destroy_sock  -  release per-socket DCCP resources: retransmit
 * skb, bound port, service list, ack vector and both CCIDs.
 * NOTE(review): truncated copy — leading numbers are original line
 * numbers; some lines (braces, returns) are missing.
 */
188 int dccp_destroy_sock(struct sock *sk)
190 struct dccp_sock *dp = dccp_sk(sk);
193 * DCCP doesn't use sk_write_queue, just sk_send_head
194 * for retransmissions
196 if (sk->sk_send_head != NULL) {
197 kfree_skb(sk->sk_send_head);
198 sk->sk_send_head = NULL;
201 /* Clean up a referenced DCCP bind bucket. */
202 if (inet_csk(sk)->icsk_bind_hash != NULL)
203 inet_put_port(&dccp_hashinfo, sk);
/* kfree(NULL) is a no-op, so no NULL check is needed here. */
205 kfree(dp->dccps_service_list);
206 dp->dccps_service_list = NULL;
208 if (dp->dccps_options.dccpo_send_ack_vector) {
209 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
210 dp->dccps_hc_rx_ackvec = NULL;
212 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
213 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
214 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
216 /* clean up feature negotiation state */
222 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
224 static inline int dccp_listen_start(struct sock *sk)
226 struct dccp_sock *dp = dccp_sk(sk);
228 dp->dccps_role = DCCP_ROLE_LISTEN;
230 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
231 * before calling listen()
233 if (dccp_service_not_initialized(sk))
235 return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
/*
 * dccp_disconnect  -  abort the connection and return the socket to
 * DCCP_CLOSED (RFC 793-style ABORT semantics, see comment below).
 * NOTE(review): truncated copy — some lines are missing.
 */
238 int dccp_disconnect(struct sock *sk, int flags)
240 struct inet_connection_sock *icsk = inet_csk(sk);
241 struct inet_sock *inet = inet_sk(sk);
243 const int old_state = sk->sk_state;
245 if (old_state != DCCP_CLOSED)
246 dccp_set_state(sk, DCCP_CLOSED);
248 /* ABORT function of RFC793 */
249 if (old_state == DCCP_LISTEN) {
250 inet_csk_listen_stop(sk);
251 /* FIXME: do the active reset thing */
252 } else if (old_state == DCCP_REQUESTING)
253 sk->sk_err = ECONNRESET;
/* Drop timers, queued receive data and the single retransmit skb. */
255 dccp_clear_xmit_timers(sk);
256 __skb_queue_purge(&sk->sk_receive_queue);
257 if (sk->sk_send_head != NULL) {
258 __kfree_skb(sk->sk_send_head);
259 sk->sk_send_head = NULL;
/* Forget the source address unless the user explicitly bound it. */
264 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
265 inet_reset_saddr(sk);
268 sock_reset_flag(sk, SOCK_DONE);
270 icsk->icsk_backoff = 0;
271 inet_csk_delack_init(sk);
274 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
/* Notify waiters about the new error state. */
276 sk->sk_error_report(sk);
280 EXPORT_SYMBOL_GPL(dccp_disconnect);
/*
 * dccp_poll  -  compute the poll(2)/select(2) event mask for a socket.
 * NOTE(review): truncated copy — the mask declaration and some returns
 * are missing from this extract.
 */
283 * Wait for a DCCP event.
285 * Note that we don't need to lock the socket, as the upper poll layers
286 * take care of normal races (between the test and the event) and we don't
287 * go look at any of the socket buffers directly.
289 unsigned int dccp_poll(struct file *file, struct socket *sock,
293 struct sock *sk = sock->sk;
295 poll_wait(file, sk->sk_sleep, wait);
296 if (sk->sk_state == DCCP_LISTEN)
297 return inet_csk_listen_poll(sk);
299 /* Socket is not locked. We are protected from async events
300 by poll logic and correct handling of state changes
301 made by another threads is impossible in any case.
308 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
310 if (sk->sk_shutdown & RCV_SHUTDOWN)
311 mask |= POLLIN | POLLRDNORM;
/* Connected states: readable when receive memory is queued. */
314 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
315 if (atomic_read(&sk->sk_rmem_alloc) > 0)
316 mask |= POLLIN | POLLRDNORM;
318 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
319 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
320 mask |= POLLOUT | POLLWRNORM;
321 } else { /* send SIGIO later */
322 set_bit(SOCK_ASYNC_NOSPACE,
323 &sk->sk_socket->flags);
324 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
326 /* Race breaker. If space is freed after
327 * wspace test but before the flags are set,
328 * IO signal will be lost.
330 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
331 mask |= POLLOUT | POLLWRNORM;
338 EXPORT_SYMBOL_GPL(dccp_poll);
340 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
342 dccp_pr_debug("entry\n");
346 EXPORT_SYMBOL_GPL(dccp_ioctl);
/*
 * dccp_setsockopt_service  -  install the service code (and optional
 * extra service list) for this socket.  @optval holds one or more
 * __be32 service codes; the first is passed in @service.
 * NOTE(review): truncated copy — error returns and locking lines are
 * missing from this extract.
 */
348 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
349 char __user *optval, int optlen)
351 struct dccp_sock *dp = dccp_sk(sk);
352 struct dccp_service_list *sl = NULL;
/* Reject the reserved invalid value and oversized lists. */
354 if (service == DCCP_SERVICE_INVALID_VALUE ||
355 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
358 if (optlen > sizeof(service)) {
359 sl = kmalloc(optlen, GFP_KERNEL);
363 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
364 if (copy_from_user(sl->dccpsl_list,
365 optval + sizeof(service),
366 optlen - sizeof(service)) ||
367 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
/* Swap in the new service data, releasing the old list. */
374 dp->dccps_service = service;
376 kfree(dp->dccps_service_list);
378 dp->dccps_service_list = sl;
383 /* byte 1 is feature. the rest is the preference list */
/*
 * dccp_setsockopt_change  -  queue a feature-negotiation Change option.
 * Copies a struct dccp_so_feat and its preference list from userspace,
 * then hands them to dccp_feat_change().
 * NOTE(review): truncated copy — error returns and the tail of this
 * function are missing from this extract.
 */
384 static int dccp_setsockopt_change(struct sock *sk, int type,
385 struct dccp_so_feat __user *optval)
387 struct dccp_so_feat opt;
391 if (copy_from_user(&opt, optval, sizeof(opt)))
394 val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
398 if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
403 rc = dccp_feat_change(sk, type, opt.dccpsf_feat, val, opt.dccpsf_len,
/*
 * dccp_setsockopt  -  SOL_DCCP setsockopt entry point; other levels are
 * delegated to the address-family ops.
 * NOTE(review): truncated copy — lock/unlock calls, defaults and returns
 * are missing from this extract.
 */
416 int dccp_setsockopt(struct sock *sk, int level, int optname,
417 char __user *optval, int optlen)
419 struct dccp_sock *dp;
423 if (level != SOL_DCCP)
424 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
428 if (optlen < sizeof(int))
431 if (get_user(val, (int __user *)optval))
/* DCCP_SOCKOPT_SERVICE consumes the raw optval, not the int copy. */
434 if (optname == DCCP_SOCKOPT_SERVICE)
435 return dccp_setsockopt_service(sk, val, optval, optlen);
442 case DCCP_SOCKOPT_PACKET_SIZE:
443 dp->dccps_packet_size = val;
446 case DCCP_SOCKOPT_CHANGE_L:
447 if (optlen != sizeof(struct dccp_so_feat))
450 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
451 (struct dccp_so_feat *)
455 case DCCP_SOCKOPT_CHANGE_R:
456 if (optlen != sizeof(struct dccp_so_feat))
459 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
460 (struct dccp_so_feat *)
473 EXPORT_SYMBOL_GPL(dccp_setsockopt);
/*
 * dccp_getsockopt_service  -  copy the service code plus any extra
 * service list back to userspace.
 * NOTE(review): truncated copy — length validation and returns are
 * missing from this extract.
 */
475 static int dccp_getsockopt_service(struct sock *sk, int len,
476 __be32 __user *optval,
479 const struct dccp_sock *dp = dccp_sk(sk);
480 const struct dccp_service_list *sl;
481 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
484 if (dccp_service_not_initialized(sk))
487 if ((sl = dp->dccps_service_list) != NULL) {
488 slen = sl->dccpsl_nr * sizeof(u32);
/* Write total length, the primary service, then the optional list. */
497 if (put_user(total_len, optlen) ||
498 put_user(dp->dccps_service, optval) ||
499 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
/*
 * dccp_getsockopt  -  SOL_DCCP getsockopt entry point; other levels go
 * through the address-family ops, CCID options to the half-connection
 * CCID modules.
 * NOTE(review): truncated copy — some declarations, case labels and
 * returns are missing from this extract.
 */
506 int dccp_getsockopt(struct sock *sk, int level, int optname,
507 char __user *optval, int __user *optlen)
509 struct dccp_sock *dp;
512 if (level != SOL_DCCP)
513 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
516 if (get_user(len, optlen))
519 if (len < sizeof(int))
525 case DCCP_SOCKOPT_PACKET_SIZE:
526 val = dp->dccps_packet_size;
527 len = sizeof(dp->dccps_packet_size);
529 case DCCP_SOCKOPT_SERVICE:
530 return dccp_getsockopt_service(sk, len,
531 (__be32 __user *)optval, optlen);
/* CCID-specific options are forwarded to the RX/TX CCIDs. */
533 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
534 len, (u32 __user *)optval, optlen);
536 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
537 len, (u32 __user *)optval, optlen);
542 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
548 EXPORT_SYMBOL_GPL(dccp_getsockopt);
/*
 * dccp_sendmsg  -  transmit one datagram; DCCP preserves packet
 * boundaries, so each sendmsg() becomes at most one packet.
 * NOTE(review): truncated copy — several declarations, error paths and
 * the function tail are missing from this extract.
 */
550 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
553 const struct dccp_sock *dp = dccp_sk(sk);
554 const int flags = msg->msg_flags;
555 const int noblock = flags & MSG_DONTWAIT;
/* A message larger than the cached MSS cannot go out as one packet. */
560 if (len > dp->dccps_mss_cache)
564 timeo = sock_sndtimeo(sk, noblock);
567 * We have to use sk_stream_wait_connect here to set sk_write_pending,
568 * so that the trick in dccp_rcv_request_sent_state_process.
570 /* Wait for a connection to finish. */
571 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
572 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
575 size = sk->sk_prot->max_header + len;
577 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
/* Reserve header room, then copy the user payload into the skb. */
582 skb_reserve(skb, sk->sk_prot->max_header);
583 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
587 rc = dccp_write_xmit(sk, skb, &timeo);
589 * XXX we don't use sk_write_queue, so just discard the packet.
590 * Current plan however is to _use_ sk_write_queue with
591 * an algorith similar to tcp_sendmsg, where the main difference
592 * is that in DCCP we have to respect packet boundaries, so
593 * no coalescing of skbs.
595 * This bug was _quickly_ found & fixed by just looking at an OSTRA
596 * generated callgraph 8) -acme
606 EXPORT_SYMBOL_GPL(dccp_sendmsg);
/*
 * dccp_recvmsg  -  receive one datagram, or detect connection teardown.
 * NOTE(review): truncated copy — the retry-loop structure, labels and
 * several returns are missing from this extract.
 */
608 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
609 size_t len, int nonblock, int flags, int *addr_len)
611 const struct dccp_hdr *dh;
/* recvmsg() makes no sense on a listening socket. */
616 if (sk->sk_state == DCCP_LISTEN) {
621 timeo = sock_rcvtimeo(sk, nonblock);
624 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
627 goto verify_sock_status;
/* Only DATA/DATAACK carry payload; RESET/CLOSE end the connection. */
631 if (dh->dccph_type == DCCP_PKT_DATA ||
632 dh->dccph_type == DCCP_PKT_DATAACK)
635 if (dh->dccph_type == DCCP_PKT_RESET ||
636 dh->dccph_type == DCCP_PKT_CLOSE) {
637 dccp_pr_debug("found fin ok!\n");
641 dccp_pr_debug("packet_type=%s\n",
642 dccp_packet_name(dh->dccph_type));
645 if (sock_flag(sk, SOCK_DONE)) {
651 len = sock_error(sk);
655 if (sk->sk_shutdown & RCV_SHUTDOWN) {
660 if (sk->sk_state == DCCP_CLOSED) {
661 if (!sock_flag(sk, SOCK_DONE)) {
662 /* This occurs when user tries to read
663 * from never connected socket.
677 if (signal_pending(current)) {
678 len = sock_intr_errno(timeo);
/* Nothing usable queued: sleep until data arrives or we time out. */
682 sk_wait_data(sk, &timeo);
687 else if (len < skb->len)
688 msg->msg_flags |= MSG_TRUNC;
690 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
691 /* Exception. Bailout! */
696 if (!(flags & MSG_PEEK))
705 EXPORT_SYMBOL_GPL(dccp_recvmsg);
/*
 * inet_dccp_listen  -  listen(2) handler for PF_INET DCCP sockets.
 * NOTE(review): truncated copy — lock/unlock calls and returns are
 * missing from this extract.
 */
707 int inet_dccp_listen(struct socket *sock, int backlog)
709 struct sock *sk = sock->sk;
710 unsigned char old_state;
716 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
719 old_state = sk->sk_state;
720 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
723 /* Really, if the socket is already in listen state
724 * we can only allow the backlog to be adjusted.
726 if (old_state != DCCP_LISTEN) {
728 * FIXME: here it probably should be sk->sk_prot->listen_start
729 * see tcp_listen_start
731 err = dccp_listen_start(sk);
/* Backlog is always (re)applied, even for an existing listener. */
735 sk->sk_max_ack_backlog = backlog;
743 EXPORT_SYMBOL_GPL(inet_dccp_listen);
745 static const unsigned char dccp_new_state[] = {
746 /* current state: new state: action: */
748 [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
749 [DCCP_REQUESTING] = DCCP_CLOSED,
750 [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
751 [DCCP_LISTEN] = DCCP_CLOSED,
752 [DCCP_RESPOND] = DCCP_CLOSED,
753 [DCCP_CLOSING] = DCCP_CLOSED,
754 [DCCP_TIME_WAIT] = DCCP_CLOSED,
755 [DCCP_CLOSED] = DCCP_CLOSED,
758 static int dccp_close_state(struct sock *sk)
760 const int next = dccp_new_state[sk->sk_state];
761 const int ns = next & DCCP_STATE_MASK;
763 if (ns != sk->sk_state)
764 dccp_set_state(sk, ns);
766 return next & DCCP_ACTION_FIN;
/*
 * dccp_close  -  close(2) path: drain the receive queue, send a CLOSE if
 * dccp_new_state[] requires one, then orphan or destroy the socket.
 * NOTE(review): truncated copy — lock_sock/release_sock calls, labels and
 * several lines are missing from this extract.
 */
769 void dccp_close(struct sock *sk, long timeout)
775 sk->sk_shutdown = SHUTDOWN_MASK;
777 if (sk->sk_state == DCCP_LISTEN) {
778 dccp_set_state(sk, DCCP_CLOSED);
/* Stop accepting and flush the accept queue. */
781 inet_csk_listen_stop(sk);
783 goto adjudge_to_death;
787 * We need to flush the recv. buffs. We do this only on the
788 * descriptor close, not protocol-sourced closes, because the
789 *reader process may not have drained the data yet!
791 /* FIXME: check for unread data */
792 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
796 if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
797 /* Check zero linger _after_ checking for unread data. */
798 sk->sk_prot->disconnect(sk, 0);
799 } else if (dccp_close_state(sk)) {
800 dccp_send_close(sk, 1);
803 sk_stream_wait_close(sk, timeout);
807 * It is the last release_sock in its life. It will remove backlog.
811 * Now socket is owned by kernel and we acquire BH lock
812 * to finish close. No need to check for user refs.
816 BUG_TRAP(!sock_owned_by_user(sk));
822 * The last release_sock may have processed the CLOSE or RESET
823 * packet moving sock to CLOSED state, if not we have to fire
824 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
825 * in draft-ietf-dccp-spec-11. -acme
827 if (sk->sk_state == DCCP_CLOSING) {
828 /* FIXME: should start at 2 * RTT */
829 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
830 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
831 inet_csk(sk)->icsk_rto,
834 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
835 dccp_set_state(sk, DCCP_CLOSED);
839 atomic_inc(sk->sk_prot->orphan_count);
/* Already CLOSED: nothing left for the protocol to do, free it now. */
840 if (sk->sk_state == DCCP_CLOSED)
841 inet_csk_destroy_sock(sk);
843 /* Otherwise, socket is reprieved until protocol close. */
850 EXPORT_SYMBOL_GPL(dccp_close);
/* shutdown(2) handler: currently only logs entry; no protocol action. */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
/*
 * proto_ops for PF_INET/SOCK_DCCP sockets: mostly generic inet handlers
 * plus the DCCP-specific inet_dccp_listen above.
 * NOTE(review): truncated copy — entries such as .family/.bind/.poll/
 * .ioctl are missing from this extract.
 */
859 static const struct proto_ops inet_dccp_ops = {
861 .owner = THIS_MODULE,
862 .release = inet_release,
864 .connect = inet_stream_connect,
865 .socketpair = sock_no_socketpair,
866 .accept = inet_accept,
867 .getname = inet_getname,
868 /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
871 /* FIXME: work on inet_listen to rename it to sock_common_listen */
872 .listen = inet_dccp_listen,
873 .shutdown = inet_shutdown,
874 .setsockopt = sock_common_setsockopt,
875 .getsockopt = sock_common_getsockopt,
876 .sendmsg = inet_sendmsg,
877 .recvmsg = sock_common_recvmsg,
878 .mmap = sock_no_mmap,
879 .sendpage = sock_no_sendpage,
882 extern struct net_proto_family inet_family_ops;
/* Registered with inet_register_protosw() in dccp_init() so that
 * socket(PF_INET, SOCK_DCCP, IPPROTO_DCCP) resolves to this protocol.
 * NOTE(review): truncated copy — some initialiser entries are missing. */
884 static struct inet_protosw dccp_v4_protosw = {
886 .protocol = IPPROTO_DCCP,
888 .ops = &inet_dccp_ops,
891 .flags = INET_PROTOSW_ICSK,
895 * This is the global socket data structure used for responding to
896 * the Out-of-the-blue (OOTB) packets. A control sock will be created
897 * for this socket at the initialization time.
899 struct socket *dccp_ctl_socket;
/* __initdata: the error message is only needed during init. */
901 static char dccp_ctl_socket_err_msg[] __initdata =
902 KERN_ERR "DCCP: Failed to create the control socket.\n";
904 static int __init dccp_ctl_sock_init(void)
906 int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
909 printk(dccp_ctl_socket_err_msg);
911 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
912 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
914 /* Unhash it so that IP input processing does not even
915 * see it, we do not wish this socket to see incoming
918 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
/* Release the control socket on module unload (unload-hack builds only). */
void dccp_ctl_sock_exit(void)
{
	if (dccp_ctl_socket != NULL) {
		sock_release(dccp_ctl_socket);
		dccp_ctl_socket = NULL;
	}
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif	/* NOTE(review): closing #endif restored — was missing in this copy */
936 static int __init dccp_mib_init(void)
940 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
941 if (dccp_statistics[0] == NULL)
944 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
945 if (dccp_statistics[1] == NULL)
952 free_percpu(dccp_statistics[0]);
953 dccp_statistics[0] = NULL;
958 static int dccp_mib_exit(void)
960 free_percpu(dccp_statistics[0]);
961 free_percpu(dccp_statistics[1]);
962 dccp_statistics[0] = dccp_statistics[1] = NULL;
/* Optional module parameter controlling the established-hash sizing
 * heuristic in dccp_init(). */
965 static int thash_entries;
966 module_param(thash_entries, int, 0444);
967 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
/* Debug switch, compiled in only under CONFIG_IP_DCCP_DEBUG.
 * NOTE(review): the dccp_debug definition and closing #endif are missing
 * from this truncated copy. */
969 #ifdef CONFIG_IP_DCCP_DEBUG
971 module_param(dccp_debug, int, 0444);
972 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
974 EXPORT_SYMBOL_GPL(dccp_debug);
/*
 * dccp_init  -  module initialisation: register the protocol, size and
 * allocate the established/bind hash tables, then set up MIBs, the
 * protosw entry, ack vectors, sysctls and the control socket, unwinding
 * each step in reverse order on failure.
 * NOTE(review): truncated copy — several declarations, do/while openers
 * and goto targets are missing from this extract.
 */
977 static int __init dccp_init(void)
980 int ehash_order, bhash_order, i;
981 int rc = proto_register(&dccp_prot, 1);
987 dccp_hashinfo.bind_bucket_cachep =
988 kmem_cache_create("dccp_bind_bucket",
989 sizeof(struct inet_bind_bucket), 0,
990 SLAB_HWCACHE_ALIGN, NULL, NULL);
991 if (!dccp_hashinfo.bind_bucket_cachep)
992 goto out_proto_unregister;
995 * Size and allocate the main established and bind bucket
998 * The methodology is similar to that of the buffer cache.
/* Scale the hash goal with available memory, unless thash_entries
 * overrides it. */
1000 if (num_physpages >= (128 * 1024))
1001 goal = num_physpages >> (21 - PAGE_SHIFT);
1003 goal = num_physpages >> (23 - PAGE_SHIFT);
1006 goal = (thash_entries *
1007 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1008 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
/* Round the established hash down to a power-of-two bucket count. */
1011 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1012 sizeof(struct inet_ehash_bucket);
1013 dccp_hashinfo.ehash_size >>= 1;
1014 while (dccp_hashinfo.ehash_size &
1015 (dccp_hashinfo.ehash_size - 1))
1016 dccp_hashinfo.ehash_size--;
1017 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1018 __get_free_pages(GFP_ATOMIC, ehash_order);
1019 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1021 if (!dccp_hashinfo.ehash) {
1022 printk(KERN_CRIT "Failed to allocate DCCP "
1023 "established hash table\n");
1024 goto out_free_bind_bucket_cachep;
1027 for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
1028 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1029 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
/* Bind hash starts at the same order, retried smaller on failure. */
1032 bhash_order = ehash_order;
1035 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1036 sizeof(struct inet_bind_hashbucket);
1037 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1040 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1041 __get_free_pages(GFP_ATOMIC, bhash_order);
1042 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1044 if (!dccp_hashinfo.bhash) {
1045 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
1046 goto out_free_dccp_ehash;
1049 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1050 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1051 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
/* Register the remaining subsystems, in dependency order. */
1054 rc = dccp_mib_init();
1056 goto out_free_dccp_bhash;
1059 if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
1060 goto out_free_dccp_v4_mibs;
1062 inet_register_protosw(&dccp_v4_protosw);
1064 rc = dccp_ackvec_init();
1066 goto out_unregister_protosw;
1068 rc = dccp_sysctl_init();
1070 goto out_ackvec_exit;
1072 rc = dccp_ctl_sock_init();
1074 goto out_sysctl_exit;
/* Error unwinding: undo each successful step in reverse order. */
1081 out_unregister_protosw:
1082 inet_unregister_protosw(&dccp_v4_protosw);
1083 inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
1084 out_free_dccp_v4_mibs:
1086 out_free_dccp_bhash:
1087 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1088 dccp_hashinfo.bhash = NULL;
1089 out_free_dccp_ehash:
1090 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1091 dccp_hashinfo.ehash = NULL;
1092 out_free_bind_bucket_cachep:
1093 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1094 dccp_hashinfo.bind_bucket_cachep = NULL;
1095 out_proto_unregister:
1096 proto_unregister(&dccp_prot);
1100 static const char dccp_del_proto_err_msg[] __exitdata =
1101 KERN_ERR "can't remove dccp net_protocol\n";
/*
 * dccp_fini  -  module exit: unregister everything dccp_init() set up
 * and free the hash tables.
 * NOTE(review): truncated copy — intermediate exit calls (e.g. MIB
 * teardown) appear to be missing from this extract.
 */
1103 static void __exit dccp_fini(void)
1105 inet_unregister_protosw(&dccp_v4_protosw);
1107 if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
1108 printk(dccp_del_proto_err_msg);
/* Sizes were rounded to page orders at init; recompute them to free. */
1111 free_pages((unsigned long)dccp_hashinfo.bhash,
1112 get_order(dccp_hashinfo.bhash_size *
1113 sizeof(struct inet_bind_hashbucket)));
1114 free_pages((unsigned long)dccp_hashinfo.ehash,
1115 get_order(dccp_hashinfo.ehash_size *
1116 sizeof(struct inet_ehash_bucket)));
1117 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1118 proto_unregister(&dccp_prot);
1123 module_init(dccp_init);
1124 module_exit(dccp_fini);
1127 * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
1128 * values directly, Also cover the case where the protocol is not specified,
1129 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
/* Aliases let the kernel autoload this module from socket(2) requests. */
1131 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
1132 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
1133 MODULE_LICENSE("GPL");
1134 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1135 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");