/*
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
26 #include <net/inet_common.h>
27 #include <net/inet_sock.h>
28 #include <net/protocol.h>
32 #include <asm/semaphore.h>
33 #include <linux/spinlock.h>
34 #include <linux/timer.h>
35 #include <linux/delay.h>
36 #include <linux/poll.h>
/* Per-CPU SNMP MIB counters for DCCP; indexed [0]/[1] elsewhere in this file. */
42 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
44 EXPORT_SYMBOL_GPL(dccp_statistics);
/* Sockets closed by userspace but not yet destroyed (see dccp_close()). */
46 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
48 EXPORT_SYMBOL_GPL(dccp_orphan_count);
50 static struct net_protocol dccp_protocol = {
51 .handler = dccp_v4_rcv,
52 .err_handler = dccp_v4_err,
56 const char *dccp_packet_name(const int type)
58 static const char *dccp_packet_names[] = {
59 [DCCP_PKT_REQUEST] = "REQUEST",
60 [DCCP_PKT_RESPONSE] = "RESPONSE",
61 [DCCP_PKT_DATA] = "DATA",
62 [DCCP_PKT_ACK] = "ACK",
63 [DCCP_PKT_DATAACK] = "DATAACK",
64 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
65 [DCCP_PKT_CLOSE] = "CLOSE",
66 [DCCP_PKT_RESET] = "RESET",
67 [DCCP_PKT_SYNC] = "SYNC",
68 [DCCP_PKT_SYNCACK] = "SYNCACK",
71 if (type >= DCCP_NR_PKT_TYPES)
74 return dccp_packet_names[type];
77 EXPORT_SYMBOL_GPL(dccp_packet_name);
79 const char *dccp_state_name(const int state)
81 static char *dccp_state_names[] = {
83 [DCCP_REQUESTING] = "REQUESTING",
84 [DCCP_PARTOPEN] = "PARTOPEN",
85 [DCCP_LISTEN] = "LISTEN",
86 [DCCP_RESPOND] = "RESPOND",
87 [DCCP_CLOSING] = "CLOSING",
88 [DCCP_TIME_WAIT] = "TIME_WAIT",
89 [DCCP_CLOSED] = "CLOSED",
92 if (state >= DCCP_MAX_STATES)
93 return "INVALID STATE!";
95 return dccp_state_names[state];
98 EXPORT_SYMBOL_GPL(dccp_state_name);
100 int dccp_init_sock(struct sock *sk)
102 struct dccp_sock *dp = dccp_sk(sk);
103 struct inet_connection_sock *icsk = inet_csk(sk);
104 static int dccp_ctl_socket_init = 1;
106 dccp_options_init(&dp->dccps_options);
107 do_gettimeofday(&dp->dccps_epoch);
110 * FIXME: We're hardcoding the CCID, and doing this at this point makes
111 * the listening (master) sock get CCID control blocks, which is not
112 * necessary, but for now, to not mess with the test userspace apps,
113 * lets leave it here, later the real solution is to do this in a
114 * setsockopt(CCIDs-I-want/accept). -acme
116 if (likely(!dccp_ctl_socket_init)) {
117 int rc = dccp_feat_init(sk);
122 if (dp->dccps_options.dccpo_send_ack_vector) {
123 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
124 if (dp->dccps_hc_rx_ackvec == NULL)
127 dp->dccps_hc_rx_ccid =
128 ccid_hc_rx_new(dp->dccps_options.dccpo_rx_ccid,
130 dp->dccps_hc_tx_ccid =
131 ccid_hc_tx_new(dp->dccps_options.dccpo_tx_ccid,
133 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
134 dp->dccps_hc_tx_ccid == NULL)) {
135 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
136 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
137 if (dp->dccps_options.dccpo_send_ack_vector) {
138 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
139 dp->dccps_hc_rx_ackvec = NULL;
141 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
145 /* control socket doesn't need feat nego */
146 INIT_LIST_HEAD(&dp->dccps_options.dccpo_pending);
147 INIT_LIST_HEAD(&dp->dccps_options.dccpo_conf);
148 dccp_ctl_socket_init = 0;
151 dccp_init_xmit_timers(sk);
152 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
153 sk->sk_state = DCCP_CLOSED;
154 sk->sk_write_space = dccp_write_space;
155 icsk->icsk_sync_mss = dccp_sync_mss;
156 dp->dccps_mss_cache = 536;
157 dp->dccps_role = DCCP_ROLE_UNDEFINED;
158 dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
159 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
164 EXPORT_SYMBOL_GPL(dccp_init_sock);
166 int dccp_destroy_sock(struct sock *sk)
168 struct dccp_sock *dp = dccp_sk(sk);
171 * DCCP doesn't use sk_write_queue, just sk_send_head
172 * for retransmissions
174 if (sk->sk_send_head != NULL) {
175 kfree_skb(sk->sk_send_head);
176 sk->sk_send_head = NULL;
179 /* Clean up a referenced DCCP bind bucket. */
180 if (inet_csk(sk)->icsk_bind_hash != NULL)
181 inet_put_port(&dccp_hashinfo, sk);
183 kfree(dp->dccps_service_list);
184 dp->dccps_service_list = NULL;
186 if (dp->dccps_options.dccpo_send_ack_vector) {
187 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
188 dp->dccps_hc_rx_ackvec = NULL;
190 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
191 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
192 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
194 /* clean up feature negotiation state */
200 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
202 static inline int dccp_listen_start(struct sock *sk)
204 struct dccp_sock *dp = dccp_sk(sk);
206 dp->dccps_role = DCCP_ROLE_LISTEN;
208 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
209 * before calling listen()
211 if (dccp_service_not_initialized(sk))
213 return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
216 int dccp_disconnect(struct sock *sk, int flags)
218 struct inet_connection_sock *icsk = inet_csk(sk);
219 struct inet_sock *inet = inet_sk(sk);
221 const int old_state = sk->sk_state;
223 if (old_state != DCCP_CLOSED)
224 dccp_set_state(sk, DCCP_CLOSED);
226 /* ABORT function of RFC793 */
227 if (old_state == DCCP_LISTEN) {
228 inet_csk_listen_stop(sk);
229 /* FIXME: do the active reset thing */
230 } else if (old_state == DCCP_REQUESTING)
231 sk->sk_err = ECONNRESET;
233 dccp_clear_xmit_timers(sk);
234 __skb_queue_purge(&sk->sk_receive_queue);
235 if (sk->sk_send_head != NULL) {
236 __kfree_skb(sk->sk_send_head);
237 sk->sk_send_head = NULL;
242 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
243 inet_reset_saddr(sk);
246 sock_reset_flag(sk, SOCK_DONE);
248 icsk->icsk_backoff = 0;
249 inet_csk_delack_init(sk);
252 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
254 sk->sk_error_report(sk);
258 EXPORT_SYMBOL_GPL(dccp_disconnect);
261 * Wait for a DCCP event.
263 * Note that we don't need to lock the socket, as the upper poll layers
264 * take care of normal races (between the test and the event) and we don't
265 * go look at any of the socket buffers directly.
267 unsigned int dccp_poll(struct file *file, struct socket *sock,
271 struct sock *sk = sock->sk;
273 poll_wait(file, sk->sk_sleep, wait);
274 if (sk->sk_state == DCCP_LISTEN)
275 return inet_csk_listen_poll(sk);
277 /* Socket is not locked. We are protected from async events
278 by poll logic and correct handling of state changes
279 made by another threads is impossible in any case.
286 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
288 if (sk->sk_shutdown & RCV_SHUTDOWN)
289 mask |= POLLIN | POLLRDNORM;
292 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
293 if (atomic_read(&sk->sk_rmem_alloc) > 0)
294 mask |= POLLIN | POLLRDNORM;
296 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
297 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
298 mask |= POLLOUT | POLLWRNORM;
299 } else { /* send SIGIO later */
300 set_bit(SOCK_ASYNC_NOSPACE,
301 &sk->sk_socket->flags);
302 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
304 /* Race breaker. If space is freed after
305 * wspace test but before the flags are set,
306 * IO signal will be lost.
308 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
309 mask |= POLLOUT | POLLWRNORM;
316 EXPORT_SYMBOL_GPL(dccp_poll);
318 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
320 dccp_pr_debug("entry\n");
324 EXPORT_SYMBOL_GPL(dccp_ioctl);
326 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
327 char __user *optval, int optlen)
329 struct dccp_sock *dp = dccp_sk(sk);
330 struct dccp_service_list *sl = NULL;
332 if (service == DCCP_SERVICE_INVALID_VALUE ||
333 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
336 if (optlen > sizeof(service)) {
337 sl = kmalloc(optlen, GFP_KERNEL);
341 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
342 if (copy_from_user(sl->dccpsl_list,
343 optval + sizeof(service),
344 optlen - sizeof(service)) ||
345 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
352 dp->dccps_service = service;
354 kfree(dp->dccps_service_list);
356 dp->dccps_service_list = sl;
361 /* byte 1 is feature. the rest is the preference list */
362 static int dccp_setsockopt_change(struct sock *sk, int type,
363 struct dccp_so_feat __user *optval)
365 struct dccp_so_feat opt;
369 if (copy_from_user(&opt, optval, sizeof(opt)))
372 val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
376 if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
381 rc = dccp_feat_change(sk, type, opt.dccpsf_feat, val, opt.dccpsf_len,
394 int dccp_setsockopt(struct sock *sk, int level, int optname,
395 char __user *optval, int optlen)
397 struct dccp_sock *dp;
401 if (level != SOL_DCCP)
402 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
406 if (optlen < sizeof(int))
409 if (get_user(val, (int __user *)optval))
412 if (optname == DCCP_SOCKOPT_SERVICE)
413 return dccp_setsockopt_service(sk, val, optval, optlen);
420 case DCCP_SOCKOPT_PACKET_SIZE:
421 dp->dccps_packet_size = val;
424 case DCCP_SOCKOPT_CHANGE_L:
425 if (optlen != sizeof(struct dccp_so_feat))
428 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
429 (struct dccp_so_feat *)
433 case DCCP_SOCKOPT_CHANGE_R:
434 if (optlen != sizeof(struct dccp_so_feat))
437 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
438 (struct dccp_so_feat *)
451 EXPORT_SYMBOL_GPL(dccp_setsockopt);
453 static int dccp_getsockopt_service(struct sock *sk, int len,
454 __be32 __user *optval,
457 const struct dccp_sock *dp = dccp_sk(sk);
458 const struct dccp_service_list *sl;
459 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
462 if (dccp_service_not_initialized(sk))
465 if ((sl = dp->dccps_service_list) != NULL) {
466 slen = sl->dccpsl_nr * sizeof(u32);
475 if (put_user(total_len, optlen) ||
476 put_user(dp->dccps_service, optval) ||
477 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
484 int dccp_getsockopt(struct sock *sk, int level, int optname,
485 char __user *optval, int __user *optlen)
487 struct dccp_sock *dp;
490 if (level != SOL_DCCP)
491 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
494 if (get_user(len, optlen))
497 if (len < sizeof(int))
503 case DCCP_SOCKOPT_PACKET_SIZE:
504 val = dp->dccps_packet_size;
505 len = sizeof(dp->dccps_packet_size);
507 case DCCP_SOCKOPT_SERVICE:
508 return dccp_getsockopt_service(sk, len,
509 (__be32 __user *)optval, optlen);
511 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
512 len, (u32 __user *)optval, optlen);
514 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
515 len, (u32 __user *)optval, optlen);
520 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
526 EXPORT_SYMBOL_GPL(dccp_getsockopt);
528 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
531 const struct dccp_sock *dp = dccp_sk(sk);
532 const int flags = msg->msg_flags;
533 const int noblock = flags & MSG_DONTWAIT;
538 if (len > dp->dccps_mss_cache)
542 timeo = sock_sndtimeo(sk, noblock);
545 * We have to use sk_stream_wait_connect here to set sk_write_pending,
546 * so that the trick in dccp_rcv_request_sent_state_process.
548 /* Wait for a connection to finish. */
549 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
550 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
553 size = sk->sk_prot->max_header + len;
555 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
560 skb_reserve(skb, sk->sk_prot->max_header);
561 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
565 rc = dccp_write_xmit(sk, skb, &timeo);
567 * XXX we don't use sk_write_queue, so just discard the packet.
568 * Current plan however is to _use_ sk_write_queue with
569 * an algorith similar to tcp_sendmsg, where the main difference
570 * is that in DCCP we have to respect packet boundaries, so
571 * no coalescing of skbs.
573 * This bug was _quickly_ found & fixed by just looking at an OSTRA
574 * generated callgraph 8) -acme
584 EXPORT_SYMBOL_GPL(dccp_sendmsg);
/*
 * dccp_recvmsg - receive one DCCP datagram.
 * Peeks the receive queue in a loop: DATA/DATAACK packets are copied to the
 * user iovec (truncating and setting MSG_TRUNC when the buffer is short),
 * RESET/CLOSE act as an end-of-stream marker, anything else is skipped.
 * When the queue is empty the socket status (SOCK_DONE, sk_err, shutdown,
 * CLOSED state, timeout, pending signal) decides whether to return or to
 * block in sk_wait_data().
 * NOTE(review): extraction lost several lines of this function (locking,
 * loop/label scaffolding, returns); code below is kept byte-identical.
 */
586 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
587 size_t len, int nonblock, int flags, int *addr_len)
589 const struct dccp_hdr *dh;
/* A listening socket has no data to deliver. */
594 if (sk->sk_state == DCCP_LISTEN) {
599 timeo = sock_rcvtimeo(sk, nonblock);
/* Peek (don't dequeue) so MSG_PEEK can leave the skb in place. */
602 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
605 goto verify_sock_status;
/* Only DATA/DATAACK carry application payload. */
609 if (dh->dccph_type == DCCP_PKT_DATA ||
610 dh->dccph_type == DCCP_PKT_DATAACK)
/* RESET/CLOSE behave like a FIN: report end of data. */
613 if (dh->dccph_type == DCCP_PKT_RESET ||
614 dh->dccph_type == DCCP_PKT_CLOSE) {
615 dccp_pr_debug("found fin ok!\n");
619 dccp_pr_debug("packet_type=%s\n",
620 dccp_packet_name(dh->dccph_type));
623 if (sock_flag(sk, SOCK_DONE)) {
629 len = sock_error(sk);
633 if (sk->sk_shutdown & RCV_SHUTDOWN) {
638 if (sk->sk_state == DCCP_CLOSED) {
639 if (!sock_flag(sk, SOCK_DONE)) {
640 /* This occurs when user tries to read
641 * from never connected socket.
655 if (signal_pending(current)) {
656 len = sock_intr_errno(timeo);
/* Block until data arrives or the timeout expires, then retry. */
660 sk_wait_data(sk, &timeo);
/* User buffer shorter than the packet: truncate and flag it. */
665 else if (len < skb->len)
666 msg->msg_flags |= MSG_TRUNC;
668 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
669 /* Exception. Bailout! */
/* Consume the skb unless the caller only peeked. */
674 if (!(flags & MSG_PEEK))
683 EXPORT_SYMBOL_GPL(dccp_recvmsg);
685 int inet_dccp_listen(struct socket *sock, int backlog)
687 struct sock *sk = sock->sk;
688 unsigned char old_state;
694 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
697 old_state = sk->sk_state;
698 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
701 /* Really, if the socket is already in listen state
702 * we can only allow the backlog to be adjusted.
704 if (old_state != DCCP_LISTEN) {
706 * FIXME: here it probably should be sk->sk_prot->listen_start
707 * see tcp_listen_start
709 err = dccp_listen_start(sk);
713 sk->sk_max_ack_backlog = backlog;
721 EXPORT_SYMBOL_GPL(inet_dccp_listen);
723 static const unsigned char dccp_new_state[] = {
724 /* current state: new state: action: */
726 [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
727 [DCCP_REQUESTING] = DCCP_CLOSED,
728 [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
729 [DCCP_LISTEN] = DCCP_CLOSED,
730 [DCCP_RESPOND] = DCCP_CLOSED,
731 [DCCP_CLOSING] = DCCP_CLOSED,
732 [DCCP_TIME_WAIT] = DCCP_CLOSED,
733 [DCCP_CLOSED] = DCCP_CLOSED,
736 static int dccp_close_state(struct sock *sk)
738 const int next = dccp_new_state[sk->sk_state];
739 const int ns = next & DCCP_STATE_MASK;
741 if (ns != sk->sk_state)
742 dccp_set_state(sk, ns);
744 return next & DCCP_ACTION_FIN;
/*
 * dccp_close - close a DCCP socket (sk_prot->close).
 * Stops a listener directly; otherwise drains unread data, honours
 * SO_LINGER-zero by aborting, or performs the normal state transition and
 * sends CLOSE when dccp_close_state() asks for it, then waits up to
 * @timeout before orphaning the socket.
 * NOTE(review): extraction lost several lines (release_sock/bh-lock
 * sequence, sock_hold/sock_orphan, closing braces); code below is kept
 * byte-identical.
 */
747 void dccp_close(struct sock *sk, long timeout)
/* No further sends or receives from userspace. */
753 sk->sk_shutdown = SHUTDOWN_MASK;
755 if (sk->sk_state == DCCP_LISTEN) {
756 dccp_set_state(sk, DCCP_CLOSED);
/* Special case: stop the listening state and kill pending requests. */
759 inet_csk_listen_stop(sk);
761 goto adjudge_to_death;
765 * We need to flush the recv. buffs. We do this only on the
766 * descriptor close, not protocol-sourced closes, because the
767 *reader process may not have drained the data yet!
769 /* FIXME: check for unread data */
770 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
774 if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
775 /* Check zero linger _after_ checking for unread data. */
776 sk->sk_prot->disconnect(sk, 0);
777 } else if (dccp_close_state(sk)) {
/* State table says we owe the peer a CLOSE packet. */
778 dccp_send_close(sk, 1);
781 sk_stream_wait_close(sk, timeout);
785 * It is the last release_sock in its life. It will remove backlog.
789 * Now socket is owned by kernel and we acquire BH lock
790 * to finish close. No need to check for user refs.
794 BUG_TRAP(!sock_owned_by_user(sk));
800 * The last release_sock may have processed the CLOSE or RESET
801 * packet moving sock to CLOSED state, if not we have to fire
802 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
803 * in draft-ietf-dccp-spec-11. -acme
805 if (sk->sk_state == DCCP_CLOSING) {
806 /* FIXME: should start at 2 * RTT */
807 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
808 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
809 inet_csk(sk)->icsk_rto,
812 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
813 dccp_set_state(sk, DCCP_CLOSED);
/* Account the socket as orphaned until protocol teardown finishes. */
817 atomic_inc(sk->sk_prot->orphan_count);
818 if (sk->sk_state == DCCP_CLOSED)
819 inet_csk_destroy_sock(sk);
821 /* Otherwise, socket is reprieved until protocol close. */
828 EXPORT_SYMBOL_GPL(dccp_close);
/*
 * dccp_shutdown - sk_prot->shutdown hook; currently a stub that only logs
 * entry (shutdown semantics are handled at the socket layer).
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
837 static const struct proto_ops inet_dccp_ops = {
839 .owner = THIS_MODULE,
840 .release = inet_release,
842 .connect = inet_stream_connect,
843 .socketpair = sock_no_socketpair,
844 .accept = inet_accept,
845 .getname = inet_getname,
846 /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
849 /* FIXME: work on inet_listen to rename it to sock_common_listen */
850 .listen = inet_dccp_listen,
851 .shutdown = inet_shutdown,
852 .setsockopt = sock_common_setsockopt,
853 .getsockopt = sock_common_getsockopt,
854 .sendmsg = inet_sendmsg,
855 .recvmsg = sock_common_recvmsg,
856 .mmap = sock_no_mmap,
857 .sendpage = sock_no_sendpage,
/* Defined by the core inet socket layer; needed to register the protosw. */
860 extern struct net_proto_family inet_family_ops;
/* inetsw registration entry for SOCK_DCCP/IPPROTO_DCCP sockets.
 * NOTE(review): extraction lost some initializer lines (.type/.prot at
 * least) and the closing brace; fields shown are kept byte-identical. */
862 static struct inet_protosw dccp_v4_protosw = {
864 .protocol = IPPROTO_DCCP,
866 .ops = &inet_dccp_ops,
869 .flags = INET_PROTOSW_ICSK,
873 * This is the global socket data structure used for responding to
874 * the Out-of-the-blue (OOTB) packets. A control sock will be created
875 * for this socket at the initialization time.
877 struct socket *dccp_ctl_socket;
/* Init-time-only error message for dccp_ctl_sock_init() below. */
879 static char dccp_ctl_socket_err_msg[] __initdata =
880 KERN_ERR "DCCP: Failed to create the control socket.\n";
882 static int __init dccp_ctl_sock_init(void)
884 int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
887 printk(dccp_ctl_socket_err_msg);
889 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
890 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
892 /* Unhash it so that IP input processing does not even
893 * see it, we do not wish this socket to see incoming
896 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
/* Release the control socket on module unload (unload-hack builds only). */
void dccp_ctl_sock_exit(void)
{
	if (dccp_ctl_socket != NULL) {
		sock_release(dccp_ctl_socket);
		dccp_ctl_socket = NULL;
	}
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif
914 static int __init init_dccp_v4_mibs(void)
918 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
919 if (dccp_statistics[0] == NULL)
922 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
923 if (dccp_statistics[1] == NULL)
930 free_percpu(dccp_statistics[0]);
931 dccp_statistics[0] = NULL;
/* Module parameter: override the established-hash sizing heuristic in
 * dccp_init(); 0 (default) means auto-size from available memory. */
936 static int thash_entries;
937 module_param(thash_entries, int, 0444);
938 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
/* Debug-message toggle, only compiled in with CONFIG_IP_DCCP_DEBUG.
 * NOTE(review): extraction lost the dccp_debug definition and the #endif
 * that closes this block. */
940 #ifdef CONFIG_IP_DCCP_DEBUG
942 module_param(dccp_debug, int, 0444);
943 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
945 EXPORT_SYMBOL_GPL(dccp_debug);
/*
 * dccp_init - module entry point.
 * Registers the DCCP proto, allocates the bind-bucket slab plus the
 * established (ehash) and bind (bhash) hash tables (sized from physical
 * memory or the thash_entries parameter), then registers the net_protocol
 * and protosw and brings up ack-vector, sysctl and control-socket support.
 * Unwinds everything in reverse order via the goto ladder on failure.
 * NOTE(review): extraction lost several lines (e.g. the declaration of
 * `goal', do{ }while openers, success return); code below is kept
 * byte-identical.
 */
948 static int __init dccp_init(void)
951 int ehash_order, bhash_order, i;
952 int rc = proto_register(&dccp_prot, 1);
/* Slab cache for local port bind buckets. */
958 dccp_hashinfo.bind_bucket_cachep =
959 kmem_cache_create("dccp_bind_bucket",
960 sizeof(struct inet_bind_bucket), 0,
961 SLAB_HWCACHE_ALIGN, NULL, NULL);
962 if (!dccp_hashinfo.bind_bucket_cachep)
963 goto out_proto_unregister;
966 * Size and allocate the main established and bind bucket
969 * The methodology is similar to that of the buffer cache.
/* Memory-based sizing heuristic (overridden by thash_entries below). */
971 if (num_physpages >= (128 * 1024))
972 goal = num_physpages >> (21 - PAGE_SHIFT);
974 goal = num_physpages >> (23 - PAGE_SHIFT);
977 goal = (thash_entries *
978 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
979 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
/* Round the bucket count down to a power of two; retry with fewer
 * pages if the allocation fails. */
982 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
983 sizeof(struct inet_ehash_bucket);
984 dccp_hashinfo.ehash_size >>= 1;
985 while (dccp_hashinfo.ehash_size &
986 (dccp_hashinfo.ehash_size - 1))
987 dccp_hashinfo.ehash_size--;
988 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
989 __get_free_pages(GFP_ATOMIC, ehash_order);
990 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
992 if (!dccp_hashinfo.ehash) {
993 printk(KERN_CRIT "Failed to allocate DCCP "
994 "established hash table\n");
995 goto out_free_bind_bucket_cachep;
/* Second half of ehash is the TIME_WAIT area, hence << 1. */
998 for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
999 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1000 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1003 bhash_order = ehash_order;
/* Bind hash: same order as ehash, capped, with fallback retries. */
1006 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1007 sizeof(struct inet_bind_hashbucket);
1008 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1011 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1012 __get_free_pages(GFP_ATOMIC, bhash_order);
1013 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1015 if (!dccp_hashinfo.bhash) {
1016 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
1017 goto out_free_dccp_ehash;
1020 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1021 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1022 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1025 rc = init_dccp_v4_mibs();
1027 goto out_free_dccp_bhash;
/* Hook into IPv4 input and register the socket-type mapping. */
1030 if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
1031 goto out_free_dccp_v4_mibs;
1033 inet_register_protosw(&dccp_v4_protosw);
1035 rc = dccp_ackvec_init();
1037 goto out_unregister_protosw;
1039 rc = dccp_sysctl_init();
1041 goto out_ackvec_exit;
1043 rc = dccp_ctl_sock_init();
1045 goto out_sysctl_exit;
/* Error unwind ladder: release in strict reverse order of setup. */
1052 out_unregister_protosw:
1053 inet_unregister_protosw(&dccp_v4_protosw);
1054 inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
1055 out_free_dccp_v4_mibs:
1056 free_percpu(dccp_statistics[0]);
1057 free_percpu(dccp_statistics[1]);
1058 dccp_statistics[0] = dccp_statistics[1] = NULL;
1059 out_free_dccp_bhash:
1060 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1061 dccp_hashinfo.bhash = NULL;
1062 out_free_dccp_ehash:
1063 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1064 dccp_hashinfo.ehash = NULL;
1065 out_free_bind_bucket_cachep:
1066 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1067 dccp_hashinfo.bind_bucket_cachep = NULL;
1068 out_proto_unregister:
1069 proto_unregister(&dccp_prot);
1073 static const char dccp_del_proto_err_msg[] __exitdata =
1074 KERN_ERR "can't remove dccp net_protocol\n";
1076 static void __exit dccp_fini(void)
1078 inet_unregister_protosw(&dccp_v4_protosw);
1080 if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
1081 printk(dccp_del_proto_err_msg);
1083 free_percpu(dccp_statistics[0]);
1084 free_percpu(dccp_statistics[1]);
1085 free_pages((unsigned long)dccp_hashinfo.bhash,
1086 get_order(dccp_hashinfo.bhash_size *
1087 sizeof(struct inet_bind_hashbucket)));
1088 free_pages((unsigned long)dccp_hashinfo.ehash,
1089 get_order(dccp_hashinfo.ehash_size *
1090 sizeof(struct inet_ehash_bucket)));
1091 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1092 proto_unregister(&dccp_prot);
/* Module entry/exit hookup. */
1097 module_init(dccp_init);
1098 module_exit(dccp_fini);
1101 * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
1102 * values directly. Also cover the case where the protocol is not specified,
1103 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
1105 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
1106 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
1107 MODULE_LICENSE("GPL");
1108 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1109 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");