[DCCP]: Introduce dccp_ipv4_af_ops
/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *      This program is free software; you can redistribute it and/or modify it
 *      under the terms of the GNU General Public License version 2 as
 *      published by the Free Software Foundation.
 */

#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

static struct net_protocol dccp_protocol = {
        .handler        = dccp_v4_rcv,
        .err_handler    = dccp_v4_err,
        .no_policy      = 1,
};

const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
        static char *dccp_state_names[] = {
                [DCCP_OPEN]       = "OPEN",
                [DCCP_REQUESTING] = "REQUESTING",
                [DCCP_PARTOPEN]   = "PARTOPEN",
                [DCCP_LISTEN]     = "LISTEN",
                [DCCP_RESPOND]    = "RESPOND",
                [DCCP_CLOSING]    = "CLOSING",
                [DCCP_TIME_WAIT]  = "TIME_WAIT",
                [DCCP_CLOSED]     = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

static inline int dccp_listen_start(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);

        dp->dccps_role = DCCP_ROLE_LISTEN;
        /*
         * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
         * before calling listen()
         */
        if (dccp_service_not_initialized(sk))
                return -EPROTO;
        return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}

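/*
 * Abort the connection: purge the receive queue and any unsent packet,
 * stop the transmit timers, drop the cached route and, unless the user
 * explicitly bound the socket, reset the source address.
 */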
int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /* ABORT function of RFC793 */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        /* FIXME: do the active reset thing */
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        __skb_queue_purge(&sk->sk_receive_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

/*
 *      Wait for a DCCP event.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 */
static unsigned int dccp_poll(struct file *file, struct socket *sock,
                              poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /*
         * Socket is not locked. We are protected from async events by the
         * poll logic, and correct handling of state changes made by other
         * threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        dccp_pr_debug("entry\n");
        return -ENOIOCTLCMD;
}

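/*
 * DCCP_SOCKOPT_SERVICE: the first u32 of optval (already fetched by the
 * caller) becomes dccps_service; any remaining u32s are copied into a
 * freshly allocated dccps_service_list.
 */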
static int dccp_setsockopt_service(struct sock *sk, const u32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        struct dccp_sock *dp;
        int err;
        int val;

        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        dp = dccp_sk(sk);
        err = 0;

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                dp->dccps_packet_size = val;
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }

        release_sock(sk);
        return err;
}

static int dccp_getsockopt_service(struct sock *sk, int len,
                                   u32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if (dccp_service_not_initialized(sk))
                goto out;

        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

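/*
 * SOL_DCCP options 128..191 are forwarded to the RX CCID and 192..255 to
 * the TX CCID; the remaining option names are handled here.
 */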
int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        if (get_user(len, optlen))
                return -EFAULT;

        if (len < sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                val = dp->dccps_packet_size;
                len = sizeof(dp->dccps_packet_size);
                break;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (u32 __user *)optval, optlen);
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}

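/*
 * Each sendmsg() call produces at most one DCCP packet, so the payload
 * must fit within the current MSS (dccps_mss_cache).
 */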
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);
        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        rc = dccp_write_xmit(sk, skb, &timeo);
        /*
         * XXX we don't use sk_write_queue, so just discard the packet.
         *     Current plan however is to _use_ sk_write_queue with
         *     an algorithm similar to tcp_sendmsg, where the main difference
         *     is that in DCCP we have to respect packet boundaries, so
         *     no coalescing of skbs.
         *
         *     This bug was _quickly_ found & fixed by just looking at an OSTRA
         *     generated callgraph 8) -acme
         */
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

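/*
 * Return at most one data packet per call. A queued RESET or CLOSE is
 * reported as end of data (0), any other packet type is discarded.
 */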
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                if (dh->dccph_type == DCCP_PKT_DATA ||
                    dh->dccph_type == DCCP_PKT_DATAACK)
                        goto found_ok_skb;

                if (dh->dccph_type == DCCP_PKT_RESET ||
                    dh->dccph_type == DCCP_PKT_CLOSE) {
                        dccp_pr_debug("found fin ok!\n");
                        len = 0;
                        goto found_fin_ok;
                }
                dccp_pr_debug("packet_type=%s\n",
                              dccp_packet_name(dh->dccph_type));
                sk_eat_skb(sk, skb);
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when the user tries to read
                                 * from a socket that was never connected.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

static int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

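/*
 * Per-state transition table used by dccp_close_state(): the low bits hold
 * the state to move to on close(), the DCCP_ACTION_FIN bit tells
 * dccp_close() that a CLOSE packet has to be sent first.
 */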
static const unsigned char dccp_new_state[] = {
        /* current state:   new state:      action:     */
        [0]               = DCCP_CLOSED,
        [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_REQUESTING] = DCCP_CLOSED,
        [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_LISTEN]     = DCCP_CLOSED,
        [DCCP_RESPOND]    = DCCP_CLOSED,
        [DCCP_CLOSING]    = DCCP_CLOSED,
        [DCCP_TIME_WAIT]  = DCCP_CLOSED,
        [DCCP_CLOSED]     = DCCP_CLOSED,
};

static int dccp_close_state(struct sock *sk)
{
        const int next = dccp_new_state[sk->sk_state];
        const int ns = next & DCCP_STATE_MASK;

        if (ns != sk->sk_state)
                dccp_set_state(sk, ns);

        return next & DCCP_ACTION_FIN;
}

void dccp_close(struct sock *sk, long timeout)
{
        struct sk_buff *skb;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        /* FIXME: check for unread data */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                __kfree_skb(skb);
        }

        if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (dccp_close_state(sk)) {
                dccp_send_close(sk, 1);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        sock_hold(sk);
        sock_orphan(sk);

        /*
         * The last release_sock may have processed the CLOSE or RESET
         * packet moving sock to CLOSED state, if not we have to fire
         * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
         * in draft-ietf-dccp-spec-11. -acme
         */
        if (sk->sk_state == DCCP_CLOSING) {
                /* FIXME: should start at 2 * RTT */
                /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                          inet_csk(sk)->icsk_rto,
                                          DCCP_RTO_MAX);
#if 0
                /* Yeah, we should use sk->sk_prot->orphan_count, etc */
                dccp_set_state(sk, DCCP_CLOSED);
#endif
        }

        atomic_inc(sk->sk_prot->orphan_count);
        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

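/* Not implemented yet: shutdown() on a DCCP socket only logs the call. */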
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}

static struct proto_ops inet_dccp_ops = {
        .family         = PF_INET,
        .owner          = THIS_MODULE,
        .release        = inet_release,
        .bind           = inet_bind,
        .connect        = inet_stream_connect,
        .socketpair     = sock_no_socketpair,
        .accept         = inet_accept,
        .getname        = inet_getname,
        /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
        .poll           = dccp_poll,
        .ioctl          = inet_ioctl,
        /* FIXME: work on inet_listen to rename it to sock_common_listen */
        .listen         = inet_dccp_listen,
        .shutdown       = inet_shutdown,
        .setsockopt     = sock_common_setsockopt,
        .getsockopt     = sock_common_getsockopt,
        .sendmsg        = inet_sendmsg,
        .recvmsg        = sock_common_recvmsg,
        .mmap           = sock_no_mmap,
        .sendpage       = sock_no_sendpage,
};

extern struct net_proto_family inet_family_ops;

static struct inet_protosw dccp_v4_protosw = {
        .type           = SOCK_DCCP,
        .protocol       = IPPROTO_DCCP,
        .prot           = &dccp_v4_prot,
        .ops            = &inet_dccp_ops,
        .capability     = -1,
        .no_check       = 0,
        .flags          = 0,
};

/*
 * This is the global socket data structure used for responding to
 * out-of-the-blue (OOTB) packets. The control sock is created for it
 * at initialization time.
 */
struct socket *dccp_ctl_socket;

static char dccp_ctl_socket_err_msg[] __initdata =
        KERN_ERR "DCCP: Failed to create the control socket.\n";

static int __init dccp_ctl_sock_init(void)
{
        int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
                                  &dccp_ctl_socket);
        if (rc < 0)
                printk(dccp_ctl_socket_err_msg);
        else {
                dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
                inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;

                /* Unhash it so that IP input processing does not even
                 * see it; we do not wish this socket to receive incoming
                 * packets.
                 */
                dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
        }

        return rc;
}

#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
void dccp_ctl_sock_exit(void)
{
        if (dccp_ctl_socket != NULL) {
                sock_release(dccp_ctl_socket);
                dccp_ctl_socket = NULL;
        }
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif

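/* Allocate the two per-cpu copies of the DCCP MIB counters (dccp_statistics). */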
static int __init init_dccp_v4_mibs(void)
{
        int rc = -ENOMEM;

        dccp_statistics[0] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[0] == NULL)
                goto out;

        dccp_statistics[1] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[1] == NULL)
                goto out_free_one;

        rc = 0;
out:
        return rc;
out_free_one:
        free_percpu(dccp_statistics[0]);
        dccp_statistics[0] = NULL;
        goto out;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
#endif

static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = proto_register(&dccp_v4_prot, 1);

        if (rc)
                goto out;

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out_proto_unregister;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                dccp_hashinfo.ehash_size >>= 1;
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                printk(KERN_CRIT "Failed to allocate DCCP "
                                 "established hash table\n");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
                rwlock_init(&dccp_hashinfo.ehash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
        }

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
                goto out_free_dccp_ehash;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        if (init_dccp_v4_mibs())
                goto out_free_dccp_bhash;

        rc = -EAGAIN;
        if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
                goto out_free_dccp_v4_mibs;

        inet_register_protosw(&dccp_v4_protosw);

        rc = dccp_ctl_sock_init();
        if (rc)
                goto out_unregister_protosw;
out:
        return rc;
out_unregister_protosw:
        inet_unregister_protosw(&dccp_v4_protosw);
        inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
        proto_unregister(&dccp_v4_prot);
        goto out;
}

static const char dccp_del_proto_err_msg[] __exitdata =
        KERN_ERR "can't remove dccp net_protocol\n";

static void __exit dccp_fini(void)
{
        inet_unregister_protosw(&dccp_v4_protosw);

        if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
                printk(dccp_del_proto_err_msg);

        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        proto_unregister(&dccp_v4_prot);
}

module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use the SOCK_DCCP (6) and IPPROTO_DCCP
 * (33) values directly. Also cover the case where the protocol is not
 * specified, i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP.
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");