/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *      This program is free software; you can redistribute it and/or modify it
 *      under the terms of the GNU General Public License version 2 as
 *      published by the Free Software Foundation.
 */

#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

static struct net_protocol dccp_protocol = {
        .handler        = dccp_v4_rcv,
        .err_handler    = dccp_v4_err,
        .no_policy      = 1,
};

const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
        static char *dccp_state_names[] = {
        [DCCP_OPEN]       = "OPEN",
        [DCCP_REQUESTING] = "REQUESTING",
        [DCCP_PARTOPEN]   = "PARTOPEN",
        [DCCP_LISTEN]     = "LISTEN",
        [DCCP_RESPOND]    = "RESPOND",
        [DCCP_CLOSING]    = "CLOSING",
        [DCCP_TIME_WAIT]  = "TIME_WAIT",
        [DCCP_CLOSED]     = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

static inline int dccp_listen_start(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);

        dp->dccps_role = DCCP_ROLE_LISTEN;
        /*
         * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
         * before calling listen()
         */
        if (dccp_service_not_initialized(sk))
                return -EPROTO;
        return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}

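/*
 * dccp_disconnect  -  abort the connection and reset the socket for reuse
 *
 * Moves the socket to DCCP_CLOSED, stops a listening socket, purges the
 * receive queue and any unsent skb, clears the transmit timers and the
 * destination cache, and finally reports the (pseudo-)error to the owner.
 */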
int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /* ABORT function of RFC793 */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        /* FIXME: do the active reset thing */
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        __skb_queue_purge(&sk->sk_receive_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *      Wait for a DCCP event.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
         * by the poll logic; correct handling of state changes made
         * by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        dccp_pr_debug("entry\n");
        return -ENOIOCTLCMD;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

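/*
 * DCCP_SOCKOPT_SERVICE takes an array of u32s: the first word is the
 * primary service code, any remaining words become the additional
 * service list kept in dccps_service_list.  The total length may not
 * exceed DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32) and none of the values
 * may be DCCP_SERVICE_INVALID_VALUE.  A minimal userspace sketch
 * (illustrative only, the service codes are made up):
 *
 *      u32 codes[2] = { 42, 43 };
 *      setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *                 codes, sizeof(codes));
 */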
static int dccp_setsockopt_service(struct sock *sk, const u32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        struct dccp_sock *dp;
        int err;
        int val;

        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        dp = dccp_sk(sk);
        err = 0;

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                dp->dccps_packet_size = val;
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }

        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

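/*
 * On a DCCP_SOCKOPT_SERVICE read the kernel returns the primary service
 * code followed by the additional service list, i.e. the same u32 array
 * layout that was handed in on setsockopt; *optlen is updated to the
 * number of bytes actually written.
 */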
static int dccp_getsockopt_service(struct sock *sk, int len,
                                   u32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if (dccp_service_not_initialized(sk))
                goto out;

        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        if (get_user(len, optlen))
                return -EFAULT;

        if (len < sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                val = dp->dccps_packet_size;
                len = sizeof(dp->dccps_packet_size);
                break;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (u32 __user *)optval, optlen);
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

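/*
 * dccp_sendmsg builds exactly one packet per call: DCCP preserves
 * datagram boundaries, so a message larger than the cached MSS is
 * rejected with -EMSGSIZE rather than being segmented.
 */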
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);
        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        rc = dccp_write_xmit(sk, skb, &timeo);
        /*
         * XXX we don't use sk_write_queue, so just discard the packet.
         *     Current plan however is to _use_ sk_write_queue with
         *     an algorithm similar to tcp_sendmsg, where the main difference
         *     is that in DCCP we have to respect packet boundaries, so
         *     no coalescing of skbs.
         *
         *     This bug was _quickly_ found & fixed by just looking at an OSTRA
         *     generated callgraph 8) -acme
         */
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

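/*
 * dccp_recvmsg hands back at most one DCCP-Data/DataAck payload per
 * call; if the user buffer is smaller than the packet, the remainder is
 * dropped and MSG_TRUNC is set.  RESET and CLOSE packets are treated as
 * end of stream (return value 0).
 */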
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                if (dh->dccph_type == DCCP_PKT_DATA ||
                    dh->dccph_type == DCCP_PKT_DATAACK)
                        goto found_ok_skb;

                if (dh->dccph_type == DCCP_PKT_RESET ||
                    dh->dccph_type == DCCP_PKT_CLOSE) {
                        dccp_pr_debug("found fin ok!\n");
                        len = 0;
                        goto found_fin_ok;
                }
                dccp_pr_debug("packet_type=%s\n",
                              dccp_packet_name(dh->dccph_type));
                sk_eat_skb(sk, skb);
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when the user tries to read
                                 * from a never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

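/*
 * inet_dccp_listen  -  move a bound DCCP socket into the LISTEN state
 *
 * Mirrors inet_listen(): if the socket is already listening only the
 * backlog is updated, otherwise dccp_listen_start() is called (which
 * requires that DCCP_SOCKOPT_SERVICE has been set beforehand).
 */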
int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

static const unsigned char dccp_new_state[] = {
        /* current state:   new state:      action:     */
        [0]               = DCCP_CLOSED,
        [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_REQUESTING] = DCCP_CLOSED,
        [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_LISTEN]     = DCCP_CLOSED,
        [DCCP_RESPOND]    = DCCP_CLOSED,
        [DCCP_CLOSING]    = DCCP_CLOSED,
        [DCCP_TIME_WAIT]  = DCCP_CLOSED,
        [DCCP_CLOSED]     = DCCP_CLOSED,
};

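/*
 * dccp_close_state  -  look up the state transition for close()
 *
 * Moves the socket to the next state from the dccp_new_state table and
 * returns non-zero (DCCP_ACTION_FIN) when a DCCP-Close/CloseReq still
 * has to be sent on the wire.
 */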
static int dccp_close_state(struct sock *sk)
{
        const int next = dccp_new_state[sk->sk_state];
        const int ns = next & DCCP_STATE_MASK;

        if (ns != sk->sk_state)
                dccp_set_state(sk, ns);

        return next & DCCP_ACTION_FIN;
}

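/*
 * dccp_close  -  user-initiated close of a DCCP socket
 *
 * Flushes the receive queue, sends a DCCP-Close when the state machine
 * requires one (unless SO_LINGER with a zero timeout forces an abortive
 * disconnect), then orphans the socket and either destroys it or leaves
 * it to the CLOSING retransmit timer / protocol events to finish off.
 */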
void dccp_close(struct sock *sk, long timeout)
{
        struct sk_buff *skb;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        /* FIXME: check for unread data */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                __kfree_skb(skb);
        }

        if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (dccp_close_state(sk)) {
                dccp_send_close(sk, 1);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        sock_hold(sk);
        sock_orphan(sk);

        /*
         * The last release_sock may have processed the CLOSE or RESET
         * packet moving sock to CLOSED state, if not we have to fire
         * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
         * in draft-ietf-dccp-spec-11. -acme
         */
        if (sk->sk_state == DCCP_CLOSING) {
                /* FIXME: should start at 2 * RTT */
                /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                          inet_csk(sk)->icsk_rto,
                                          DCCP_RTO_MAX);
#if 0
                /* Yeah, we should use sk->sk_prot->orphan_count, etc */
                dccp_set_state(sk, DCCP_CLOSED);
#endif
        }

        atomic_inc(sk->sk_prot->orphan_count);
        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static struct proto_ops inet_dccp_ops = {
        .family         = PF_INET,
        .owner          = THIS_MODULE,
        .release        = inet_release,
        .bind           = inet_bind,
        .connect        = inet_stream_connect,
        .socketpair     = sock_no_socketpair,
        .accept         = inet_accept,
        .getname        = inet_getname,
        /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
        .poll           = dccp_poll,
        .ioctl          = inet_ioctl,
        /* FIXME: work on inet_listen to rename it to sock_common_listen */
        .listen         = inet_dccp_listen,
        .shutdown       = inet_shutdown,
        .setsockopt     = sock_common_setsockopt,
        .getsockopt     = sock_common_getsockopt,
        .sendmsg        = inet_sendmsg,
        .recvmsg        = sock_common_recvmsg,
        .mmap           = sock_no_mmap,
        .sendpage       = sock_no_sendpage,
};

extern struct net_proto_family inet_family_ops;

static struct inet_protosw dccp_v4_protosw = {
        .type           = SOCK_DCCP,
        .protocol       = IPPROTO_DCCP,
        .prot           = &dccp_prot,
        .ops            = &inet_dccp_ops,
        .capability     = -1,
        .no_check       = 0,
        .flags          = INET_PROTOSW_ICSK,
};

/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at the initialization time.
 */
struct socket *dccp_ctl_socket;

static char dccp_ctl_socket_err_msg[] __initdata =
        KERN_ERR "DCCP: Failed to create the control socket.\n";

static int __init dccp_ctl_sock_init(void)
{
        int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
                                  &dccp_ctl_socket);
        if (rc < 0)
                printk(dccp_ctl_socket_err_msg);
        else {
                dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
                inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;

                /* Unhash it so that IP input processing does not even
                 * see it, we do not wish this socket to see incoming
                 * packets.
                 */
                dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
        }

        return rc;
}

#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
void dccp_ctl_sock_exit(void)
{
        if (dccp_ctl_socket != NULL) {
                sock_release(dccp_ctl_socket);
                dccp_ctl_socket = NULL;
        }
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif

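/*
 * As with the other SNMP MIBs, dccp_statistics holds two per-cpu
 * counter sets: one is updated from softirq context and the other from
 * process context, and both halves are summed when the statistics are
 * read, so counters can be bumped without disabling BHs.
 */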
static int __init init_dccp_v4_mibs(void)
{
        int rc = -ENOMEM;

        dccp_statistics[0] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[0] == NULL)
                goto out;

        dccp_statistics[1] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[1] == NULL)
                goto out_free_one;

        rc = 0;
out:
        return rc;
out_free_one:
        free_percpu(dccp_statistics[0]);
        dccp_statistics[0] = NULL;
        goto out;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

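/*
 * thash_entries bounds the size of the established-connection hash
 * table sized in dccp_init() below.  A hypothetical usage example,
 * assuming the module is built as "dccp":
 *
 *      modprobe dccp thash_entries=8192
 */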
#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

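/*
 * Module initialization: register the DCCP proto, size and allocate the
 * established (ehash) and bind (bhash) hash tables (scaled from
 * num_physpages or the thash_entries parameter), set up the MIBs, hook
 * IPPROTO_DCCP into IPv4 and create the OOTB control socket.  The error
 * paths at the bottom unwind these steps in reverse order.
 */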
static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = proto_register(&dccp_prot, 1);

        if (rc)
                goto out;

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out_proto_unregister;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                dccp_hashinfo.ehash_size >>= 1;
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                printk(KERN_CRIT "Failed to allocate DCCP "
                                 "established hash table\n");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
                rwlock_init(&dccp_hashinfo.ehash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
        }

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
                goto out_free_dccp_ehash;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        if (init_dccp_v4_mibs())
                goto out_free_dccp_bhash;

        rc = -EAGAIN;
        if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
                goto out_free_dccp_v4_mibs;

        inet_register_protosw(&dccp_v4_protosw);

        rc = dccp_ctl_sock_init();
        if (rc)
                goto out_unregister_protosw;
out:
        return rc;
out_unregister_protosw:
        inet_unregister_protosw(&dccp_v4_protosw);
        inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
        proto_unregister(&dccp_prot);
        goto out;
}

static const char dccp_del_proto_err_msg[] __exitdata =
        KERN_ERR "can't remove dccp net_protocol\n";

static void __exit dccp_fini(void)
{
        inet_unregister_protosw(&dccp_v4_protosw);

        if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
                printk(dccp_del_proto_err_msg);

        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        proto_unregister(&dccp_prot);
}

module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use the SOCK_DCCP (6) and
 * IPPROTO_DCCP (33) values directly.  Also cover the case where the
 * protocol is not specified, i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");