net/dccp/proto.c
[DCCP]: Move dccp_[un]hash from ipv4.c to the core
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_common.h>
27 #include <net/inet_sock.h>
28 #include <net/protocol.h>
29 #include <net/sock.h>
30 #include <net/xfrm.h>
31
32 #include <asm/semaphore.h>
33 #include <linux/spinlock.h>
34 #include <linux/timer.h>
35 #include <linux/delay.h>
36 #include <linux/poll.h>
37
38 #include "ccid.h"
39 #include "dccp.h"
40 #include "feat.h"
41
42 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
43
44 EXPORT_SYMBOL_GPL(dccp_statistics);
45
46 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
47
48 EXPORT_SYMBOL_GPL(dccp_orphan_count);
49
50 static struct net_protocol dccp_protocol = {
51         .handler        = dccp_v4_rcv,
52         .err_handler    = dccp_v4_err,
53         .no_policy      = 1,
54 };
55
56 const char *dccp_packet_name(const int type)
57 {
58         static const char *dccp_packet_names[] = {
59                 [DCCP_PKT_REQUEST]  = "REQUEST",
60                 [DCCP_PKT_RESPONSE] = "RESPONSE",
61                 [DCCP_PKT_DATA]     = "DATA",
62                 [DCCP_PKT_ACK]      = "ACK",
63                 [DCCP_PKT_DATAACK]  = "DATAACK",
64                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
65                 [DCCP_PKT_CLOSE]    = "CLOSE",
66                 [DCCP_PKT_RESET]    = "RESET",
67                 [DCCP_PKT_SYNC]     = "SYNC",
68                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
69         };
70
71         if (type >= DCCP_NR_PKT_TYPES)
72                 return "INVALID";
73         else
74                 return dccp_packet_names[type];
75 }
76
77 EXPORT_SYMBOL_GPL(dccp_packet_name);
78
79 const char *dccp_state_name(const int state)
80 {
81         static char *dccp_state_names[] = {
82         [DCCP_OPEN]       = "OPEN",
83         [DCCP_REQUESTING] = "REQUESTING",
84         [DCCP_PARTOPEN]   = "PARTOPEN",
85         [DCCP_LISTEN]     = "LISTEN",
86         [DCCP_RESPOND]    = "RESPOND",
87         [DCCP_CLOSING]    = "CLOSING",
88         [DCCP_TIME_WAIT]  = "TIME_WAIT",
89         [DCCP_CLOSED]     = "CLOSED",
90         };
91
92         if (state >= DCCP_MAX_STATES)
93                 return "INVALID STATE!";
94         else
95                 return dccp_state_names[state];
96 }
97
98 EXPORT_SYMBOL_GPL(dccp_state_name);
99
100 void dccp_hash(struct sock *sk)
101 {
102         inet_hash(&dccp_hashinfo, sk);
103 }
104
105 EXPORT_SYMBOL_GPL(dccp_hash);
106
107 void dccp_unhash(struct sock *sk)
108 {
109         inet_unhash(&dccp_hashinfo, sk);
110 }
111
112 EXPORT_SYMBOL_GPL(dccp_unhash);
113
114 int dccp_init_sock(struct sock *sk)
115 {
116         struct dccp_sock *dp = dccp_sk(sk);
117         struct inet_connection_sock *icsk = inet_csk(sk);
118         static int dccp_ctl_socket_init = 1;
119
120         dccp_options_init(&dp->dccps_options);
121         do_gettimeofday(&dp->dccps_epoch);
122
123         /*
124          * FIXME: We're hardcoding the CCID, and doing this at this point makes
125          * the listening (master) sock get CCID control blocks, which is not
126          * necessary, but for now, to not mess with the test userspace apps,
127          * let's leave it here; later the real solution is to do this in a
128          * setsockopt(CCIDs-I-want/accept). -acme
129          */
130         if (likely(!dccp_ctl_socket_init)) {
131                 int rc = dccp_feat_init(sk);
132
133                 if (rc)
134                         return rc;
135
136                 if (dp->dccps_options.dccpo_send_ack_vector) {
137                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
138                         if (dp->dccps_hc_rx_ackvec == NULL)
139                                 return -ENOMEM;
140                 }
141                 dp->dccps_hc_rx_ccid =
142                                 ccid_hc_rx_new(dp->dccps_options.dccpo_rx_ccid,
143                                                sk, GFP_KERNEL);
144                 dp->dccps_hc_tx_ccid =
145                                 ccid_hc_tx_new(dp->dccps_options.dccpo_tx_ccid,
146                                                sk, GFP_KERNEL);
147                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
148                              dp->dccps_hc_tx_ccid == NULL)) {
149                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
150                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
151                         if (dp->dccps_options.dccpo_send_ack_vector) {
152                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
153                                 dp->dccps_hc_rx_ackvec = NULL;
154                         }
155                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
156                         return -ENOMEM;
157                 }
158         } else {
159                 /* control socket doesn't need feat nego */
160                 INIT_LIST_HEAD(&dp->dccps_options.dccpo_pending);
161                 INIT_LIST_HEAD(&dp->dccps_options.dccpo_conf);
162                 dccp_ctl_socket_init = 0;
163         }
164
165         dccp_init_xmit_timers(sk);
166         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
167         sk->sk_state            = DCCP_CLOSED;
168         sk->sk_write_space      = dccp_write_space;
169         icsk->icsk_sync_mss     = dccp_sync_mss;
170         dp->dccps_mss_cache     = 536;
171         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
172         dp->dccps_service       = DCCP_SERVICE_INVALID_VALUE;
173         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
174
175         return 0;
176 }
177
178 EXPORT_SYMBOL_GPL(dccp_init_sock);
179
180 int dccp_destroy_sock(struct sock *sk)
181 {
182         struct dccp_sock *dp = dccp_sk(sk);
183
184         /*
185          * DCCP doesn't use sk_write_queue, just sk_send_head
186          * for retransmissions
187          */
188         if (sk->sk_send_head != NULL) {
189                 kfree_skb(sk->sk_send_head);
190                 sk->sk_send_head = NULL;
191         }
192
193         /* Clean up a referenced DCCP bind bucket. */
194         if (inet_csk(sk)->icsk_bind_hash != NULL)
195                 inet_put_port(&dccp_hashinfo, sk);
196
197         kfree(dp->dccps_service_list);
198         dp->dccps_service_list = NULL;
199
200         if (dp->dccps_options.dccpo_send_ack_vector) {
201                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
202                 dp->dccps_hc_rx_ackvec = NULL;
203         }
204         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
205         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
206         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
207
208         /* clean up feature negotiation state */
209         dccp_feat_clean(sk);
210
211         return 0;
212 }
213
214 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
215
216 static inline int dccp_listen_start(struct sock *sk)
217 {
218         struct dccp_sock *dp = dccp_sk(sk);
219
220         dp->dccps_role = DCCP_ROLE_LISTEN;
221         /*
222          * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
223          * before calling listen() (see the usage sketch after this function)
224          */
225         if (dccp_service_not_initialized(sk))
226                 return -EPROTO;
227         return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
228 }
229
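/*
 * Hedged usage sketch (not part of this file): the EPROTO check above means
 * a server is expected to set its service code before calling listen().
 * Assuming the userspace-visible SOCK_DCCP, IPPROTO_DCCP, SOL_DCCP and
 * DCCP_SOCKOPT_SERVICE definitions from <linux/dccp.h>, and with addr
 * prepared as usual, that looks roughly like:
 *
 *	int fd = socket(PF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	uint32_t service = htonl(42);	// 42 is just an example service code
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, &service,
 *		   sizeof(service));
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, 5);
 *
 * Skipping the setsockopt() leaves dccp_service_not_initialized() true and
 * listen() fails with EPROTO.
 */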
230 int dccp_disconnect(struct sock *sk, int flags)
231 {
232         struct inet_connection_sock *icsk = inet_csk(sk);
233         struct inet_sock *inet = inet_sk(sk);
234         int err = 0;
235         const int old_state = sk->sk_state;
236
237         if (old_state != DCCP_CLOSED)
238                 dccp_set_state(sk, DCCP_CLOSED);
239
240         /* ABORT function of RFC793 */
241         if (old_state == DCCP_LISTEN) {
242                 inet_csk_listen_stop(sk);
243         /* FIXME: do the active reset thing */
244         } else if (old_state == DCCP_REQUESTING)
245                 sk->sk_err = ECONNRESET;
246
247         dccp_clear_xmit_timers(sk);
248         __skb_queue_purge(&sk->sk_receive_queue);
249         if (sk->sk_send_head != NULL) {
250                 __kfree_skb(sk->sk_send_head);
251                 sk->sk_send_head = NULL;
252         }
253
254         inet->dport = 0;
255
256         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
257                 inet_reset_saddr(sk);
258
259         sk->sk_shutdown = 0;
260         sock_reset_flag(sk, SOCK_DONE);
261
262         icsk->icsk_backoff = 0;
263         inet_csk_delack_init(sk);
264         __sk_dst_reset(sk);
265
266         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
267
268         sk->sk_error_report(sk);
269         return err;
270 }
271
272 EXPORT_SYMBOL_GPL(dccp_disconnect);
273
274 /*
275  *      Wait for a DCCP event.
276  *
277  *      Note that we don't need to lock the socket, as the upper poll layers
278  *      take care of normal races (between the test and the event) and we don't
279  *      go look at any of the socket buffers directly.
280  */
281 unsigned int dccp_poll(struct file *file, struct socket *sock,
282                        poll_table *wait)
283 {
284         unsigned int mask;
285         struct sock *sk = sock->sk;
286
287         poll_wait(file, sk->sk_sleep, wait);
288         if (sk->sk_state == DCCP_LISTEN)
289                 return inet_csk_listen_poll(sk);
290
291         /* Socket is not locked. We are protected from async events
292            by the poll logic, and correct handling of state changes
293            made by other threads is impossible in any case.
294          */
295
296         mask = 0;
297         if (sk->sk_err)
298                 mask = POLLERR;
299
300         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
301                 mask |= POLLHUP;
302         if (sk->sk_shutdown & RCV_SHUTDOWN)
303                 mask |= POLLIN | POLLRDNORM;
304
305         /* Connected? */
306         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
307                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
308                         mask |= POLLIN | POLLRDNORM;
309
310                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
311                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
312                                 mask |= POLLOUT | POLLWRNORM;
313                         } else {  /* send SIGIO later */
314                                 set_bit(SOCK_ASYNC_NOSPACE,
315                                         &sk->sk_socket->flags);
316                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
317
318                                 /* Race breaker. If space is freed after
319                                  * wspace test but before the flags are set,
320                                  * IO signal will be lost.
321                                  */
322                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
323                                         mask |= POLLOUT | POLLWRNORM;
324                         }
325                 }
326         }
327         return mask;
328 }
329
330 EXPORT_SYMBOL_GPL(dccp_poll);
331
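/*
 * Hedged userspace sketch for the mask computed above (illustrative only;
 * fd, buf and timeout_ms assumed declared): POLLIN is reported once receive
 * memory is in use (queued packets) or the receive side is shut down,
 * POLLHUP once both directions are shut down or the socket reaches
 * DCCP_CLOSED, so the usual poll(2) loop applies:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	if (poll(&pfd, 1, timeout_ms) > 0 && (pfd.revents & POLLIN))
 *		recv(fd, buf, sizeof(buf), 0);
 */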
332 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
333 {
334         dccp_pr_debug("entry\n");
335         return -ENOIOCTLCMD;
336 }
337
338 EXPORT_SYMBOL_GPL(dccp_ioctl);
339
340 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
341                                    char __user *optval, int optlen)
342 {
343         struct dccp_sock *dp = dccp_sk(sk);
344         struct dccp_service_list *sl = NULL;
345
346         if (service == DCCP_SERVICE_INVALID_VALUE || 
347             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
348                 return -EINVAL;
349
350         if (optlen > sizeof(service)) {
351                 sl = kmalloc(optlen, GFP_KERNEL);
352                 if (sl == NULL)
353                         return -ENOMEM;
354
355                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
356                 if (copy_from_user(sl->dccpsl_list,
357                                    optval + sizeof(service),
358                                    optlen - sizeof(service)) ||
359                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
360                         kfree(sl);
361                         return -EFAULT;
362                 }
363         }
364
365         lock_sock(sk);
366         dp->dccps_service = service;
367
368         kfree(dp->dccps_service_list);
369
370         dp->dccps_service_list = sl;
371         release_sock(sk);
372         return 0;
373 }
374
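/*
 * Hedged layout sketch for the handler above: optval is an array of 32-bit
 * service codes in network byte order, at most DCCP_SERVICE_LIST_MAX_LEN of
 * them in total; the first becomes dccps_service, the remainder form
 * dccps_service_list, and DCCP_SERVICE_INVALID_VALUE may not appear at all.
 * Advertising three (made-up) codes from userspace would look roughly like:
 *
 *	uint32_t services[3] = { htonl(1), htonl(2), htonl(3) };
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   services, sizeof(services));
 */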
375 /* Byte 1 is the feature.  The rest is the preference list. */
376 static int dccp_setsockopt_change(struct sock *sk, int type,
377                                   struct dccp_so_feat __user *optval)
378 {
379         struct dccp_so_feat opt;
380         u8 *val;
381         int rc;
382
383         if (copy_from_user(&opt, optval, sizeof(opt)))
384                 return -EFAULT;
385
386         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
387         if (!val)
388                 return -ENOMEM;
389
390         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
391                 rc = -EFAULT;
392                 goto out_free_val;
393         }
394
395         rc = dccp_feat_change(sk, type, opt.dccpsf_feat, val, opt.dccpsf_len,
396                               GFP_KERNEL);
397         if (rc)
398                 goto out_free_val;
399
400 out:
401         return rc;
402
403 out_free_val:
404         kfree(val);
405         goto out;
406 }
407
408 int dccp_setsockopt(struct sock *sk, int level, int optname,
409                     char __user *optval, int optlen)
410 {
411         struct dccp_sock *dp;
412         int err;
413         int val;
414
415         if (level != SOL_DCCP)
416                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
417                                                              optname, optval,
418                                                              optlen);
419
420         if (optlen < sizeof(int))
421                 return -EINVAL;
422
423         if (get_user(val, (int __user *)optval))
424                 return -EFAULT;
425
426         if (optname == DCCP_SOCKOPT_SERVICE)
427                 return dccp_setsockopt_service(sk, val, optval, optlen);
428
429         lock_sock(sk);
430         dp = dccp_sk(sk);
431         err = 0;
432
433         switch (optname) {
434         case DCCP_SOCKOPT_PACKET_SIZE:
435                 dp->dccps_packet_size = val;
436                 break;
437
438         case DCCP_SOCKOPT_CHANGE_L:
439                 if (optlen != sizeof(struct dccp_so_feat))
440                         err = -EINVAL;
441                 else
442                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
443                                                      (struct dccp_so_feat *)
444                                                      optval);
445                 break;
446
447         case DCCP_SOCKOPT_CHANGE_R:
448                 if (optlen != sizeof(struct dccp_so_feat))
449                         err = -EINVAL;
450                 else
451                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
452                                                      (struct dccp_so_feat *)
453                                                      optval);
454                 break;
455
456         default:
457                 err = -ENOPROTOOPT;
458                 break;
459         }
460         
461         release_sock(sk);
462         return err;
463 }
464
465 EXPORT_SYMBOL_GPL(dccp_setsockopt);
466
467 static int dccp_getsockopt_service(struct sock *sk, int len,
468                                    __be32 __user *optval,
469                                    int __user *optlen)
470 {
471         const struct dccp_sock *dp = dccp_sk(sk);
472         const struct dccp_service_list *sl;
473         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
474
475         lock_sock(sk);
476         if (dccp_service_not_initialized(sk))
477                 goto out;
478
479         if ((sl = dp->dccps_service_list) != NULL) {
480                 slen = sl->dccpsl_nr * sizeof(u32);
481                 total_len += slen;
482         }
483
484         err = -EINVAL;
485         if (total_len > len)
486                 goto out;
487
488         err = 0;
489         if (put_user(total_len, optlen) ||
490             put_user(dp->dccps_service, optval) ||
491             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
492                 err = -EFAULT;
493 out:
494         release_sock(sk);
495         return err;
496 }
497
498 int dccp_getsockopt(struct sock *sk, int level, int optname,
499                     char __user *optval, int __user *optlen)
500 {
501         struct dccp_sock *dp;
502         int val, len;
503
504         if (level != SOL_DCCP)
505                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
506                                                              optname, optval,
507                                                              optlen);
508         if (get_user(len, optlen))
509                 return -EFAULT;
510
511         if (len < sizeof(int))
512                 return -EINVAL;
513
514         dp = dccp_sk(sk);
515
516         switch (optname) {
517         case DCCP_SOCKOPT_PACKET_SIZE:
518                 val = dp->dccps_packet_size;
519                 len = sizeof(dp->dccps_packet_size);
520                 break;
521         case DCCP_SOCKOPT_SERVICE:
522                 return dccp_getsockopt_service(sk, len,
523                                                (__be32 __user *)optval, optlen);
524         case 128 ... 191:
525                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
526                                              len, (u32 __user *)optval, optlen);
527         case 192 ... 255:
528                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
529                                              len, (u32 __user *)optval, optlen);
530         default:
531                 return -ENOPROTOOPT;
532         }
533
534         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
535                 return -EFAULT;
536
537         return 0;
538 }
539
540 EXPORT_SYMBOL_GPL(dccp_getsockopt);
541
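/*
 * Hedged illustration of the option-number split in dccp_getsockopt() above:
 * names 128..191 are answered by the RX CCID and 192..255 by the TX CCID,
 * with the meaning of each number left entirely to the CCID. A hypothetical
 * CCID-defined receiver statistic would be read as:
 *
 *	uint32_t value;
 *	socklen_t len = sizeof(value);
 *
 *	getsockopt(fd, SOL_DCCP, 128, &value, &len);	// 128 is only an example
 */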
542 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
543                  size_t len)
544 {
545         const struct dccp_sock *dp = dccp_sk(sk);
546         const int flags = msg->msg_flags;
547         const int noblock = flags & MSG_DONTWAIT;
548         struct sk_buff *skb;
549         int rc, size;
550         long timeo;
551
552         if (len > dp->dccps_mss_cache)
553                 return -EMSGSIZE;
554
555         lock_sock(sk);
556         timeo = sock_sndtimeo(sk, noblock);
557
558         /*
559          * We have to use sk_stream_wait_connect here to set sk_write_pending,
560          * so that the trick in dccp_rcv_request_sent_state_process works.
561          */
562         /* Wait for a connection to finish. */
563         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
564                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
565                         goto out_release;
566
567         size = sk->sk_prot->max_header + len;
568         release_sock(sk);
569         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
570         lock_sock(sk);
571         if (skb == NULL)
572                 goto out_release;
573
574         skb_reserve(skb, sk->sk_prot->max_header);
575         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
576         if (rc != 0)
577                 goto out_discard;
578
579         rc = dccp_write_xmit(sk, skb, &timeo);
580         /*
581          * XXX we don't use sk_write_queue, so just discard the packet.
582          *     The current plan, however, is to _use_ sk_write_queue with
583          *     an algorithm similar to tcp_sendmsg, where the main difference
584          *     is that in DCCP we have to respect packet boundaries, so
585          *     no coalescing of skbs.
586          *
587          *     This bug was _quickly_ found & fixed by just looking at an
588          *     OSTRA-generated call graph 8) -acme
589          */
590 out_release:
591         release_sock(sk);
592         return rc ? : len;
593 out_discard:
594         kfree_skb(skb);
595         goto out_release;
596 }
597
598 EXPORT_SYMBOL_GPL(dccp_sendmsg);
599
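/*
 * Hedged userspace sketch for the send path above: DCCP keeps message
 * boundaries and dccp_sendmsg() rejects anything larger than the cached MSS
 * with EMSGSIZE, so applications send one datagram per call and keep each
 * payload small (256 below is an arbitrary example size):
 *
 *	char payload[256];
 *
 *	if (send(fd, payload, sizeof(payload), 0) < 0 && errno == EMSGSIZE)
 *		;	// payload exceeded dccps_mss_cache, shrink and retry
 */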
600 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
601                  size_t len, int nonblock, int flags, int *addr_len)
602 {
603         const struct dccp_hdr *dh;
604         long timeo;
605
606         lock_sock(sk);
607
608         if (sk->sk_state == DCCP_LISTEN) {
609                 len = -ENOTCONN;
610                 goto out;
611         }
612
613         timeo = sock_rcvtimeo(sk, nonblock);
614
615         do {
616                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
617
618                 if (skb == NULL)
619                         goto verify_sock_status;
620
621                 dh = dccp_hdr(skb);
622
623                 if (dh->dccph_type == DCCP_PKT_DATA ||
624                     dh->dccph_type == DCCP_PKT_DATAACK)
625                         goto found_ok_skb;
626
627                 if (dh->dccph_type == DCCP_PKT_RESET ||
628                     dh->dccph_type == DCCP_PKT_CLOSE) {
629                         dccp_pr_debug("found fin ok!\n");
630                         len = 0;
631                         goto found_fin_ok;
632                 }
633                 dccp_pr_debug("packet_type=%s\n",
634                               dccp_packet_name(dh->dccph_type));
635                 sk_eat_skb(sk, skb);
636 verify_sock_status:
637                 if (sock_flag(sk, SOCK_DONE)) {
638                         len = 0;
639                         break;
640                 }
641
642                 if (sk->sk_err) {
643                         len = sock_error(sk);
644                         break;
645                 }
646
647                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
648                         len = 0;
649                         break;
650                 }
651
652                 if (sk->sk_state == DCCP_CLOSED) {
653                         if (!sock_flag(sk, SOCK_DONE)) {
654                                 /* This occurs when the user tries to read
655                                  * from a never-connected socket.
656                                  */
657                                 len = -ENOTCONN;
658                                 break;
659                         }
660                         len = 0;
661                         break;
662                 }
663
664                 if (!timeo) {
665                         len = -EAGAIN;
666                         break;
667                 }
668
669                 if (signal_pending(current)) {
670                         len = sock_intr_errno(timeo);
671                         break;
672                 }
673
674                 sk_wait_data(sk, &timeo);
675                 continue;
676         found_ok_skb:
677                 if (len > skb->len)
678                         len = skb->len;
679                 else if (len < skb->len)
680                         msg->msg_flags |= MSG_TRUNC;
681
682                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
683                         /* Exception. Bailout! */
684                         len = -EFAULT;
685                         break;
686                 }
687         found_fin_ok:
688                 if (!(flags & MSG_PEEK))
689                         sk_eat_skb(sk, skb);
690                 break;
691         } while (1);
692 out:
693         release_sock(sk);
694         return len;
695 }
696
697 EXPORT_SYMBOL_GPL(dccp_recvmsg);
698
699 int inet_dccp_listen(struct socket *sock, int backlog)
700 {
701         struct sock *sk = sock->sk;
702         unsigned char old_state;
703         int err;
704
705         lock_sock(sk);
706
707         err = -EINVAL;
708         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
709                 goto out;
710
711         old_state = sk->sk_state;
712         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
713                 goto out;
714
715         /* Really, if the socket is already in listen state
716          * we can only allow the backlog to be adjusted.
717          */
718         if (old_state != DCCP_LISTEN) {
719                 /*
720                  * FIXME: here it probably should be sk->sk_prot->listen_start
721                  * see tcp_listen_start
722                  */
723                 err = dccp_listen_start(sk);
724                 if (err)
725                         goto out;
726         }
727         sk->sk_max_ack_backlog = backlog;
728         err = 0;
729
730 out:
731         release_sock(sk);
732         return err;
733 }
734
735 EXPORT_SYMBOL_GPL(inet_dccp_listen);
736
737 static const unsigned char dccp_new_state[] = {
738         /* current state:   new state:      action:     */
739         [0]               = DCCP_CLOSED,
740         [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
741         [DCCP_REQUESTING] = DCCP_CLOSED,
742         [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
743         [DCCP_LISTEN]     = DCCP_CLOSED,
744         [DCCP_RESPOND]    = DCCP_CLOSED,
745         [DCCP_CLOSING]    = DCCP_CLOSED,
746         [DCCP_TIME_WAIT]  = DCCP_CLOSED,
747         [DCCP_CLOSED]     = DCCP_CLOSED,
748 };
749
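/*
 * Worked example of the encoding above: dccp_new_state[DCCP_OPEN] is
 * DCCP_CLOSING | DCCP_ACTION_FIN, so for an OPEN socket dccp_close_state()
 * below switches it to DCCP_CLOSING and returns the non-zero
 * DCCP_ACTION_FIN bit, which makes dccp_close() issue dccp_send_close().
 */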
750 static int dccp_close_state(struct sock *sk)
751 {
752         const int next = dccp_new_state[sk->sk_state];
753         const int ns = next & DCCP_STATE_MASK;
754
755         if (ns != sk->sk_state)
756                 dccp_set_state(sk, ns);
757
758         return next & DCCP_ACTION_FIN;
759 }
760
761 void dccp_close(struct sock *sk, long timeout)
762 {
763         struct sk_buff *skb;
764
765         lock_sock(sk);
766
767         sk->sk_shutdown = SHUTDOWN_MASK;
768
769         if (sk->sk_state == DCCP_LISTEN) {
770                 dccp_set_state(sk, DCCP_CLOSED);
771
772                 /* Special case. */
773                 inet_csk_listen_stop(sk);
774
775                 goto adjudge_to_death;
776         }
777
778         /*
779          * We need to flush the recv. buffs.  We do this only on the
780          * descriptor close, not protocol-sourced closes, because the
781          * reader process may not have drained the data yet!
782          */
783         /* FIXME: check for unread data */
784         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
785                 __kfree_skb(skb);
786         }
787
788         if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
789                 /* Check zero linger _after_ checking for unread data. */
790                 sk->sk_prot->disconnect(sk, 0);
791         } else if (dccp_close_state(sk)) {
792                 dccp_send_close(sk, 1);
793         }
794
795         sk_stream_wait_close(sk, timeout);
796
797 adjudge_to_death:
798         /*
799          * This is the last release_sock in this socket's life; it will remove the backlog.
800          */
801         release_sock(sk);
802         /*
803          * Now the socket is owned by the kernel and we acquire the BH lock
804          * to finish the close. No need to check for user refs.
805          */
806         local_bh_disable();
807         bh_lock_sock(sk);
808         BUG_TRAP(!sock_owned_by_user(sk));
809
810         sock_hold(sk);
811         sock_orphan(sk);
812
813         /*
814          * The last release_sock may have processed the CLOSE or RESET
815          * packet, moving the sock to the CLOSED state; if not, we have to
816          * fire the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
817          * in draft-ietf-dccp-spec-11. -acme
818          */
819         if (sk->sk_state == DCCP_CLOSING) {
820                 /* FIXME: should start at 2 * RTT */
821                 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
822                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
823                                           inet_csk(sk)->icsk_rto,
824                                           DCCP_RTO_MAX);
825 #if 0
826                 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
827                 dccp_set_state(sk, DCCP_CLOSED);
828 #endif
829         }
830
831         atomic_inc(sk->sk_prot->orphan_count);
832         if (sk->sk_state == DCCP_CLOSED)
833                 inet_csk_destroy_sock(sk);
834
835         /* Otherwise, socket is reprieved until protocol close. */
836
837         bh_unlock_sock(sk);
838         local_bh_enable();
839         sock_put(sk);
840 }
841
842 EXPORT_SYMBOL_GPL(dccp_close);
843
844 void dccp_shutdown(struct sock *sk, int how)
845 {
846         dccp_pr_debug("entry\n");
847 }
848
849 EXPORT_SYMBOL_GPL(dccp_shutdown);
850
851 static const struct proto_ops inet_dccp_ops = {
852         .family         = PF_INET,
853         .owner          = THIS_MODULE,
854         .release        = inet_release,
855         .bind           = inet_bind,
856         .connect        = inet_stream_connect,
857         .socketpair     = sock_no_socketpair,
858         .accept         = inet_accept,
859         .getname        = inet_getname,
860         /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
861         .poll           = dccp_poll,
862         .ioctl          = inet_ioctl,
863         /* FIXME: work on inet_listen to rename it to sock_common_listen */
864         .listen         = inet_dccp_listen,
865         .shutdown       = inet_shutdown,
866         .setsockopt     = sock_common_setsockopt,
867         .getsockopt     = sock_common_getsockopt,
868         .sendmsg        = inet_sendmsg,
869         .recvmsg        = sock_common_recvmsg,
870         .mmap           = sock_no_mmap,
871         .sendpage       = sock_no_sendpage,
872 };
873
874 extern struct net_proto_family inet_family_ops;
875
876 static struct inet_protosw dccp_v4_protosw = {
877         .type           = SOCK_DCCP,
878         .protocol       = IPPROTO_DCCP,
879         .prot           = &dccp_prot,
880         .ops            = &inet_dccp_ops,
881         .capability     = -1,
882         .no_check       = 0,
883         .flags          = INET_PROTOSW_ICSK,
884 };
885
886 /*
887  * This is the global socket data structure used for responding to
888  * Out-of-the-Blue (OOTB) packets. A control sock will be created
889  * for this socket at initialization time.
890  */
891 struct socket *dccp_ctl_socket;
892
893 static char dccp_ctl_socket_err_msg[] __initdata =
894         KERN_ERR "DCCP: Failed to create the control socket.\n";
895
896 static int __init dccp_ctl_sock_init(void)
897 {
898         int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
899                                   &dccp_ctl_socket);
900         if (rc < 0)
901                 printk(dccp_ctl_socket_err_msg);
902         else {
903                 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
904                 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
905
906                 /* Unhash it so that IP input processing does not even
907                  * see it; we do not wish this socket to see incoming
908                  * packets.
909                  */
910                 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
911         }
912
913         return rc;
914 }
915
916 #ifdef CONFIG_IP_DCCP_UNLOAD_HACK
917 void dccp_ctl_sock_exit(void)
918 {
919         if (dccp_ctl_socket != NULL) {
920                 sock_release(dccp_ctl_socket);
921                 dccp_ctl_socket = NULL;
922         }
923 }
924
925 EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
926 #endif
927
928 static int __init init_dccp_v4_mibs(void)
929 {
930         int rc = -ENOMEM;
931
932         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
933         if (dccp_statistics[0] == NULL)
934                 goto out;
935
936         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
937         if (dccp_statistics[1] == NULL)
938                 goto out_free_one;
939
940         rc = 0;
941 out:
942         return rc;
943 out_free_one:
944         free_percpu(dccp_statistics[0]);
945         dccp_statistics[0] = NULL;
946         goto out;
947
948 }
949
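/*
 * Hedged note on the two per-cpu slots allocated above (assuming the usual
 * DEFINE_SNMP_STAT convention of this kernel generation): index 0 backs the
 * softirq-context counters and index 1 the process-context ones, so a
 * counter bump from packet processing is expected to look like:
 *
 *	DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);	// illustrative field
 */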
950 static int thash_entries;
951 module_param(thash_entries, int, 0444);
952 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
953
954 #ifdef CONFIG_IP_DCCP_DEBUG
955 int dccp_debug;
956 module_param(dccp_debug, int, 0444);
957 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
958
959 EXPORT_SYMBOL_GPL(dccp_debug);
960 #endif
961
962 static int __init dccp_init(void)
963 {
964         unsigned long goal;
965         int ehash_order, bhash_order, i;
966         int rc = proto_register(&dccp_prot, 1);
967
968         if (rc)
969                 goto out;
970
971         rc = -ENOBUFS;
972         dccp_hashinfo.bind_bucket_cachep =
973                 kmem_cache_create("dccp_bind_bucket",
974                                   sizeof(struct inet_bind_bucket), 0,
975                                   SLAB_HWCACHE_ALIGN, NULL, NULL);
976         if (!dccp_hashinfo.bind_bucket_cachep)
977                 goto out_proto_unregister;
978
979         /*
980          * Size and allocate the main established and bind bucket
981          * hash tables.
982          *
983          * The methodology is similar to that of the buffer cache.
984          */
985         if (num_physpages >= (128 * 1024))
986                 goal = num_physpages >> (21 - PAGE_SHIFT);
987         else
988                 goal = num_physpages >> (23 - PAGE_SHIFT);
989
990         if (thash_entries)
991                 goal = (thash_entries *
992                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
993         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
994                 ;
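	/*
	 * Worked example (illustrative only, assuming 4 KiB pages, i.e.
	 * PAGE_SHIFT == 12): with num_physpages == 131072 (512 MiB of RAM),
	 * the first branch above gives goal = 131072 >> 9 = 256 pages, so
	 * ehash_order becomes 8 and the loop below first tries a 1 MiB
	 * established-hash table, halving it on each allocation failure.
	 */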
995         do {
996                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
997                                         sizeof(struct inet_ehash_bucket);
998                 dccp_hashinfo.ehash_size >>= 1;
999                 while (dccp_hashinfo.ehash_size &
1000                        (dccp_hashinfo.ehash_size - 1))
1001                         dccp_hashinfo.ehash_size--;
1002                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1003                         __get_free_pages(GFP_ATOMIC, ehash_order);
1004         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1005
1006         if (!dccp_hashinfo.ehash) {
1007                 printk(KERN_CRIT "Failed to allocate DCCP "
1008                                  "established hash table\n");
1009                 goto out_free_bind_bucket_cachep;
1010         }
1011
1012         for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
1013                 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1014                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1015         }
1016
1017         bhash_order = ehash_order;
1018
1019         do {
1020                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1021                                         sizeof(struct inet_bind_hashbucket);
1022                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1023                     bhash_order > 0)
1024                         continue;
1025                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1026                         __get_free_pages(GFP_ATOMIC, bhash_order);
1027         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1028
1029         if (!dccp_hashinfo.bhash) {
1030                 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
1031                 goto out_free_dccp_ehash;
1032         }
1033
1034         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1035                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1036                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1037         }
1038
1039         rc = init_dccp_v4_mibs();
1040         if (rc)
1041                 goto out_free_dccp_bhash;
1042
1043         rc = -EAGAIN;
1044         if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
1045                 goto out_free_dccp_v4_mibs;
1046
1047         inet_register_protosw(&dccp_v4_protosw);
1048
1049         rc = dccp_ackvec_init();
1050         if (rc)
1051                 goto out_unregister_protosw;
1052
1053         rc = dccp_sysctl_init();
1054         if (rc)
1055                 goto out_ackvec_exit;
1056
1057         rc = dccp_ctl_sock_init();
1058         if (rc)
1059                 goto out_sysctl_exit;
1060 out:
1061         return rc;
1062 out_sysctl_exit:
1063         dccp_sysctl_exit();
1064 out_ackvec_exit:
1065         dccp_ackvec_exit();
1066 out_unregister_protosw:
1067         inet_unregister_protosw(&dccp_v4_protosw);
1068         inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
1069 out_free_dccp_v4_mibs:
1070         free_percpu(dccp_statistics[0]);
1071         free_percpu(dccp_statistics[1]);
1072         dccp_statistics[0] = dccp_statistics[1] = NULL;
1073 out_free_dccp_bhash:
1074         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1075         dccp_hashinfo.bhash = NULL;
1076 out_free_dccp_ehash:
1077         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1078         dccp_hashinfo.ehash = NULL;
1079 out_free_bind_bucket_cachep:
1080         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1081         dccp_hashinfo.bind_bucket_cachep = NULL;
1082 out_proto_unregister:
1083         proto_unregister(&dccp_prot);
1084         goto out;
1085 }
1086
1087 static const char dccp_del_proto_err_msg[] __exitdata =
1088         KERN_ERR "can't remove dccp net_protocol\n";
1089
1090 static void __exit dccp_fini(void)
1091 {
1092         inet_unregister_protosw(&dccp_v4_protosw);
1093
1094         if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
1095                 printk(dccp_del_proto_err_msg);
1096
1097         free_percpu(dccp_statistics[0]);
1098         free_percpu(dccp_statistics[1]);
1099         free_pages((unsigned long)dccp_hashinfo.bhash,
1100                    get_order(dccp_hashinfo.bhash_size *
1101                              sizeof(struct inet_bind_hashbucket)));
1102         free_pages((unsigned long)dccp_hashinfo.ehash,
1103                    get_order(dccp_hashinfo.ehash_size *
1104                              sizeof(struct inet_ehash_bucket)));
1105         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1106         proto_unregister(&dccp_prot);
1107         dccp_ackvec_exit();
1108         dccp_sysctl_exit();
1109 }
1110
1111 module_init(dccp_init);
1112 module_exit(dccp_fini);
1113
1114 /*
1115  * __stringify doesn't like enums, so use the SOCK_DCCP (6) and IPPROTO_DCCP (33)
1116  * values directly. Also cover the case where the protocol is not specified,
1117  * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
1118  */
1119 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
1120 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
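/*
 * Hedged expansion note (an assumption about the autoload path, not shown
 * in this file): PF_INET is 2, so the aliases above read
 * "net-pf-2-proto-33-type-6" and "net-pf-2-proto-0-type-6", the form the
 * socket core is expected to hand to request_module() when an application
 * asks for a DCCP socket before this module is loaded, e.g. via
 *
 *	socket(PF_INET, SOCK_DCCP, IPPROTO_DCCP);
 */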
1121 MODULE_LICENSE("GPL");
1122 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1123 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");