/*
 * Imported from gitweb (linux-2.6-omap-h63xx.git), net/dccp/proto.c,
 * at commit "[DCCP]: Move dccp_v4_{init,destroy}_sock to the core".
 */
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_common.h>
27 #include <net/inet_sock.h>
28 #include <net/protocol.h>
29 #include <net/sock.h>
30 #include <net/xfrm.h>
31
32 #include <asm/semaphore.h>
33 #include <linux/spinlock.h>
34 #include <linux/timer.h>
35 #include <linux/delay.h>
36 #include <linux/poll.h>
37
38 #include "ccid.h"
39 #include "dccp.h"
40 #include "feat.h"
41
/* Per-CPU DCCP MIB (SNMP) counters; exported for the rest of the DCCP core. */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Count of orphaned DCCP sockets (incremented in dccp_close()). */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);
49
/* IPv4 input hooks for IPPROTO_DCCP: receive and ICMP error handlers. */
static struct net_protocol dccp_protocol = {
	.handler	= dccp_v4_rcv,
	.err_handler	= dccp_v4_err,
	.no_policy	= 1,	/* skip xfrm policy checks on input */
};
55
56 const char *dccp_packet_name(const int type)
57 {
58         static const char *dccp_packet_names[] = {
59                 [DCCP_PKT_REQUEST]  = "REQUEST",
60                 [DCCP_PKT_RESPONSE] = "RESPONSE",
61                 [DCCP_PKT_DATA]     = "DATA",
62                 [DCCP_PKT_ACK]      = "ACK",
63                 [DCCP_PKT_DATAACK]  = "DATAACK",
64                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
65                 [DCCP_PKT_CLOSE]    = "CLOSE",
66                 [DCCP_PKT_RESET]    = "RESET",
67                 [DCCP_PKT_SYNC]     = "SYNC",
68                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
69         };
70
71         if (type >= DCCP_NR_PKT_TYPES)
72                 return "INVALID";
73         else
74                 return dccp_packet_names[type];
75 }
76
77 EXPORT_SYMBOL_GPL(dccp_packet_name);
78
79 const char *dccp_state_name(const int state)
80 {
81         static char *dccp_state_names[] = {
82         [DCCP_OPEN]       = "OPEN",
83         [DCCP_REQUESTING] = "REQUESTING",
84         [DCCP_PARTOPEN]   = "PARTOPEN",
85         [DCCP_LISTEN]     = "LISTEN",
86         [DCCP_RESPOND]    = "RESPOND",
87         [DCCP_CLOSING]    = "CLOSING",
88         [DCCP_TIME_WAIT]  = "TIME_WAIT",
89         [DCCP_CLOSED]     = "CLOSED",
90         };
91
92         if (state >= DCCP_MAX_STATES)
93                 return "INVALID STATE!";
94         else
95                 return dccp_state_names[state];
96 }
97
98 EXPORT_SYMBOL_GPL(dccp_state_name);
99
/**
 * dccp_init_sock  -  Initialise the DCCP-specific parts of a socket
 * @sk: socket being set up
 *
 * Sets up options, timers and per-connection state.  The very first
 * socket initialised is the DCCP control socket, which skips feature
 * negotiation and CCID allocation (tracked by the static flag below).
 *
 * Returns 0 on success, a negative error from dccp_feat_init(), or
 * -ENOMEM if the ack vector or a CCID block cannot be allocated.
 */
int dccp_init_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	/* 1 only until the first (control) socket has been initialised */
	static int dccp_ctl_socket_init = 1;

	dccp_options_init(&dp->dccps_options);
	do_gettimeofday(&dp->dccps_epoch);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(!dccp_ctl_socket_init)) {
		int rc = dccp_feat_init(sk);

		if (rc)
			return rc;

		if (dp->dccps_options.dccpo_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid =
				ccid_hc_rx_new(dp->dccps_options.dccpo_rx_ccid,
					       sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid =
				ccid_hc_tx_new(dp->dccps_options.dccpo_tx_ccid,
					       sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			/* roll back whichever halves did get allocated */
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dp->dccps_options.dccpo_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dp->dccps_options.dccpo_pending);
		INIT_LIST_HEAD(&dp->dccps_options.dccpo_conf);
		dccp_ctl_socket_init = 0;
	}

	dccp_init_xmit_timers(sk);
	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;	/* conservative default MSS */
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_INVALID_VALUE;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	return 0;
}
163
164 EXPORT_SYMBOL_GPL(dccp_init_sock);
165
/**
 * dccp_destroy_sock  -  Release DCCP-specific socket resources
 * @sk: socket being destroyed
 *
 * Frees the pending retransmit skb, drops the bind-bucket reference,
 * and releases the service list, ack vector, both CCID blocks and any
 * feature negotiation state.  Always returns 0.
 */
int dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	/* kfree(NULL) is a no-op, so the list pointer needs no check */
	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_options.dccpo_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(sk);

	return 0;
}
199
200 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
201
202 static inline int dccp_listen_start(struct sock *sk)
203 {
204         struct dccp_sock *dp = dccp_sk(sk);
205
206         dp->dccps_role = DCCP_ROLE_LISTEN;
207         /*
208          * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
209          * before calling listen()
210          */
211         if (dccp_service_not_initialized(sk))
212                 return -EPROTO;
213         return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
214 }
215
/**
 * dccp_disconnect  -  Abort the connection and reset the socket to CLOSED
 * @sk:    socket to disconnect
 * @flags: unused here; kept for the struct proto disconnect signature
 *
 * Rough equivalent of the RFC 793 ABORT function: stops a listening
 * socket, purges queues and timers, and clears addressing state so
 * the socket can be reused.  Returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	/* DCCP keeps at most one packet pending for retransmission */
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* drop the auto-selected source address unless the user bound it */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
257
258 EXPORT_SYMBOL_GPL(dccp_disconnect);
259
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM;

	/* Connected? (i.e. not still in the handshake states) */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.  Re-test after the
				 * flags are visible.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}
315
316 EXPORT_SYMBOL_GPL(dccp_poll);
317
/* No DCCP-specific ioctls are implemented yet; always -ENOIOCTLCMD. */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}
323
324 EXPORT_SYMBOL_GPL(dccp_ioctl);
325
/*
 * Install the socket's service code and optional service list.
 * @service is the first __be32 of @optval; any remaining bytes form the
 * additional service-code list.  Replaces any previously set list.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	/* reject the invalid-service sentinel and oversized lists */
	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* entries beyond the first code; -1 excludes @service itself */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	/* kfree(NULL) is fine on the first call */
	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
360
361 /* byte 1 is feature.  the rest is the preference list */
362 static int dccp_setsockopt_change(struct sock *sk, int type,
363                                   struct dccp_so_feat __user *optval)
364 {
365         struct dccp_so_feat opt;
366         u8 *val;
367         int rc;
368
369         if (copy_from_user(&opt, optval, sizeof(opt)))
370                 return -EFAULT;
371
372         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
373         if (!val)
374                 return -ENOMEM;
375
376         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
377                 rc = -EFAULT;
378                 goto out_free_val;
379         }
380
381         rc = dccp_feat_change(sk, type, opt.dccpsf_feat, val, opt.dccpsf_len,
382                               GFP_KERNEL);
383         if (rc)
384                 goto out_free_val;
385
386 out:
387         return rc;
388
389 out_free_val:
390         kfree(val);
391         goto out;
392 }
393
/**
 * dccp_setsockopt  -  SOL_DCCP setsockopt entry point
 * @sk:      socket
 * @level:   must be SOL_DCCP, otherwise forwarded to the AF-level handler
 * @optname: DCCP_SOCKOPT_* option
 * @optval:  user-space option buffer (at least sizeof(int) bytes)
 * @optlen:  length of @optval
 *
 * Returns 0 or a negative error (-EINVAL, -EFAULT, -ENOPROTOOPT).
 */
int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	struct dccp_sock *dp;
	int err;
	int val;

	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	/* SERVICE takes the whole buffer and locks the socket itself */
	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	dp = dccp_sk(sk);
	err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		dp->dccps_packet_size = val;
		break;

	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat *)
						     optval);
		break;

	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat *)
						     optval);
		break;

	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}
450
451 EXPORT_SYMBOL_GPL(dccp_setsockopt);
452
/*
 * Copy the service code plus any additional service list to user space.
 * Output layout: one __be32 service code followed by the list entries.
 * Returns -ENOENT if no service was ever set, -EINVAL if the user
 * buffer is too small, -EFAULT on copy failure, 0 on success.
 */
static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if (dccp_service_not_initialized(sk))
		goto out;

	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}
483
/**
 * dccp_getsockopt  -  SOL_DCCP getsockopt entry point
 * @sk:      socket
 * @level:   must be SOL_DCCP, otherwise forwarded to the AF-level handler
 * @optname: DCCP_SOCKOPT_* option; ranges 128-191 and 192-255 are
 *           delegated to the RX and TX CCIDs respectively
 * @optval:  user-space result buffer
 * @optlen:  in/out length of @optval
 *
 * Returns 0 or a negative error (-EINVAL, -EFAULT, -ENOPROTOOPT).
 */
int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	if (get_user(len, optlen))
		return -EFAULT;

	if (len < sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		val = dp->dccps_packet_size;
		len = sizeof(dp->dccps_packet_size);
		break;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case 128 ... 191:	/* CCID-specific, receiver half-connection */
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:	/* CCID-specific, sender half-connection */
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
525
526 EXPORT_SYMBOL_GPL(dccp_getsockopt);
527
/**
 * dccp_sendmsg  -  Send one DCCP datagram
 * @iocb: kiocb (unused here)
 * @sk:   connected socket
 * @msg:  user message; must fit in a single packet
 * @len:  payload length; rejected with -EMSGSIZE if above the MSS cache
 *
 * Waits for the connection to be established if necessary, allocates a
 * single skb, copies the payload and hands it to dccp_write_xmit().
 * Returns @len on success or a negative error.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* drop the socket lock around the (possibly blocking) allocation */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_write_xmit(sk, skb, &timeo);
	/*
	 * XXX we don't use sk_write_queue, so just discard the packet.
	 *     Current plan however is to _use_ sk_write_queue with
	 *     an algorithm similar to tcp_sendmsg, where the main difference
	 *     is that in DCCP we have to respect packet boundaries, so
	 *     no coalescing of skbs.
	 *
	 *     This bug was _quickly_ found & fixed by just looking at an OSTRA
	 *     generated callgraph 8) -acme
	 */
out_release:
	release_sock(sk);
	return rc ? : len;	/* report full length on success */
out_discard:
	kfree_skb(skb);
	goto out_release;
}
583
584 EXPORT_SYMBOL_GPL(dccp_sendmsg);
585
/**
 * dccp_recvmsg  -  Receive one DCCP datagram
 * @iocb:     kiocb (unused here)
 * @sk:       socket to read from
 * @msg:      destination message
 * @len:      buffer length; excess packet data sets MSG_TRUNC
 * @nonblock: non-blocking flag
 * @flags:    MSG_* flags (MSG_PEEK leaves the packet queued)
 * @addr_len: unused
 *
 * Loops over the receive queue: DATA/DATAACK packets are delivered,
 * RESET/CLOSE end the read with 0, other packet types are discarded.
 * Blocks (up to the receive timeout) while the queue is empty.
 * Returns the number of bytes copied or a negative error.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		/* any other packet type carries no user data: drop it */
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		sk_eat_skb(sk, skb);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* sleep until data arrives or the timeout expires */
		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}
682
683 EXPORT_SYMBOL_GPL(dccp_recvmsg);
684
685 int inet_dccp_listen(struct socket *sock, int backlog)
686 {
687         struct sock *sk = sock->sk;
688         unsigned char old_state;
689         int err;
690
691         lock_sock(sk);
692
693         err = -EINVAL;
694         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
695                 goto out;
696
697         old_state = sk->sk_state;
698         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
699                 goto out;
700
701         /* Really, if the socket is already in listen state
702          * we can only allow the backlog to be adjusted.
703          */
704         if (old_state != DCCP_LISTEN) {
705                 /*
706                  * FIXME: here it probably should be sk->sk_prot->listen_start
707                  * see tcp_listen_start
708                  */
709                 err = dccp_listen_start(sk);
710                 if (err)
711                         goto out;
712         }
713         sk->sk_max_ack_backlog = backlog;
714         err = 0;
715
716 out:
717         release_sock(sk);
718         return err;
719 }
720
721 EXPORT_SYMBOL_GPL(inet_dccp_listen);
722
/*
 * Close-transition table used by dccp_close_state(): per current state,
 * the DCCP_STATE_MASK bits give the state to enter and DCCP_ACTION_FIN
 * flags that an active close (CLOSE/CLOSEREQ) must be sent.
 */
static const unsigned char dccp_new_state[] = {
	/* current state:   new state:      action:     */
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};
735
736 static int dccp_close_state(struct sock *sk)
737 {
738         const int next = dccp_new_state[sk->sk_state];
739         const int ns = next & DCCP_STATE_MASK;
740
741         if (ns != sk->sk_state)
742                 dccp_set_state(sk, ns);
743
744         return next & DCCP_ACTION_FIN;
745 }
746
/**
 * dccp_close  -  Active close of a DCCP socket
 * @sk:      socket being closed
 * @timeout: linger time passed to sk_stream_wait_close()
 *
 * Flushes the receive queue, sends CLOSE/CLOSEREQ where the state
 * machine requires it, orphans the socket and either destroys it
 * immediately (already CLOSED) or leaves it to the retransmit timer /
 * protocol events to finish termination.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		/* state machine says an active close packet is required */
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	atomic_inc(sk->sk_prot->orphan_count);
	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
827
828 EXPORT_SYMBOL_GPL(dccp_close);
829
/* shutdown() is not implemented for DCCP yet; this is a stub. */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
834
835 EXPORT_SYMBOL_GPL(dccp_shutdown);
836
/* Socket-layer operations for AF_INET/SOCK_DCCP; mostly generic inet
 * handlers plus the DCCP-specific poll and listen implementations. */
static const struct proto_ops inet_dccp_ops = {
	.family		= PF_INET,
	.owner		= THIS_MODULE,
	.release	= inet_release,
	.bind		= inet_bind,
	.connect	= inet_stream_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= inet_accept,
	.getname	= inet_getname,
	/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
	.poll		= dccp_poll,
	.ioctl		= inet_ioctl,
	/* FIXME: work on inet_listen to rename it to sock_common_listen */
	.listen		= inet_dccp_listen,
	.shutdown	= inet_shutdown,
	.setsockopt	= sock_common_setsockopt,
	.getsockopt	= sock_common_getsockopt,
	.sendmsg	= inet_sendmsg,
	.recvmsg	= sock_common_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};
859
extern struct net_proto_family inet_family_ops;

/* Registration entry for SOCK_DCCP/IPPROTO_DCCP with the inet layer. */
static struct inet_protosw dccp_v4_protosw = {
	.type		= SOCK_DCCP,
	.protocol	= IPPROTO_DCCP,
	.prot		= &dccp_prot,
	.ops		= &inet_dccp_ops,
	.capability	= -1,	/* no special capability required */
	.no_check	= 0,
	.flags		= INET_PROTOSW_ICSK,
};
871
/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at the initialization time.
 */
struct socket *dccp_ctl_socket;

/* __initdata: only needed while dccp_ctl_sock_init() runs */
static char dccp_ctl_socket_err_msg[] __initdata =
	KERN_ERR "DCCP: Failed to create the control socket.\n";
881
882 static int __init dccp_ctl_sock_init(void)
883 {
884         int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
885                                   &dccp_ctl_socket);
886         if (rc < 0)
887                 printk(dccp_ctl_socket_err_msg);
888         else {
889                 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
890                 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
891
892                 /* Unhash it so that IP input processing does not even
893                  * see it, we do not wish this socket to see incoming
894                  * packets.
895                  */
896                 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
897         }
898
899         return rc;
900 }
901
#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
/*
 * Release the OOTB control socket; only needed when the unload hack is
 * enabled so the module refcount can drop to zero.  Safe to call twice:
 * the pointer is cleared after release.
 */
void dccp_ctl_sock_exit(void)
{
	if (dccp_ctl_socket == NULL)
		return;

	sock_release(dccp_ctl_socket);
	dccp_ctl_socket = NULL;
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif
913
914 static int __init init_dccp_v4_mibs(void)
915 {
916         int rc = -ENOMEM;
917
918         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
919         if (dccp_statistics[0] == NULL)
920                 goto out;
921
922         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
923         if (dccp_statistics[1] == NULL)
924                 goto out_free_one;
925
926         rc = 0;
927 out:
928         return rc;
929 out_free_one:
930         free_percpu(dccp_statistics[0]);
931         dccp_statistics[0] = NULL;
932         goto out;
933
934 }
935
/* Module parameter: override the auto-sized number of established-hash
 * buckets computed in dccp_init() (0 = size from available memory). */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Non-zero enables dccp_pr_debug() output; exported so the CCID
 * congestion-control modules can test it too. */
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
947
/*
 * Module init: register the DCCP proto, size and allocate the
 * established (ehash) and bind (bhash) hash tables, set up MIBs,
 * register the IPv4 protocol handler and protosw entry, and initialise
 * the ack vector, sysctl and control-socket subsystems.  On any failure
 * the goto chain below unwinds everything done so far, in reverse order.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = proto_register(&dccp_prot, 1);

	if (rc)
		goto out;

	rc = -ENOBUFS;
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_proto_unregister;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	/* Scale the target table size with available memory: machines with
	 * >= 512MB (at 4K pages) get the larger 1/2MB-per-... ratio. */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	/* An explicit thash_entries module parameter overrides the
	 * memory-based heuristic. */
	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	/* Smallest page order whose size reaches the goal. */
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/* Try progressively smaller orders until the allocation succeeds.
	 * ehash_size is rounded down to a power of two (required for the
	 * hash masking), and halved because the table is split into the
	 * established half and the timewait half (hence the << 1 when
	 * initialising below). */
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		dccp_hashinfo.ehash_size >>= 1;
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		printk(KERN_CRIT "Failed to allocate DCCP "
				 "established hash table\n");
		goto out_free_bind_bucket_cachep;
	}

	/* << 1: initialise both the established and the timewait halves. */
	for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
		rwlock_init(&dccp_hashinfo.ehash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
	}

	bhash_order = ehash_order;

	/* Same shrink-until-it-fits loop for the bind hash; additionally
	 * capped at 64K entries (the `continue` re-tests the while condition,
	 * decrementing the order without attempting an oversized alloc). */
	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
		goto out_free_dccp_ehash;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = init_dccp_v4_mibs();
	if (rc)
		goto out_free_dccp_bhash;

	rc = -EAGAIN;
	if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
		goto out_free_dccp_v4_mibs;

	inet_register_protosw(&dccp_v4_protosw);

	rc = dccp_ackvec_init();
	if (rc)
		goto out_unregister_protosw;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = dccp_ctl_sock_init();
	if (rc)
		goto out_sysctl_exit;
out:
	return rc;
out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_unregister_protosw:
	inet_unregister_protosw(&dccp_v4_protosw);
	inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
	proto_unregister(&dccp_prot);
	goto out;
}
1072
/* __exitdata: only used from dccp_fini(), discarded with the exit section. */
static const char dccp_del_proto_err_msg[] __exitdata =
	KERN_ERR "can't remove dccp net_protocol\n";

/*
 * Module exit: tear down everything dccp_init() set up, roughly in
 * reverse order of initialisation.  The hash-table page orders are
 * recomputed from the stored sizes via get_order() since the orders
 * chosen at init time are not kept around.
 */
static void __exit dccp_fini(void)
{
	inet_unregister_protosw(&dccp_v4_protosw);

	if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
		printk(dccp_del_proto_err_msg);

	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	proto_unregister(&dccp_prot);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}
1096
module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
 * values directly.  Also cover the case where the protocol is not specified,
 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");