www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'for-2.6.30' of git://linux-nfs.org/~bfields/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 6 Apr 2009 20:25:56 +0000 (13:25 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 6 Apr 2009 20:25:56 +0000 (13:25 -0700)
* 'for-2.6.30' of git://linux-nfs.org/~bfields/linux: (81 commits)
  nfsd41: define nfsd4_set_statp as noop for !CONFIG_NFSD_V4
  nfsd41: define NFSD_DRC_SIZE_SHIFT in set_max_drc
  nfsd41: Documentation/filesystems/nfs41-server.txt
  nfsd41: CREATE_EXCLUSIVE4_1
  nfsd41: SUPPATTR_EXCLCREAT attribute
  nfsd41: support for 3-word long attribute bitmask
  nfsd: dynamically skip encoded fattr bitmap in _nfsd4_verify
  nfsd41: pass writable attrs mask to nfsd4_decode_fattr
  nfsd41: provide support for minor version 1 at rpc level
  nfsd41: control nfsv4.1 svc via /proc/fs/nfsd/versions
  nfsd41: add OPEN4_SHARE_ACCESS_WANT nfs4_stateid bmap
  nfsd41: access_valid
  nfsd41: clientid handling
  nfsd41: check encode size for sessions maxresponse cached
  nfsd41: stateid handling
  nfsd: pass nfsd4_compound_state* to nfs4_preprocess_{state,seq}id_op
  nfsd41: destroy_session operation
  nfsd41: non-page DRC for solo sequence responses
  nfsd41: Add a create session replay cache
  nfsd41: create_session operation
  ...

1  2 
fs/nfsd/nfsctl.c
fs/nfsd/nfssvc.c
fs/nfsd/vfs.c
include/linux/sunrpc/svc.h
net/sunrpc/svc.c
net/sunrpc/svc_xprt.c
net/sunrpc/svcsock.c

diff --combined fs/nfsd/nfsctl.c
index a4ed8644d69c0f5050d8e11cc6b66686b7804084,a9b8c75bf0bff899c92de5ce49fe82907aea00d1..af16849d243ac543c1192b04c49362b8c3eac958
@@@ -60,6 -60,7 +60,7 @@@ enum 
        NFSD_FO_UnlockFS,
        NFSD_Threads,
        NFSD_Pool_Threads,
+       NFSD_Pool_Stats,
        NFSD_Versions,
        NFSD_Ports,
        NFSD_MaxBlkSize,
@@@ -172,6 -173,16 +173,16 @@@ static const struct file_operations exp
        .owner          = THIS_MODULE,
  };
  
+ extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
+ static struct file_operations pool_stats_operations = {
+       .open           = nfsd_pool_stats_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+       .owner          = THIS_MODULE,
+ };
  /*----------------------------------------------------------------------------*/
  /*
   * payload - write methods
@@@ -781,8 -792,9 +792,9 @@@ out_free
  static ssize_t __write_versions(struct file *file, char *buf, size_t size)
  {
        char *mesg = buf;
-       char *vers, sign;
+       char *vers, *minorp, sign;
        int len, num;
+       unsigned minor;
        ssize_t tlen = 0;
        char *sep;
  
                do {
                        sign = *vers;
                        if (sign == '+' || sign == '-')
-                               num = simple_strtol((vers+1), NULL, 0);
+                               num = simple_strtol((vers+1), &minorp, 0);
                        else
-                               num = simple_strtol(vers, NULL, 0);
+                               num = simple_strtol(vers, &minorp, 0);
+                       if (*minorp == '.') {
+                               if (num < 4)
+                                       return -EINVAL;
+                               minor = simple_strtoul(minorp+1, NULL, 0);
+                               if (minor == 0)
+                                       return -EINVAL;
+                               if (nfsd_minorversion(minor, sign == '-' ?
+                                                    NFSD_CLEAR : NFSD_SET) < 0)
+                                       return -EINVAL;
+                               goto next;
+                       }
                        switch(num) {
                        case 2:
                        case 3:
                        default:
                                return -EINVAL;
                        }
+               next:
                        vers += len + 1;
                        tlen += len;
                } while ((len = qword_get(&mesg, vers, size)) > 0);
                                       num);
                        sep = " ";
                }
+       if (nfsd_vers(4, NFSD_AVAIL))
+               for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; minor++)
+                       len += sprintf(buf+len, " %c4.%u",
+                                       (nfsd_vers(4, NFSD_TEST) &&
+                                        nfsd_minorversion(minor, NFSD_TEST)) ?
+                                               '+' : '-',
+                                       minor);
        len += sprintf(buf+len, "\n");
        return len;
  }
@@@ -938,12 -969,10 +969,12 @@@ static ssize_t __write_ports(struct fil
                char transport[16];
                int port;
                if (sscanf(buf, "%15s %4d", transport, &port) == 2) {
 +                      if (port < 1 || port > 65535)
 +                              return -EINVAL;
                        err = nfsd_create_serv();
                        if (!err) {
                                err = svc_create_xprt(nfsd_serv,
 -                                                    transport, port,
 +                                                    transport, PF_INET, port,
                                                      SVC_SOCK_ANONYMOUS);
                                if (err == -ENOENT)
                                        /* Give a reasonable perror msg for
                char transport[16];
                int port;
                if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
 -                      if (port == 0)
 +                      if (port < 1 || port > 65535)
                                return -EINVAL;
                        if (nfsd_serv) {
                                xprt = svc_find_xprt(nfsd_serv, transport,
@@@ -1248,6 -1277,7 +1279,7 @@@ static int nfsd_fill_super(struct super
                [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
                [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
                [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
+               [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO},
                [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
                [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
                [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
diff --combined fs/nfsd/nfssvc.c
index 7c09852be713b91ef9de1f507aafb04ad61a35a9,469c931cca95c3ae0add3f9a5354b42dca178360..cbba4a935786db386ecf0b60e3a001b6f15ebf03
@@@ -22,6 -22,7 +22,7 @@@
  #include <linux/freezer.h>
  #include <linux/fs_struct.h>
  #include <linux/kthread.h>
+ #include <linux/swap.h>
  
  #include <linux/sunrpc/types.h>
  #include <linux/sunrpc/stats.h>
@@@ -40,9 -41,6 +41,6 @@@
  extern struct svc_program     nfsd_program;
  static int                    nfsd(void *vrqstp);
  struct timeval                        nfssvc_boot;
- static atomic_t                       nfsd_busy;
- static unsigned long          nfsd_last_call;
- static DEFINE_SPINLOCK(nfsd_call_lock);
  
  /*
   * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
@@@ -123,6 -121,8 +121,8 @@@ struct svc_program         nfsd_program = 
  
  };
  
+ u32 nfsd_supported_minorversion;
  int nfsd_vers(int vers, enum vers_op change)
  {
        if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
        }
        return 0;
  }
+ int nfsd_minorversion(u32 minorversion, enum vers_op change)
+ {
+       if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
+               return -1;
+       switch(change) {
+       case NFSD_SET:
+               nfsd_supported_minorversion = minorversion;
+               break;
+       case NFSD_CLEAR:
+               if (minorversion == 0)
+                       return -1;
+               nfsd_supported_minorversion = minorversion - 1;
+               break;
+       case NFSD_TEST:
+               return minorversion <= nfsd_supported_minorversion;
+       case NFSD_AVAIL:
+               return minorversion <= NFSD_SUPPORTED_MINOR_VERSION;
+       }
+       return 0;
+ }
  /*
   * Maximum number of nfsd processes
   */
@@@ -200,6 -222,28 +222,28 @@@ void nfsd_reset_versions(void
        }
  }
  
+ /*
+  * Each session guarantees a negotiated per slot memory cache for replies
+  * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated
+  * NFSv4.1 server might want to use more memory for a DRC than a machine
+  * with multiple services.
+  *
+  * Impose a hard limit on the number of pages for the DRC which varies
+  * according to the machine's free pages. This is of course only a default.
+  *
+  * For now this is a #defined shift which could be under admin control
+  * in the future.
+  */
+ static void set_max_drc(void)
+ {
+       /* The fraction of nr_free_buffer_pages (1 / 2^shift) used by the V4.1 server DRC */
+       #define NFSD_DRC_SIZE_SHIFT     7
+       nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages()
+                                               >> NFSD_DRC_SIZE_SHIFT;
+       nfsd_serv->sv_drc_pages_used = 0;
+       dprintk("%s svc_drc_max_pages %u\n", __func__,
+               nfsd_serv->sv_drc_max_pages);
+ }
  
  int nfsd_create_serv(void)
  {
                        nfsd_max_blksize /= 2;
        }
  
-       atomic_set(&nfsd_busy, 0);
        nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
 -                                    AF_INET,
                                      nfsd_last_thread, nfsd, THIS_MODULE);
        if (nfsd_serv == NULL)
                err = -ENOMEM;
+       else
+               set_max_drc();
  
        do_gettimeofday(&nfssvc_boot);          /* record boot time */
        return err;
@@@ -243,7 -289,7 +288,7 @@@ static int nfsd_init_socks(int port
        if (!list_empty(&nfsd_serv->sv_permsocks))
                return 0;
  
 -      error = svc_create_xprt(nfsd_serv, "udp", port,
 +      error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port,
                                        SVC_SOCK_DEFAULTS);
        if (error < 0)
                return error;
        if (error < 0)
                return error;
  
 -      error = svc_create_xprt(nfsd_serv, "tcp", port,
 +      error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port,
                                        SVC_SOCK_DEFAULTS);
        if (error < 0)
                return error;
@@@ -375,26 -421,6 +420,6 @@@ nfsd_svc(unsigned short port, int nrser
        return error;
  }
  
- static inline void
- update_thread_usage(int busy_threads)
- {
-       unsigned long prev_call;
-       unsigned long diff;
-       int decile;
-       spin_lock(&nfsd_call_lock);
-       prev_call = nfsd_last_call;
-       nfsd_last_call = jiffies;
-       decile = busy_threads*10/nfsdstats.th_cnt;
-       if (decile>0 && decile <= 10) {
-               diff = nfsd_last_call - prev_call;
-               if ( (nfsdstats.th_usage[decile-1] += diff) >= NFSD_USAGE_WRAP)
-                       nfsdstats.th_usage[decile-1] -= NFSD_USAGE_WRAP;
-               if (decile == 10)
-                       nfsdstats.th_fullcnt++;
-       }
-       spin_unlock(&nfsd_call_lock);
- }
  
  /*
   * This is the NFS server kernel thread
@@@ -403,6 -429,7 +428,6 @@@ static in
  nfsd(void *vrqstp)
  {
        struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
 -      struct fs_struct *fsp;
        int err, preverr = 0;
  
        /* Lock module and set up kernel thread */
        /* At this point, the thread shares current->fs
         * with the init process. We need to create files with a
         * umask of 0 instead of init's umask. */
 -      fsp = copy_fs_struct(current->fs);
 -      if (!fsp) {
 +      if (unshare_fs_struct() < 0) {
                printk("Unable to start nfsd thread: out of memory\n");
                goto out;
        }
 -      exit_fs(current);
 -      current->fs = fsp;
 +
        current->fs->umask = 0;
  
        /*
                        continue;
                }
  
-               update_thread_usage(atomic_read(&nfsd_busy));
-               atomic_inc(&nfsd_busy);
  
                /* Lock the export hash tables for reading. */
                exp_readlock();
  
                /* Unlock export hash tables */
                exp_readunlock();
-               update_thread_usage(atomic_read(&nfsd_busy));
-               atomic_dec(&nfsd_busy);
        }
  
        /* Clear signals before calling svc_exit_thread() */
@@@ -539,6 -564,10 +560,10 @@@ nfsd_dispatch(struct svc_rqst *rqstp, _
                + rqstp->rq_res.head[0].iov_len;
        rqstp->rq_res.head[0].iov_len += sizeof(__be32);
  
+       /* NFSv4.1 DRC requires statp */
+       if (rqstp->rq_vers == 4)
+               nfsd4_set_statp(rqstp, statp);
        /* Now call the procedure handler, and encode NFS status. */
        nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
        nfserr = map_new_errors(rqstp->rq_vers, nfserr);
        nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
        return 1;
  }
+ int nfsd_pool_stats_open(struct inode *inode, struct file *file)
+ {
+       if (nfsd_serv == NULL)
+               return -ENODEV;
+       return svc_pool_stats_open(nfsd_serv, file);
+ }
diff --combined fs/nfsd/vfs.c
index 78376b6c0236161009488e0bf4f3a2a707c3bc44,8790571b30fd1b771082462f7deb120f93325131..ab93fcfef254928360073e3452c707d1653b99c8
@@@ -356,7 -356,7 +356,7 @@@ nfsd_setattr(struct svc_rqst *rqstp, st
                        put_write_access(inode);
                        goto out_nfserr;
                }
 -              DQUOT_INIT(inode);
 +              vfs_dq_init(inode);
        }
  
        /* sanitize the mode change */
        }
  
        /* Revoke setuid/setgid on chown */
-       if (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) ||
-           ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)) {
+       if (!S_ISDIR(inode->i_mode) &&
+           (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) ||
+            ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) {
                iap->ia_valid |= ATTR_KILL_PRIV;
                if (iap->ia_valid & ATTR_MODE) {
                        /* we're setting mode too, just clear the s*id bits */
@@@ -723,7 -724,7 +724,7 @@@ nfsd_open(struct svc_rqst *rqstp, struc
                else
                        flags = O_WRONLY|O_LARGEFILE;
  
 -              DQUOT_INIT(inode);
 +              vfs_dq_init(inode);
        }
        *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
                            flags, cred);
@@@ -960,7 -961,7 +961,7 @@@ static void kill_suid(struct dentry *de
  static __be32
  nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                                loff_t offset, struct kvec *vec, int vlen,
-                               unsigned long cnt, int *stablep)
+                               unsigned long *cnt, int *stablep)
  {
        struct svc_export       *exp;
        struct dentry           *dentry;
        err = nfserr_perm;
  
        if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
-               (!lock_may_write(file->f_path.dentry->d_inode, offset, cnt)))
+               (!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt)))
                goto out;
  #endif
  
  
        if (!EX_ISSYNC(exp))
                stable = 0;
 -      if (stable && !EX_WGATHER(exp))
 +      if (stable && !EX_WGATHER(exp)) {
 +              spin_lock(&file->f_lock);
                file->f_flags |= O_SYNC;
 +              spin_unlock(&file->f_lock);
 +      }
  
        /* Write the data. */
        oldfs = get_fs(); set_fs(KERNEL_DS);
        host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
        set_fs(oldfs);
        if (host_err >= 0) {
-               nfsdstats.io_write += cnt;
+               nfsdstats.io_write += host_err;
                fsnotify_modify(file->f_path.dentry);
        }
  
        }
  
        dprintk("nfsd: write complete host_err=%d\n", host_err);
-       if (host_err >= 0)
+       if (host_err >= 0) {
                err = 0;
-       else 
+               *cnt = host_err;
+       } else
                err = nfserrno(host_err);
  out:
        return err;
@@@ -1098,7 -1097,7 +1100,7 @@@ out
   */
  __be32
  nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
-               loff_t offset, struct kvec *vec, int vlen, unsigned long cnt,
+               loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
                int *stablep)
  {
        __be32                  err = 0;
@@@ -1179,6 -1178,21 +1181,21 @@@ nfsd_create_setattr(struct svc_rqst *rq
        return 0;
  }
  
+ /* An HPUX client sometimes creates a file with mode 000 and sets its size to 0.
+  * Setting the size to 0 may fail on some file systems because the permission
+  * check requires WRITE permission but the mode is 000.
+  * We ignore the resize (to 0) on the newly created file, since its size is
+  * already 0 after creation.
+  *
+  * Call this only after vfs_create() has been called.
+  * */
+ static void
+ nfsd_check_ignore_resizing(struct iattr *iap)
+ {
+       if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
+               iap->ia_valid &= ~ATTR_SIZE;
+ }
  /*
   * Create a file (regular, directory, device, fifo); UNIX sockets 
   * not yet implemented.
@@@ -1274,6 -1288,8 +1291,8 @@@ nfsd_create(struct svc_rqst *rqstp, str
        switch (type) {
        case S_IFREG:
                host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+               if (!host_err)
+                       nfsd_check_ignore_resizing(iap);
                break;
        case S_IFDIR:
                host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
@@@ -1427,6 -1443,8 +1446,8 @@@ nfsd_create_v3(struct svc_rqst *rqstp, 
                /* setattr will sync the child (or not) */
        }
  
+       nfsd_check_ignore_resizing(iap);
        if (createmode == NFS3_CREATE_EXCLUSIVE) {
                /* Cram the verifier into atime/mtime */
                iap->ia_valid = ATTR_MTIME|ATTR_ATIME
index d3a4c0231933c4e3e5885fd46362b15a6ce96a77,d209c630a4a120c7a87bc053e09dc56ebab0f2cd..2a30775959e9343e187118016cf2ff9bd8214bc9
   */
  typedef int           (*svc_thread_fn)(void *);
  
+ /* statistics for svc_pool structures */
+ struct svc_pool_stats {
+       unsigned long   packets;
+       unsigned long   sockets_queued;
+       unsigned long   threads_woken;
+       unsigned long   overloads_avoided;
+       unsigned long   threads_timedout;
+ };
  /*
   *
   * RPC service thread pool.
@@@ -41,6 -50,8 +50,8 @@@ struct svc_pool 
        struct list_head        sp_sockets;     /* pending sockets */
        unsigned int            sp_nrthreads;   /* # of threads in pool */
        struct list_head        sp_all_threads; /* all server threads */
+       int                     sp_nwaking;     /* number of threads woken but not yet active */
+       struct svc_pool_stats   sp_stats;       /* statistics on pool operation */
  } ____cacheline_aligned_in_smp;
  
  /*
@@@ -69,6 -80,7 +80,6 @@@ struct svc_serv 
        struct list_head        sv_tempsocks;   /* all temporary sockets */
        int                     sv_tmpcnt;      /* count of temporary sockets */
        struct timer_list       sv_temptimer;   /* timer for aging temporary sockets */
 -      sa_family_t             sv_family;      /* listener's address family */
  
        char *                  sv_name;        /* service name */
  
@@@ -83,6 -95,8 +94,8 @@@
        struct module *         sv_module;      /* optional module to count when
                                                 * adding threads */
        svc_thread_fn           sv_function;    /* main function for threads */
+       unsigned int            sv_drc_max_pages; /* Total pages for DRC */
+       unsigned int            sv_drc_pages_used;/* DRC pages used */
  };
  
  /*
@@@ -218,6 -232,7 +231,7 @@@ struct svc_rqst 
        struct svc_cred         rq_cred;        /* auth info */
        void *                  rq_xprt_ctxt;   /* transport specific context ptr */
        struct svc_deferred_req*rq_deferred;    /* deferred request we are replaying */
+       int                     rq_usedeferral; /* use deferral */
  
        size_t                  rq_xprt_hlen;   /* xprt header len */
        struct xdr_buf          rq_arg;
                                                 * cache pages */
        wait_queue_head_t       rq_wait;        /* synchronization */
        struct task_struct      *rq_task;       /* service thread */
+       int                     rq_waking;      /* 1 if thread is being woken */
  };
  
  /*
@@@ -384,19 -400,20 +399,20 @@@ struct svc_procedure 
  /*
   * Function prototypes.
   */
 -struct svc_serv *svc_create(struct svc_program *, unsigned int, sa_family_t,
 +struct svc_serv *svc_create(struct svc_program *, unsigned int,
                            void (*shutdown)(struct svc_serv *));
  struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
                                        struct svc_pool *pool);
  void             svc_exit_thread(struct svc_rqst *);
  struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
 -                      sa_family_t, void (*shutdown)(struct svc_serv *),
 +                      void (*shutdown)(struct svc_serv *),
                        svc_thread_fn, struct module *);
  int              svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
+ int              svc_pool_stats_open(struct svc_serv *serv, struct file *file);
  void             svc_destroy(struct svc_serv *);
  int              svc_process(struct svc_rqst *);
 -int              svc_register(const struct svc_serv *, const unsigned short,
 -                              const unsigned short);
 +int              svc_register(const struct svc_serv *, const int,
 +                              const unsigned short, const unsigned short);
  
  void             svc_wake_up(struct svc_serv *);
  void             svc_reserve(struct svc_rqst *rqstp, int space);
diff --combined net/sunrpc/svc.c
index 9b49a6ab8ded04a55401aa8198022f38a4059a5d,45984cbe1bfa430a1c41c6b0b9906c85c7df917a..8847add6ca164a7aea722b96978c44abb5ba7158
@@@ -312,12 -312,13 +312,12 @@@ svc_pool_map_set_cpumask(struct task_st
        switch (m->mode) {
        case SVC_POOL_PERCPU:
        {
 -              set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
 +              set_cpus_allowed_ptr(task, cpumask_of(node));
                break;
        }
        case SVC_POOL_PERNODE:
        {
 -              node_to_cpumask_ptr(nodecpumask, node);
 -              set_cpus_allowed_ptr(task, nodecpumask);
 +              set_cpus_allowed_ptr(task, cpumask_of_node(node));
                break;
        }
        }
@@@ -358,7 -359,7 +358,7 @@@ svc_pool_for_cpu(struct svc_serv *serv
   */
  static struct svc_serv *
  __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 -         sa_family_t family, void (*shutdown)(struct svc_serv *serv))
 +           void (*shutdown)(struct svc_serv *serv))
  {
        struct svc_serv *serv;
        unsigned int vers;
  
        if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
                return NULL;
 -      serv->sv_family    = family;
        serv->sv_name      = prog->pg_name;
        serv->sv_program   = prog;
        serv->sv_nrthreads = 1;
  
  struct svc_serv *
  svc_create(struct svc_program *prog, unsigned int bufsize,
 -              sa_family_t family, void (*shutdown)(struct svc_serv *serv))
 +         void (*shutdown)(struct svc_serv *serv))
  {
 -      return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
 +      return __svc_create(prog, bufsize, /*npools*/1, shutdown);
  }
  EXPORT_SYMBOL_GPL(svc_create);
  
  struct svc_serv *
  svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
 -                sa_family_t family, void (*shutdown)(struct svc_serv *serv),
 +                void (*shutdown)(struct svc_serv *serv),
                  svc_thread_fn func, struct module *mod)
  {
        struct svc_serv *serv;
        unsigned int npools = svc_pool_map_get();
  
 -      serv = __svc_create(prog, bufsize, npools, family, shutdown);
 +      serv = __svc_create(prog, bufsize, npools, shutdown);
  
        if (serv != NULL) {
                serv->sv_function = func;
@@@ -717,6 -719,8 +717,6 @@@ svc_exit_thread(struct svc_rqst *rqstp
  }
  EXPORT_SYMBOL_GPL(svc_exit_thread);
  
 -#ifdef CONFIG_SUNRPC_REGISTER_V4
 -
  /*
   * Register an "inet" protocol family netid with the local
   * rpcbind daemon via an rpcbind v4 SET request.
@@@ -731,13 -735,12 +731,13 @@@ static int __svc_rpcb_register4(const u
                                const unsigned short protocol,
                                const unsigned short port)
  {
 -      struct sockaddr_in sin = {
 +      const struct sockaddr_in sin = {
                .sin_family             = AF_INET,
                .sin_addr.s_addr        = htonl(INADDR_ANY),
                .sin_port               = htons(port),
        };
 -      char *netid;
 +      const char *netid;
 +      int error;
  
        switch (protocol) {
        case IPPROTO_UDP:
                netid = RPCBIND_NETID_TCP;
                break;
        default:
 -              return -EPROTONOSUPPORT;
 +              return -ENOPROTOOPT;
        }
  
 -      return rpcb_v4_register(program, version,
 -                              (struct sockaddr *)&sin, netid);
 +      error = rpcb_v4_register(program, version,
 +                                      (const struct sockaddr *)&sin, netid);
 +
 +      /*
 +       * User space didn't support rpcbind v4, so retry this
 +       * registration request with the legacy rpcbind v2 protocol.
 +       */
 +      if (error == -EPROTONOSUPPORT)
 +              error = rpcb_register(program, version, protocol, port);
 +
 +      return error;
  }
  
 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
  /*
   * Register an "inet6" protocol family netid with the local
   * rpcbind daemon via an rpcbind v4 SET request.
@@@ -778,13 -771,12 +778,13 @@@ static int __svc_rpcb_register6(const u
                                const unsigned short protocol,
                                const unsigned short port)
  {
 -      struct sockaddr_in6 sin6 = {
 +      const struct sockaddr_in6 sin6 = {
                .sin6_family            = AF_INET6,
                .sin6_addr              = IN6ADDR_ANY_INIT,
                .sin6_port              = htons(port),
        };
 -      char *netid;
 +      const char *netid;
 +      int error;
  
        switch (protocol) {
        case IPPROTO_UDP:
                netid = RPCBIND_NETID_TCP6;
                break;
        default:
 -              return -EPROTONOSUPPORT;
 +              return -ENOPROTOOPT;
        }
  
 -      return rpcb_v4_register(program, version,
 -                              (struct sockaddr *)&sin6, netid);
 +      error = rpcb_v4_register(program, version,
 +                                      (const struct sockaddr *)&sin6, netid);
 +
 +      /*
 +       * User space didn't support rpcbind version 4, so we won't
 +       * use a PF_INET6 listener.
 +       */
 +      if (error == -EPROTONOSUPPORT)
 +              error = -EAFNOSUPPORT;
 +
 +      return error;
  }
 +#endif        /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
  
  /*
   * Register a kernel RPC service via rpcbind version 4.
   * Returns zero on success; a negative errno value is returned
   * if any error occurs.
   */
 -static int __svc_register(const u32 program, const u32 version,
 -                        const sa_family_t family,
 +static int __svc_register(const char *progname,
 +                        const u32 program, const u32 version,
 +                        const int family,
                          const unsigned short protocol,
                          const unsigned short port)
  {
 -      int error;
 +      int error = -EAFNOSUPPORT;
  
        switch (family) {
 -      case AF_INET:
 -              return __svc_rpcb_register4(program, version,
 +      case PF_INET:
 +              error = __svc_rpcb_register4(program, version,
                                                protocol, port);
 -      case AF_INET6:
 +              break;
 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 +      case PF_INET6:
                error = __svc_rpcb_register6(program, version,
                                                protocol, port);
 -              if (error < 0)
 -                      return error;
 -
 -              /*
 -               * Work around bug in some versions of Linux rpcbind
 -               * which don't allow registration of both inet and
 -               * inet6 netids.
 -               *
 -               * Error return ignored for now.
 -               */
 -              __svc_rpcb_register4(program, version,
 -                                              protocol, port);
 -              return 0;
 +#endif        /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
        }
  
 -      return -EAFNOSUPPORT;
 -}
 -
 -#else /* CONFIG_SUNRPC_REGISTER_V4 */
 -
 -/*
 - * Register a kernel RPC service via rpcbind version 2.
 - *
 - * Returns zero on success; a negative errno value is returned
 - * if any error occurs.
 - */
 -static int __svc_register(const u32 program, const u32 version,
 -                        sa_family_t family,
 -                        const unsigned short protocol,
 -                        const unsigned short port)
 -{
 -      if (family != AF_INET)
 -              return -EAFNOSUPPORT;
 -
 -      return rpcb_register(program, version, protocol, port);
 +      if (error < 0)
 +              printk(KERN_WARNING "svc: failed to register %sv%u RPC "
 +                      "service (errno %d).\n", progname, version, -error);
 +      return error;
  }
  
 -#endif /* CONFIG_SUNRPC_REGISTER_V4 */
 -
  /**
   * svc_register - register an RPC service with the local portmapper
   * @serv: svc_serv struct for the service to register
 + * @family: protocol family of service's listener socket
   * @proto: transport protocol number to advertise
   * @port: port to advertise
   *
 - * Service is registered for any address in serv's address family
 + * Service is registered for any address in the passed-in protocol family
   */
 -int svc_register(const struct svc_serv *serv, const unsigned short proto,
 -               const unsigned short port)
 +int svc_register(const struct svc_serv *serv, const int family,
 +               const unsigned short proto, const unsigned short port)
  {
        struct svc_program      *progp;
        unsigned int            i;
                                        i,
                                        proto == IPPROTO_UDP?  "udp" : "tcp",
                                        port,
 -                                      serv->sv_family,
 +                                      family,
                                        progp->pg_vers[i]->vs_hidden?
                                                " (but not telling portmap)" : "");
  
                        if (progp->pg_vers[i]->vs_hidden)
                                continue;
  
 -                      error = __svc_register(progp->pg_prog, i,
 -                                              serv->sv_family, proto, port);
 +                      error = __svc_register(progp->pg_name, progp->pg_prog,
 +                                              i, family, proto, port);
                        if (error < 0)
                                break;
                }
        return error;
  }
  
 -#ifdef CONFIG_SUNRPC_REGISTER_V4
 -
 +/*
 + * If user space is running rpcbind, it should take the v4 UNSET
 + * and clear everything for this [program, version].  If user space
 + * is running portmap, it will reject the v4 UNSET, but won't have
 + * any "inet6" entries anyway.  So a PMAP_UNSET should be sufficient
 + * in this case to clear all existing entries for [program, version].
 + */
  static void __svc_unregister(const u32 program, const u32 version,
                             const char *progname)
  {
 -      struct sockaddr_in6 sin6 = {
 -              .sin6_family            = AF_INET6,
 -              .sin6_addr              = IN6ADDR_ANY_INIT,
 -              .sin6_port              = 0,
 -      };
        int error;
  
 -      error = rpcb_v4_register(program, version,
 -                              (struct sockaddr *)&sin6, "");
 -      dprintk("svc: %s(%sv%u), error %d\n",
 -                      __func__, progname, version, error);
 -}
 -
 -#else /* CONFIG_SUNRPC_REGISTER_V4 */
 +      error = rpcb_v4_register(program, version, NULL, "");
  
 -static void __svc_unregister(const u32 program, const u32 version,
 -                           const char *progname)
 -{
 -      int error;
 +      /*
 +       * User space didn't support rpcbind v4, so retry this
 +       * request with the legacy rpcbind v2 protocol.
 +       */
 +      if (error == -EPROTONOSUPPORT)
 +              error = rpcb_register(program, version, 0, 0);
  
 -      error = rpcb_register(program, version, 0, 0);
        dprintk("svc: %s(%sv%u), error %d\n",
                        __func__, progname, version, error);
  }
  
 -#endif        /* CONFIG_SUNRPC_REGISTER_V4 */
 -
  /*
   * All netids, bind addresses and ports registered for [program, version]
   * are removed from the local rpcbind database (if the service is not
@@@ -1008,6 -1023,8 +1008,8 @@@ svc_process(struct svc_rqst *rqstp
        rqstp->rq_res.tail[0].iov_len = 0;
        /* Will be turned off only in gss privacy case: */
        rqstp->rq_splice_ok = 1;
+       /* Will be turned off only when NFSv4 Sessions are used */
+       rqstp->rq_usedeferral = 1;
  
        /* Setup reply header */
        rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
        procp = versp->vs_proc + proc;
        if (proc >= versp->vs_nproc || !procp->pc_func)
                goto err_bad_proc;
-       rqstp->rq_server   = serv;
        rqstp->rq_procinfo = procp;
  
        /* Syntactic check complete */
diff --combined net/sunrpc/svc_xprt.c
index 2819ee093f365210c21c3895858221e5c4a3b656,600d0918e3aea135a8c9df8cd9b43e169019c550..c200d92e57e4990c98d5f95ebff026104d5fdedc
@@@ -14,6 -14,8 +14,8 @@@
  
  #define RPCDBG_FACILITY       RPCDBG_SVCXPRT
  
+ #define SVC_MAX_WAKING 5
  static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
  static int svc_deferred_recv(struct svc_rqst *rqstp);
  static struct cache_deferred_req *svc_defer(struct cache_req *req);
@@@ -161,9 -163,7 +163,9 @@@ EXPORT_SYMBOL_GPL(svc_xprt_init)
  
  static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
                                         struct svc_serv *serv,
 -                                       unsigned short port, int flags)
 +                                       const int family,
 +                                       const unsigned short port,
 +                                       int flags)
  {
        struct sockaddr_in sin = {
                .sin_family             = AF_INET,
        struct sockaddr *sap;
        size_t len;
  
 -      switch (serv->sv_family) {
 -      case AF_INET:
 +      switch (family) {
 +      case PF_INET:
                sap = (struct sockaddr *)&sin;
                len = sizeof(sin);
                break;
 -      case AF_INET6:
 +      case PF_INET6:
                sap = (struct sockaddr *)&sin6;
                len = sizeof(sin6);
                break;
        return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
  }
  
 -int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
 +int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
 +                  const int family, const unsigned short port,
                    int flags)
  {
        struct svc_xprt_class *xcl;
                        goto err;
  
                spin_unlock(&svc_xprt_class_lock);
 -              newxprt = __svc_xpo_create(xcl, serv, port, flags);
 +              newxprt = __svc_xpo_create(xcl, serv, family, port, flags);
                if (IS_ERR(newxprt)) {
                        module_put(xcl->xcl_owner);
                        return PTR_ERR(newxprt);
@@@ -301,6 -300,7 +303,7 @@@ void svc_xprt_enqueue(struct svc_xprt *
        struct svc_pool *pool;
        struct svc_rqst *rqstp;
        int cpu;
+       int thread_avail;
  
        if (!(xprt->xpt_flags &
              ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
  
        spin_lock_bh(&pool->sp_lock);
  
-       if (!list_empty(&pool->sp_threads) &&
-           !list_empty(&pool->sp_sockets))
-               printk(KERN_ERR
-                      "svc_xprt_enqueue: "
-                      "threads and transports both waiting??\n");
        if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
                /* Don't enqueue dead transports */
                dprintk("svc: transport %p is dead, not enqueued\n", xprt);
                goto out_unlock;
        }
  
+       pool->sp_stats.packets++;
        /* Mark transport as busy. It will remain in this state until
         * the provider calls svc_xprt_received. We update XPT_BUSY
         * atomically because it also guards against trying to enqueue
        }
  
   process:
-       if (!list_empty(&pool->sp_threads)) {
+       /* Work out whether threads are available */
+       thread_avail = !list_empty(&pool->sp_threads);  /* threads are asleep */
+       if (pool->sp_nwaking >= SVC_MAX_WAKING) {
+               /* too many threads are runnable and trying to wake up */
+               thread_avail = 0;
+               pool->sp_stats.overloads_avoided++;
+       }
+       if (thread_avail) {
                rqstp = list_entry(pool->sp_threads.next,
                                   struct svc_rqst,
                                   rq_list);
                svc_xprt_get(xprt);
                rqstp->rq_reserved = serv->sv_max_mesg;
                atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
+               rqstp->rq_waking = 1;
+               pool->sp_nwaking++;
+               pool->sp_stats.threads_woken++;
                BUG_ON(xprt->xpt_pool != pool);
                wake_up(&rqstp->rq_wait);
        } else {
                dprintk("svc: transport %p put into queue\n", xprt);
                list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
+               pool->sp_stats.sockets_queued++;
                BUG_ON(xprt->xpt_pool != pool);
        }
  
@@@ -588,6 -596,7 +599,7 @@@ int svc_recv(struct svc_rqst *rqstp, lo
        int                     pages;
        struct xdr_buf          *arg;
        DECLARE_WAITQUEUE(wait, current);
+       long                    time_left;
  
        dprintk("svc: server %p waiting for data (to = %ld)\n",
                rqstp, timeout);
                return -EINTR;
  
        spin_lock_bh(&pool->sp_lock);
+       if (rqstp->rq_waking) {
+               rqstp->rq_waking = 0;
+               pool->sp_nwaking--;
+               BUG_ON(pool->sp_nwaking < 0);
+       }
        xprt = svc_xprt_dequeue(pool);
        if (xprt) {
                rqstp->rq_xprt = xprt;
                add_wait_queue(&rqstp->rq_wait, &wait);
                spin_unlock_bh(&pool->sp_lock);
  
-               schedule_timeout(timeout);
+               time_left = schedule_timeout(timeout);
  
                try_to_freeze();
  
                spin_lock_bh(&pool->sp_lock);
                remove_wait_queue(&rqstp->rq_wait, &wait);
+               if (!time_left)
+                       pool->sp_stats.threads_timedout++;
  
                xprt = rqstp->rq_xprt;
                if (!xprt) {
@@@ -958,7 -974,7 +977,7 @@@ static struct cache_deferred_req *svc_d
        struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
        struct svc_deferred_req *dr;
  
-       if (rqstp->rq_arg.page_len)
+       if (rqstp->rq_arg.page_len || !rqstp->rq_usedeferral)
                return NULL; /* if more than a page, give up FIXME */
        if (rqstp->rq_deferred) {
                dr = rqstp->rq_deferred;
@@@ -1036,13 -1052,7 +1055,13 @@@ static struct svc_deferred_req *svc_def
        return dr;
  }
  
 -/*
 +/**
 + * svc_find_xprt - find an RPC transport instance
 + * @serv: pointer to svc_serv to search
 + * @xcl_name: C string containing transport's class name
 + * @af: Address family of transport's local address
 + * @port: transport's IP port number
 + *
   * Return the transport instance pointer for the endpoint accepting
   * connections/peer traffic from the specified transport class,
   * address family and port.
   * wild-card, and will result in matching the first transport in the
   * service's list that has a matching class name.
   */
 -struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
 -                             int af, int port)
 +struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
 +                             const sa_family_t af, const unsigned short port)
  {
        struct svc_xprt *xprt;
        struct svc_xprt *found = NULL;
  
        /* Sanity check the args */
 -      if (!serv || !xcl_name)
 +      if (serv == NULL || xcl_name == NULL)
                return found;
  
        spin_lock_bh(&serv->sv_lock);
                        continue;
                if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
                        continue;
 -              if (port && port != svc_xprt_local_port(xprt))
 +              if (port != 0 && port != svc_xprt_local_port(xprt))
                        continue;
                found = xprt;
                svc_xprt_get(xprt);
@@@ -1112,3 -1122,93 +1131,93 @@@ int svc_xprt_names(struct svc_serv *ser
        return totlen;
  }
  EXPORT_SYMBOL_GPL(svc_xprt_names);
+ /*----------------------------------------------------------------------------*/
+ static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
+ {
+       unsigned int pidx = (unsigned int)*pos;
+       struct svc_serv *serv = m->private;
+       dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
+       lock_kernel();
+       /* bump up the pseudo refcount while traversing */
+       svc_get(serv);
+       unlock_kernel();
+       if (!pidx)
+               return SEQ_START_TOKEN;
+       return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]);
+ }
+ static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
+ {
+       struct svc_pool *pool = p;
+       struct svc_serv *serv = m->private;
+       dprintk("svc_pool_stats_next, *pos=%llu\n", *pos);
+       if (p == SEQ_START_TOKEN) {
+               pool = &serv->sv_pools[0];
+       } else {
+               unsigned int pidx = (pool - &serv->sv_pools[0]);
+               if (pidx < serv->sv_nrpools-1)
+                       pool = &serv->sv_pools[pidx+1];
+               else
+                       pool = NULL;
+       }
+       ++*pos;
+       return pool;
+ }
+ static void svc_pool_stats_stop(struct seq_file *m, void *p)
+ {
+       struct svc_serv *serv = m->private;
+       lock_kernel();
+       /* this function really, really should have been called svc_put() */
+       svc_destroy(serv);
+       unlock_kernel();
+ }
+ static int svc_pool_stats_show(struct seq_file *m, void *p)
+ {
+       struct svc_pool *pool = p;
+       if (p == SEQ_START_TOKEN) {
+               seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n");
+               return 0;
+       }
+       seq_printf(m, "%u %lu %lu %lu %lu %lu\n",
+               pool->sp_id,
+               pool->sp_stats.packets,
+               pool->sp_stats.sockets_queued,
+               pool->sp_stats.threads_woken,
+               pool->sp_stats.overloads_avoided,
+               pool->sp_stats.threads_timedout);
+       return 0;
+ }
+ static const struct seq_operations svc_pool_stats_seq_ops = {
+       .start  = svc_pool_stats_start,
+       .next   = svc_pool_stats_next,
+       .stop   = svc_pool_stats_stop,
+       .show   = svc_pool_stats_show,
+ };
+ int svc_pool_stats_open(struct svc_serv *serv, struct file *file)
+ {
+       int err;
+       err = seq_open(file, &svc_pool_stats_seq_ops);
+       if (!err)
+               ((struct seq_file *) file->private_data)->private = serv;
+       return err;
+ }
+ EXPORT_SYMBOL(svc_pool_stats_open);
+ /*----------------------------------------------------------------------------*/
diff --combined net/sunrpc/svcsock.c
index 9d504234af4a664f5f116171b9d6e02825d1ac4f,7a2a90fb2e06e0543cd752c06fedf68842f9d8de..af3198814c154756daa50f2c58819272795df95c
@@@ -345,7 -345,6 +345,6 @@@ static void svc_sock_setbufsize(struct 
        lock_sock(sock->sk);
        sock->sk->sk_sndbuf = snd * 2;
        sock->sk->sk_rcvbuf = rcv * 2;
-       sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
        release_sock(sock->sk);
  #endif
  }
@@@ -797,23 -796,6 +796,6 @@@ static int svc_tcp_recvfrom(struct svc_
                test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
                test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
  
-       if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
-               /* sndbuf needs to have room for one request
-                * per thread, otherwise we can stall even when the
-                * network isn't a bottleneck.
-                *
-                * We count all threads rather than threads in a
-                * particular pool, which provides an upper bound
-                * on the number of threads which will access the socket.
-                *
-                * rcvbuf just needs to be able to hold a few requests.
-                * Normally they will be removed from the queue
-                * as soon a a complete request arrives.
-                */
-               svc_sock_setbufsize(svsk->sk_sock,
-                                   (serv->sv_nrthreads+3) * serv->sv_max_mesg,
-                                   3 * serv->sv_max_mesg);
        clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
  
        /* Receive data. If we haven't got the record length yet, get
@@@ -1061,15 -1043,6 +1043,6 @@@ static void svc_tcp_init(struct svc_soc
  
                tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
  
-               /* initialise setting must have enough space to
-                * receive and respond to one request.
-                * svc_tcp_recvfrom will re-adjust if necessary
-                */
-               svc_sock_setbufsize(svsk->sk_sock,
-                                   3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
-                                   3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
-               set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
                set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
                if (sk->sk_state != TCP_ESTABLISHED)
                        set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@@ -1110,6 -1083,7 +1083,6 @@@ static struct svc_sock *svc_setup_socke
        struct svc_sock *svsk;
        struct sock     *inet;
        int             pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
 -      int             val;
  
        dprintk("svc: svc_setup_socket %p\n", sock);
        if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
  
        /* Register socket with portmapper */
        if (*errp >= 0 && pmap_register)
 -              *errp = svc_register(serv, inet->sk_protocol,
 +              *errp = svc_register(serv, inet->sk_family, inet->sk_protocol,
                                     ntohs(inet_sk(inet)->sport));
  
        if (*errp < 0) {
        /* Initialize the socket */
        if (sock->type == SOCK_DGRAM)
                svc_udp_init(svsk, serv);
-       else
+       else {
+               /* initialise setting must have enough space to
+                * receive and respond to one request.
+                */
+               svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
+                                       4 * serv->sv_max_mesg);
                svc_tcp_init(svsk, serv);
+       }
  
 -      /*
 -       * We start one listener per sv_serv.  We want AF_INET
 -       * requests to be automatically shunted to our AF_INET6
 -       * listener using a mapped IPv4 address.  Make sure
 -       * no-one starts an equivalent IPv4 listener, which
 -       * would steal our incoming connections.
 -       */
 -      val = 0;
 -      if (serv->sv_family == AF_INET6)
 -              kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
 -                                      (char *)&val, sizeof(val));
 -
        dprintk("svc: svc_setup_socket created %p (inet %p)\n",
                                svsk, svsk->sk_sk);
  
@@@ -1209,8 -1201,6 +1188,8 @@@ static struct svc_xprt *svc_create_sock
        struct sockaddr_storage addr;
        struct sockaddr *newsin = (struct sockaddr *)&addr;
        int             newlen;
 +      int             family;
 +      int             val;
        RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
  
        dprintk("svc: svc_create_socket(%s, %d, %s)\n",
                                "sockets supported\n");
                return ERR_PTR(-EINVAL);
        }
 +
        type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
 +      switch (sin->sa_family) {
 +      case AF_INET6:
 +              family = PF_INET6;
 +              break;
 +      case AF_INET:
 +              family = PF_INET;
 +              break;
 +      default:
 +              return ERR_PTR(-EINVAL);
 +      }
  
 -      error = sock_create_kern(sin->sa_family, type, protocol, &sock);
 +      error = sock_create_kern(family, type, protocol, &sock);
        if (error < 0)
                return ERR_PTR(error);
  
        svc_reclassify_socket(sock);
  
 +      /*
 +       * If this is an PF_INET6 listener, we want to avoid
 +       * getting requests from IPv4 remotes.  Those should
 +       * be shunted to a PF_INET listener via rpcbind.
 +       */
 +      val = 1;
 +      if (family == PF_INET6)
 +              kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
 +                                      (char *)&val, sizeof(val));
 +
        if (type == SOCK_STREAM)
                sock->sk->sk_reuse = 1;         /* allow address reuse */
        error = kernel_bind(sock, sin, len);