NFSD_FO_UnlockFS,
NFSD_Threads,
NFSD_Pool_Threads,
+ NFSD_Pool_Stats,
NFSD_Versions,
NFSD_Ports,
NFSD_MaxBlkSize,
.owner = THIS_MODULE,
};
+ extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
+
+ static struct file_operations pool_stats_operations = {
+ .open = nfsd_pool_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+ .owner = THIS_MODULE,
+ };
+
/*----------------------------------------------------------------------------*/
/*
* payload - write methods
static ssize_t __write_versions(struct file *file, char *buf, size_t size)
{
char *mesg = buf;
- char *vers, sign;
+ char *vers, *minorp, sign;
int len, num;
+ unsigned minor;
ssize_t tlen = 0;
char *sep;
do {
sign = *vers;
if (sign == '+' || sign == '-')
- num = simple_strtol((vers+1), NULL, 0);
+ num = simple_strtol((vers+1), &minorp, 0);
else
- num = simple_strtol(vers, NULL, 0);
+ num = simple_strtol(vers, &minorp, 0);
+ if (*minorp == '.') {
+ if (num < 4)
+ return -EINVAL;
+ minor = simple_strtoul(minorp+1, NULL, 0);
+ if (minor == 0)
+ return -EINVAL;
+ if (nfsd_minorversion(minor, sign == '-' ?
+ NFSD_CLEAR : NFSD_SET) < 0)
+ return -EINVAL;
+ goto next;
+ }
switch(num) {
case 2:
case 3:
default:
return -EINVAL;
}
+ next:
vers += len + 1;
tlen += len;
} while ((len = qword_get(&mesg, vers, size)) > 0);
num);
sep = " ";
}
+ if (nfsd_vers(4, NFSD_AVAIL))
+ for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; minor++)
+ len += sprintf(buf+len, " %c4.%u",
+ (nfsd_vers(4, NFSD_TEST) &&
+ nfsd_minorversion(minor, NFSD_TEST)) ?
+ '+' : '-',
+ minor);
len += sprintf(buf+len, "\n");
return len;
}
char transport[16];
int port;
if (sscanf(buf, "%15s %4d", transport, &port) == 2) {
+ if (port < 1 || port > 65535)
+ return -EINVAL;
err = nfsd_create_serv();
if (!err) {
err = svc_create_xprt(nfsd_serv,
- transport, port,
+ transport, PF_INET, port,
SVC_SOCK_ANONYMOUS);
if (err == -ENOENT)
/* Give a reasonable perror msg for
char transport[16];
int port;
if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
- if (port == 0)
+ if (port < 1 || port > 65535)
return -EINVAL;
if (nfsd_serv) {
xprt = svc_find_xprt(nfsd_serv, transport,
[NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO},
[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
#include <linux/freezer.h>
#include <linux/fs_struct.h>
#include <linux/kthread.h>
+ #include <linux/swap.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/stats.h>
extern struct svc_program nfsd_program;
static int nfsd(void *vrqstp);
struct timeval nfssvc_boot;
- static atomic_t nfsd_busy;
- static unsigned long nfsd_last_call;
- static DEFINE_SPINLOCK(nfsd_call_lock);
/*
* nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
};
+ u32 nfsd_supported_minorversion;
+
int nfsd_vers(int vers, enum vers_op change)
{
if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
}
return 0;
}
+
+ int nfsd_minorversion(u32 minorversion, enum vers_op change)
+ {
+ if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
+ return -1;
+ switch(change) {
+ case NFSD_SET:
+ nfsd_supported_minorversion = minorversion;
+ break;
+ case NFSD_CLEAR:
+ if (minorversion == 0)
+ return -1;
+ nfsd_supported_minorversion = minorversion - 1;
+ break;
+ case NFSD_TEST:
+ return minorversion <= nfsd_supported_minorversion;
+ case NFSD_AVAIL:
+ return minorversion <= NFSD_SUPPORTED_MINOR_VERSION;
+ }
+ return 0;
+ }
+
/*
* Maximum number of nfsd processes
*/
}
}
+ /*
+ * Each session guarantees a negotiated per slot memory cache for replies
+ * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated
+ * NFSv4.1 server might want to use more memory for a DRC than a machine
+ * with mutiple services.
+ *
+ * Impose a hard limit on the number of pages for the DRC which varies
+ * according to the machines free pages. This is of course only a default.
+ *
+ * For now this is a #defined shift which could be under admin control
+ * in the future.
+ */
+ static void set_max_drc(void)
+ {
+ /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */
+ #define NFSD_DRC_SIZE_SHIFT 7
+ nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages()
+ >> NFSD_DRC_SIZE_SHIFT;
+ nfsd_serv->sv_drc_pages_used = 0;
+ dprintk("%s svc_drc_max_pages %u\n", __func__,
+ nfsd_serv->sv_drc_max_pages);
+ }
int nfsd_create_serv(void)
{
nfsd_max_blksize /= 2;
}
- atomic_set(&nfsd_busy, 0);
nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
- AF_INET,
nfsd_last_thread, nfsd, THIS_MODULE);
if (nfsd_serv == NULL)
err = -ENOMEM;
+ else
+ set_max_drc();
do_gettimeofday(&nfssvc_boot); /* record boot time */
return err;
if (!list_empty(&nfsd_serv->sv_permsocks))
return 0;
- error = svc_create_xprt(nfsd_serv, "udp", port,
+ error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port,
SVC_SOCK_DEFAULTS);
if (error < 0)
return error;
if (error < 0)
return error;
- error = svc_create_xprt(nfsd_serv, "tcp", port,
+ error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port,
SVC_SOCK_DEFAULTS);
if (error < 0)
return error;
return error;
}
- static inline void
- update_thread_usage(int busy_threads)
- {
- unsigned long prev_call;
- unsigned long diff;
- int decile;
-
- spin_lock(&nfsd_call_lock);
- prev_call = nfsd_last_call;
- nfsd_last_call = jiffies;
- decile = busy_threads*10/nfsdstats.th_cnt;
- if (decile>0 && decile <= 10) {
- diff = nfsd_last_call - prev_call;
- if ( (nfsdstats.th_usage[decile-1] += diff) >= NFSD_USAGE_WRAP)
- nfsdstats.th_usage[decile-1] -= NFSD_USAGE_WRAP;
- if (decile == 10)
- nfsdstats.th_fullcnt++;
- }
- spin_unlock(&nfsd_call_lock);
- }
/*
* This is the NFS server kernel thread
nfsd(void *vrqstp)
{
struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
- struct fs_struct *fsp;
int err, preverr = 0;
/* Lock module and set up kernel thread */
/* At this point, the thread shares current->fs
* with the init process. We need to create files with a
* umask of 0 instead of init's umask. */
- fsp = copy_fs_struct(current->fs);
- if (!fsp) {
+ if (unshare_fs_struct() < 0) {
printk("Unable to start nfsd thread: out of memory\n");
goto out;
}
- exit_fs(current);
- current->fs = fsp;
+
current->fs->umask = 0;
/*
continue;
}
- update_thread_usage(atomic_read(&nfsd_busy));
- atomic_inc(&nfsd_busy);
/* Lock the export hash tables for reading. */
exp_readlock();
/* Unlock export hash tables */
exp_readunlock();
- update_thread_usage(atomic_read(&nfsd_busy));
- atomic_dec(&nfsd_busy);
}
/* Clear signals before calling svc_exit_thread() */
+ rqstp->rq_res.head[0].iov_len;
rqstp->rq_res.head[0].iov_len += sizeof(__be32);
+ /* NFSv4.1 DRC requires statp */
+ if (rqstp->rq_vers == 4)
+ nfsd4_set_statp(rqstp, statp);
+
/* Now call the procedure handler, and encode NFS status. */
nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
nfserr = map_new_errors(rqstp->rq_vers, nfserr);
nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
return 1;
}
+
+ int nfsd_pool_stats_open(struct inode *inode, struct file *file)
+ {
+ if (nfsd_serv == NULL)
+ return -ENODEV;
+ return svc_pool_stats_open(nfsd_serv, file);
+ }
put_write_access(inode);
goto out_nfserr;
}
- DQUOT_INIT(inode);
+ vfs_dq_init(inode);
}
/* sanitize the mode change */
}
/* Revoke setuid/setgid on chown */
- if (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) ||
- ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)) {
+ if (!S_ISDIR(inode->i_mode) &&
+ (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) ||
+ ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) {
iap->ia_valid |= ATTR_KILL_PRIV;
if (iap->ia_valid & ATTR_MODE) {
/* we're setting mode too, just clear the s*id bits */
else
flags = O_WRONLY|O_LARGEFILE;
- DQUOT_INIT(inode);
+ vfs_dq_init(inode);
}
*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
flags, cred);
static __be32
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
- unsigned long cnt, int *stablep)
+ unsigned long *cnt, int *stablep)
{
struct svc_export *exp;
struct dentry *dentry;
err = nfserr_perm;
if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
- (!lock_may_write(file->f_path.dentry->d_inode, offset, cnt)))
+ (!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt)))
goto out;
#endif
if (!EX_ISSYNC(exp))
stable = 0;
- if (stable && !EX_WGATHER(exp))
+ if (stable && !EX_WGATHER(exp)) {
+ spin_lock(&file->f_lock);
file->f_flags |= O_SYNC;
+ spin_unlock(&file->f_lock);
+ }
/* Write the data. */
oldfs = get_fs(); set_fs(KERNEL_DS);
host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
set_fs(oldfs);
if (host_err >= 0) {
- nfsdstats.io_write += cnt;
+ nfsdstats.io_write += host_err;
fsnotify_modify(file->f_path.dentry);
}
}
dprintk("nfsd: write complete host_err=%d\n", host_err);
- if (host_err >= 0)
+ if (host_err >= 0) {
err = 0;
- else
+ *cnt = host_err;
+ } else
err = nfserrno(host_err);
out:
return err;
*/
__be32
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
- loff_t offset, struct kvec *vec, int vlen, unsigned long cnt,
+ loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
int *stablep)
{
__be32 err = 0;
return 0;
}
+ /* HPUX client sometimes creates a file in mode 000, and sets size to 0.
+ * setting size to 0 may fail for some specific file systems by the permission
+ * checking which requires WRITE permission but the mode is 000.
+ * we ignore the resizing(to 0) on the just new created file, since the size is
+ * 0 after file created.
+ *
+ * call this only after vfs_create() is called.
+ * */
+ static void
+ nfsd_check_ignore_resizing(struct iattr *iap)
+ {
+ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
+ iap->ia_valid &= ~ATTR_SIZE;
+ }
+
/*
* Create a file (regular, directory, device, fifo); UNIX sockets
* not yet implemented.
switch (type) {
case S_IFREG:
host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+ if (!host_err)
+ nfsd_check_ignore_resizing(iap);
break;
case S_IFDIR:
host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
/* setattr will sync the child (or not) */
}
+ nfsd_check_ignore_resizing(iap);
+
if (createmode == NFS3_CREATE_EXCLUSIVE) {
/* Cram the verifier into atime/mtime */
iap->ia_valid = ATTR_MTIME|ATTR_ATIME
*/
typedef int (*svc_thread_fn)(void *);
+ /* statistics for svc_pool structures */
+ struct svc_pool_stats {
+ unsigned long packets;
+ unsigned long sockets_queued;
+ unsigned long threads_woken;
+ unsigned long overloads_avoided;
+ unsigned long threads_timedout;
+ };
+
/*
*
* RPC service thread pool.
struct list_head sp_sockets; /* pending sockets */
unsigned int sp_nrthreads; /* # of threads in pool */
struct list_head sp_all_threads; /* all server threads */
+ int sp_nwaking; /* number of threads woken but not yet active */
+ struct svc_pool_stats sp_stats; /* statistics on pool operation */
} ____cacheline_aligned_in_smp;
/*
struct list_head sv_tempsocks; /* all temporary sockets */
int sv_tmpcnt; /* count of temporary sockets */
struct timer_list sv_temptimer; /* timer for aging temporary sockets */
- sa_family_t sv_family; /* listener's address family */
char * sv_name; /* service name */
struct module * sv_module; /* optional module to count when
* adding threads */
svc_thread_fn sv_function; /* main function for threads */
+ unsigned int sv_drc_max_pages; /* Total pages for DRC */
+ unsigned int sv_drc_pages_used;/* DRC pages used */
};
/*
struct svc_cred rq_cred; /* auth info */
void * rq_xprt_ctxt; /* transport specific context ptr */
struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
+ int rq_usedeferral; /* use deferral */
size_t rq_xprt_hlen; /* xprt header len */
struct xdr_buf rq_arg;
* cache pages */
wait_queue_head_t rq_wait; /* synchronization */
struct task_struct *rq_task; /* service thread */
+ int rq_waking; /* 1 if thread is being woken */
};
/*
/*
* Function prototypes.
*/
-struct svc_serv *svc_create(struct svc_program *, unsigned int, sa_family_t,
+struct svc_serv *svc_create(struct svc_program *, unsigned int,
void (*shutdown)(struct svc_serv *));
struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
struct svc_pool *pool);
void svc_exit_thread(struct svc_rqst *);
struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
- sa_family_t, void (*shutdown)(struct svc_serv *),
+ void (*shutdown)(struct svc_serv *),
svc_thread_fn, struct module *);
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
+ int svc_pool_stats_open(struct svc_serv *serv, struct file *file);
void svc_destroy(struct svc_serv *);
int svc_process(struct svc_rqst *);
-int svc_register(const struct svc_serv *, const unsigned short,
- const unsigned short);
+int svc_register(const struct svc_serv *, const int,
+ const unsigned short, const unsigned short);
void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space);
switch (m->mode) {
case SVC_POOL_PERCPU:
{
- set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
+ set_cpus_allowed_ptr(task, cpumask_of(node));
break;
}
case SVC_POOL_PERNODE:
{
- node_to_cpumask_ptr(nodecpumask, node);
- set_cpus_allowed_ptr(task, nodecpumask);
+ set_cpus_allowed_ptr(task, cpumask_of_node(node));
break;
}
}
*/
static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
- sa_family_t family, void (*shutdown)(struct svc_serv *serv))
+ void (*shutdown)(struct svc_serv *serv))
{
struct svc_serv *serv;
unsigned int vers;
if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
return NULL;
- serv->sv_family = family;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
serv->sv_nrthreads = 1;
struct svc_serv *
svc_create(struct svc_program *prog, unsigned int bufsize,
- sa_family_t family, void (*shutdown)(struct svc_serv *serv))
+ void (*shutdown)(struct svc_serv *serv))
{
- return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
+ return __svc_create(prog, bufsize, /*npools*/1, shutdown);
}
EXPORT_SYMBOL_GPL(svc_create);
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
- sa_family_t family, void (*shutdown)(struct svc_serv *serv),
+ void (*shutdown)(struct svc_serv *serv),
svc_thread_fn func, struct module *mod)
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
- serv = __svc_create(prog, bufsize, npools, family, shutdown);
+ serv = __svc_create(prog, bufsize, npools, shutdown);
if (serv != NULL) {
serv->sv_function = func;
}
EXPORT_SYMBOL_GPL(svc_exit_thread);
-#ifdef CONFIG_SUNRPC_REGISTER_V4
-
/*
* Register an "inet" protocol family netid with the local
* rpcbind daemon via an rpcbind v4 SET request.
const unsigned short protocol,
const unsigned short port)
{
- struct sockaddr_in sin = {
+ const struct sockaddr_in sin = {
.sin_family = AF_INET,
.sin_addr.s_addr = htonl(INADDR_ANY),
.sin_port = htons(port),
};
- char *netid;
+ const char *netid;
+ int error;
switch (protocol) {
case IPPROTO_UDP:
netid = RPCBIND_NETID_TCP;
break;
default:
- return -EPROTONOSUPPORT;
+ return -ENOPROTOOPT;
}
- return rpcb_v4_register(program, version,
- (struct sockaddr *)&sin, netid);
+ error = rpcb_v4_register(program, version,
+ (const struct sockaddr *)&sin, netid);
+
+ /*
+ * User space didn't support rpcbind v4, so retry this
+ * registration request with the legacy rpcbind v2 protocol.
+ */
+ if (error == -EPROTONOSUPPORT)
+ error = rpcb_register(program, version, protocol, port);
+
+ return error;
}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/*
* Register an "inet6" protocol family netid with the local
* rpcbind daemon via an rpcbind v4 SET request.
const unsigned short protocol,
const unsigned short port)
{
- struct sockaddr_in6 sin6 = {
+ const struct sockaddr_in6 sin6 = {
.sin6_family = AF_INET6,
.sin6_addr = IN6ADDR_ANY_INIT,
.sin6_port = htons(port),
};
- char *netid;
+ const char *netid;
+ int error;
switch (protocol) {
case IPPROTO_UDP:
netid = RPCBIND_NETID_TCP6;
break;
default:
- return -EPROTONOSUPPORT;
+ return -ENOPROTOOPT;
}
- return rpcb_v4_register(program, version,
- (struct sockaddr *)&sin6, netid);
+ error = rpcb_v4_register(program, version,
+ (const struct sockaddr *)&sin6, netid);
+
+ /*
+ * User space didn't support rpcbind version 4, so we won't
+ * use a PF_INET6 listener.
+ */
+ if (error == -EPROTONOSUPPORT)
+ error = -EAFNOSUPPORT;
+
+ return error;
}
+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
/*
* Register a kernel RPC service via rpcbind version 4.
* Returns zero on success; a negative errno value is returned
* if any error occurs.
*/
-static int __svc_register(const u32 program, const u32 version,
- const sa_family_t family,
+static int __svc_register(const char *progname,
+ const u32 program, const u32 version,
+ const int family,
const unsigned short protocol,
const unsigned short port)
{
- int error;
+ int error = -EAFNOSUPPORT;
switch (family) {
- case AF_INET:
- return __svc_rpcb_register4(program, version,
+ case PF_INET:
+ error = __svc_rpcb_register4(program, version,
protocol, port);
- case AF_INET6:
+ break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case PF_INET6:
error = __svc_rpcb_register6(program, version,
protocol, port);
- if (error < 0)
- return error;
-
- /*
- * Work around bug in some versions of Linux rpcbind
- * which don't allow registration of both inet and
- * inet6 netids.
- *
- * Error return ignored for now.
- */
- __svc_rpcb_register4(program, version,
- protocol, port);
- return 0;
+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
}
- return -EAFNOSUPPORT;
-}
-
-#else /* CONFIG_SUNRPC_REGISTER_V4 */
-
-/*
- * Register a kernel RPC service via rpcbind version 2.
- *
- * Returns zero on success; a negative errno value is returned
- * if any error occurs.
- */
-static int __svc_register(const u32 program, const u32 version,
- sa_family_t family,
- const unsigned short protocol,
- const unsigned short port)
-{
- if (family != AF_INET)
- return -EAFNOSUPPORT;
-
- return rpcb_register(program, version, protocol, port);
+ if (error < 0)
+ printk(KERN_WARNING "svc: failed to register %sv%u RPC "
+ "service (errno %d).\n", progname, version, -error);
+ return error;
}
-#endif /* CONFIG_SUNRPC_REGISTER_V4 */
-
/**
* svc_register - register an RPC service with the local portmapper
* @serv: svc_serv struct for the service to register
+ * @family: protocol family of service's listener socket
* @proto: transport protocol number to advertise
* @port: port to advertise
*
- * Service is registered for any address in serv's address family
+ * Service is registered for any address in the passed-in protocol family
*/
-int svc_register(const struct svc_serv *serv, const unsigned short proto,
- const unsigned short port)
+int svc_register(const struct svc_serv *serv, const int family,
+ const unsigned short proto, const unsigned short port)
{
struct svc_program *progp;
unsigned int i;
i,
proto == IPPROTO_UDP? "udp" : "tcp",
port,
- serv->sv_family,
+ family,
progp->pg_vers[i]->vs_hidden?
" (but not telling portmap)" : "");
if (progp->pg_vers[i]->vs_hidden)
continue;
- error = __svc_register(progp->pg_prog, i,
- serv->sv_family, proto, port);
+ error = __svc_register(progp->pg_name, progp->pg_prog,
+ i, family, proto, port);
if (error < 0)
break;
}
return error;
}
-#ifdef CONFIG_SUNRPC_REGISTER_V4
-
+/*
+ * If user space is running rpcbind, it should take the v4 UNSET
+ * and clear everything for this [program, version]. If user space
+ * is running portmap, it will reject the v4 UNSET, but won't have
+ * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient
+ * in this case to clear all existing entries for [program, version].
+ */
static void __svc_unregister(const u32 program, const u32 version,
const char *progname)
{
- struct sockaddr_in6 sin6 = {
- .sin6_family = AF_INET6,
- .sin6_addr = IN6ADDR_ANY_INIT,
- .sin6_port = 0,
- };
int error;
- error = rpcb_v4_register(program, version,
- (struct sockaddr *)&sin6, "");
- dprintk("svc: %s(%sv%u), error %d\n",
- __func__, progname, version, error);
-}
-
-#else /* CONFIG_SUNRPC_REGISTER_V4 */
+ error = rpcb_v4_register(program, version, NULL, "");
-static void __svc_unregister(const u32 program, const u32 version,
- const char *progname)
-{
- int error;
+ /*
+ * User space didn't support rpcbind v4, so retry this
+ * request with the legacy rpcbind v2 protocol.
+ */
+ if (error == -EPROTONOSUPPORT)
+ error = rpcb_register(program, version, 0, 0);
- error = rpcb_register(program, version, 0, 0);
dprintk("svc: %s(%sv%u), error %d\n",
__func__, progname, version, error);
}
-#endif /* CONFIG_SUNRPC_REGISTER_V4 */
-
/*
* All netids, bind addresses and ports registered for [program, version]
* are removed from the local rpcbind database (if the service is not
rqstp->rq_res.tail[0].iov_len = 0;
/* Will be turned off only in gss privacy case: */
rqstp->rq_splice_ok = 1;
+ /* Will be turned off only when NFSv4 Sessions are used */
+ rqstp->rq_usedeferral = 1;
/* Setup reply header */
rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
procp = versp->vs_proc + proc;
if (proc >= versp->vs_nproc || !procp->pc_func)
goto err_bad_proc;
- rqstp->rq_server = serv;
rqstp->rq_procinfo = procp;
/* Syntactic check complete */
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
+ #define SVC_MAX_WAKING 5
+
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
struct svc_serv *serv,
- unsigned short port, int flags)
+ const int family,
+ const unsigned short port,
+ int flags)
{
struct sockaddr_in sin = {
.sin_family = AF_INET,
struct sockaddr *sap;
size_t len;
- switch (serv->sv_family) {
- case AF_INET:
+ switch (family) {
+ case PF_INET:
sap = (struct sockaddr *)&sin;
len = sizeof(sin);
break;
- case AF_INET6:
+ case PF_INET6:
sap = (struct sockaddr *)&sin6;
len = sizeof(sin6);
break;
return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
}
-int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
+int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+ const int family, const unsigned short port,
int flags)
{
struct svc_xprt_class *xcl;
goto err;
spin_unlock(&svc_xprt_class_lock);
- newxprt = __svc_xpo_create(xcl, serv, port, flags);
+ newxprt = __svc_xpo_create(xcl, serv, family, port, flags);
if (IS_ERR(newxprt)) {
module_put(xcl->xcl_owner);
return PTR_ERR(newxprt);
struct svc_pool *pool;
struct svc_rqst *rqstp;
int cpu;
+ int thread_avail;
if (!(xprt->xpt_flags &
((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
spin_lock_bh(&pool->sp_lock);
- if (!list_empty(&pool->sp_threads) &&
- !list_empty(&pool->sp_sockets))
- printk(KERN_ERR
- "svc_xprt_enqueue: "
- "threads and transports both waiting??\n");
-
if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
/* Don't enqueue dead transports */
dprintk("svc: transport %p is dead, not enqueued\n", xprt);
goto out_unlock;
}
+ pool->sp_stats.packets++;
+
/* Mark transport as busy. It will remain in this state until
* the provider calls svc_xprt_received. We update XPT_BUSY
* atomically because it also guards against trying to enqueue
}
process:
- if (!list_empty(&pool->sp_threads)) {
+ /* Work out whether threads are available */
+ thread_avail = !list_empty(&pool->sp_threads); /* threads are asleep */
+ if (pool->sp_nwaking >= SVC_MAX_WAKING) {
+ /* too many threads are runnable and trying to wake up */
+ thread_avail = 0;
+ pool->sp_stats.overloads_avoided++;
+ }
+
+ if (thread_avail) {
rqstp = list_entry(pool->sp_threads.next,
struct svc_rqst,
rq_list);
svc_xprt_get(xprt);
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
+ rqstp->rq_waking = 1;
+ pool->sp_nwaking++;
+ pool->sp_stats.threads_woken++;
BUG_ON(xprt->xpt_pool != pool);
wake_up(&rqstp->rq_wait);
} else {
dprintk("svc: transport %p put into queue\n", xprt);
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
+ pool->sp_stats.sockets_queued++;
BUG_ON(xprt->xpt_pool != pool);
}
int pages;
struct xdr_buf *arg;
DECLARE_WAITQUEUE(wait, current);
+ long time_left;
dprintk("svc: server %p waiting for data (to = %ld)\n",
rqstp, timeout);
return -EINTR;
spin_lock_bh(&pool->sp_lock);
+ if (rqstp->rq_waking) {
+ rqstp->rq_waking = 0;
+ pool->sp_nwaking--;
+ BUG_ON(pool->sp_nwaking < 0);
+ }
xprt = svc_xprt_dequeue(pool);
if (xprt) {
rqstp->rq_xprt = xprt;
add_wait_queue(&rqstp->rq_wait, &wait);
spin_unlock_bh(&pool->sp_lock);
- schedule_timeout(timeout);
+ time_left = schedule_timeout(timeout);
try_to_freeze();
spin_lock_bh(&pool->sp_lock);
remove_wait_queue(&rqstp->rq_wait, &wait);
+ if (!time_left)
+ pool->sp_stats.threads_timedout++;
xprt = rqstp->rq_xprt;
if (!xprt) {
struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
struct svc_deferred_req *dr;
- if (rqstp->rq_arg.page_len)
+ if (rqstp->rq_arg.page_len || !rqstp->rq_usedeferral)
return NULL; /* if more than a page, give up FIXME */
if (rqstp->rq_deferred) {
dr = rqstp->rq_deferred;
return dr;
}
-/*
+/**
+ * svc_find_xprt - find an RPC transport instance
+ * @serv: pointer to svc_serv to search
+ * @xcl_name: C string containing transport's class name
+ * @af: Address family of transport's local address
+ * @port: transport's IP port number
+ *
* Return the transport instance pointer for the endpoint accepting
* connections/peer traffic from the specified transport class,
* address family and port.
* wild-card, and will result in matching the first transport in the
* service's list that has a matching class name.
*/
-struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
- int af, int port)
+struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
+ const sa_family_t af, const unsigned short port)
{
struct svc_xprt *xprt;
struct svc_xprt *found = NULL;
/* Sanity check the args */
- if (!serv || !xcl_name)
+ if (serv == NULL || xcl_name == NULL)
return found;
spin_lock_bh(&serv->sv_lock);
continue;
if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
continue;
- if (port && port != svc_xprt_local_port(xprt))
+ if (port != 0 && port != svc_xprt_local_port(xprt))
continue;
found = xprt;
svc_xprt_get(xprt);
return totlen;
}
EXPORT_SYMBOL_GPL(svc_xprt_names);
+
+
+ /*----------------------------------------------------------------------------*/
+
+ static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
+ {
+ unsigned int pidx = (unsigned int)*pos;
+ struct svc_serv *serv = m->private;
+
+ dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
+
+ lock_kernel();
+ /* bump up the pseudo refcount while traversing */
+ svc_get(serv);
+ unlock_kernel();
+
+ if (!pidx)
+ return SEQ_START_TOKEN;
+ return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]);
+ }
+
+ static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
+ {
+ struct svc_pool *pool = p;
+ struct svc_serv *serv = m->private;
+
+ dprintk("svc_pool_stats_next, *pos=%llu\n", *pos);
+
+ if (p == SEQ_START_TOKEN) {
+ pool = &serv->sv_pools[0];
+ } else {
+ unsigned int pidx = (pool - &serv->sv_pools[0]);
+ if (pidx < serv->sv_nrpools-1)
+ pool = &serv->sv_pools[pidx+1];
+ else
+ pool = NULL;
+ }
+ ++*pos;
+ return pool;
+ }
+
+ static void svc_pool_stats_stop(struct seq_file *m, void *p)
+ {
+ struct svc_serv *serv = m->private;
+
+ lock_kernel();
+ /* this function really, really should have been called svc_put() */
+ svc_destroy(serv);
+ unlock_kernel();
+ }
+
+ static int svc_pool_stats_show(struct seq_file *m, void *p)
+ {
+ struct svc_pool *pool = p;
+
+ if (p == SEQ_START_TOKEN) {
+ seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n");
+ return 0;
+ }
+
+ seq_printf(m, "%u %lu %lu %lu %lu %lu\n",
+ pool->sp_id,
+ pool->sp_stats.packets,
+ pool->sp_stats.sockets_queued,
+ pool->sp_stats.threads_woken,
+ pool->sp_stats.overloads_avoided,
+ pool->sp_stats.threads_timedout);
+
+ return 0;
+ }
+
+ static const struct seq_operations svc_pool_stats_seq_ops = {
+ .start = svc_pool_stats_start,
+ .next = svc_pool_stats_next,
+ .stop = svc_pool_stats_stop,
+ .show = svc_pool_stats_show,
+ };
+
+ int svc_pool_stats_open(struct svc_serv *serv, struct file *file)
+ {
+ int err;
+
+ err = seq_open(file, &svc_pool_stats_seq_ops);
+ if (!err)
+ ((struct seq_file *) file->private_data)->private = serv;
+ return err;
+ }
+ EXPORT_SYMBOL(svc_pool_stats_open);
+
+ /*----------------------------------------------------------------------------*/
lock_sock(sock->sk);
sock->sk->sk_sndbuf = snd * 2;
sock->sk->sk_rcvbuf = rcv * 2;
- sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
release_sock(sock->sk);
#endif
}
test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
- if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
- /* sndbuf needs to have room for one request
- * per thread, otherwise we can stall even when the
- * network isn't a bottleneck.
- *
- * We count all threads rather than threads in a
- * particular pool, which provides an upper bound
- * on the number of threads which will access the socket.
- *
- * rcvbuf just needs to be able to hold a few requests.
- * Normally they will be removed from the queue
- * as soon a a complete request arrives.
- */
- svc_sock_setbufsize(svsk->sk_sock,
- (serv->sv_nrthreads+3) * serv->sv_max_mesg,
- 3 * serv->sv_max_mesg);
-
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
/* Receive data. If we haven't got the record length yet, get
tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
- /* initialise setting must have enough space to
- * receive and respond to one request.
- * svc_tcp_recvfrom will re-adjust if necessary
- */
- svc_sock_setbufsize(svsk->sk_sock,
- 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
- 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
-
- set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state != TCP_ESTABLISHED)
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
struct svc_sock *svsk;
struct sock *inet;
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
- int val;
dprintk("svc: svc_setup_socket %p\n", sock);
if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
/* Register socket with portmapper */
if (*errp >= 0 && pmap_register)
- *errp = svc_register(serv, inet->sk_protocol,
+ *errp = svc_register(serv, inet->sk_family, inet->sk_protocol,
ntohs(inet_sk(inet)->sport));
if (*errp < 0) {
/* Initialize the socket */
if (sock->type == SOCK_DGRAM)
svc_udp_init(svsk, serv);
- else
+ else {
+ /* initialise setting must have enough space to
+ * receive and respond to one request.
+ */
+ svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
+ 4 * serv->sv_max_mesg);
svc_tcp_init(svsk, serv);
+ }
- /*
- * We start one listener per sv_serv. We want AF_INET
- * requests to be automatically shunted to our AF_INET6
- * listener using a mapped IPv4 address. Make sure
- * no-one starts an equivalent IPv4 listener, which
- * would steal our incoming connections.
- */
- val = 0;
- if (serv->sv_family == AF_INET6)
- kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
- (char *)&val, sizeof(val));
-
dprintk("svc: svc_setup_socket created %p (inet %p)\n",
svsk, svsk->sk_sk);
struct sockaddr_storage addr;
struct sockaddr *newsin = (struct sockaddr *)&addr;
int newlen;
+ int family;
+ int val;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("svc: svc_create_socket(%s, %d, %s)\n",
"sockets supported\n");
return ERR_PTR(-EINVAL);
}
+
type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
+ switch (sin->sa_family) {
+ case AF_INET6:
+ family = PF_INET6;
+ break;
+ case AF_INET:
+ family = PF_INET;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
- error = sock_create_kern(sin->sa_family, type, protocol, &sock);
+ error = sock_create_kern(family, type, protocol, &sock);
if (error < 0)
return ERR_PTR(error);
svc_reclassify_socket(sock);
+ /*
+ * If this is an PF_INET6 listener, we want to avoid
+ * getting requests from IPv4 remotes. Those should
+ * be shunted to a PF_INET listener via rpcbind.
+ */
+ val = 1;
+ if (family == PF_INET6)
+ kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
+ (char *)&val, sizeof(val));
+
if (type == SOCK_STREAM)
sock->sk->sk_reuse = 1; /* allow address reuse */
error = kernel_bind(sock, sin, len);