NR_OPEN (historically set to 1024*1024) actually forbids processes to open
more than 1024*1024 handles.
Unfortunatly some production servers hit the not so 'ridiculously high
value' of 1024*1024 file descriptors per process.
Changing NR_OPEN is not considered safe because of vmalloc space potential
exhaust.
This patch introduces a new sysctl (/proc/sys/fs/nr_open) wich defaults to
1024*1024, so that admins can decide to change this limit if their workload
needs it.
[akpm@linux-foundation.org: export it for sparc64]
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Denotes the number of inodes the system has allocated. This number will
grow and shrink dynamically.
Denotes the number of inodes the system has allocated. This number will
grow and shrink dynamically.
+nr_open
+-------
+
+Denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
nr_free_inodes
--------------
nr_free_inodes
--------------
- inode-max
- inode-nr
- inode-state
- inode-max
- inode-nr
- inode-state
- overflowuid
- overflowgid
- suid_dumpable
- overflowuid
- overflowgid
- suid_dumpable
==============================================================
==============================================================
+nr_open:
+
+This denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
+==============================================================
+
inode-max, inode-nr & inode-state:
As with file handles, the kernel allocates the inode structures
inode-max, inode-nr & inode-state:
As with file handles, the kernel allocates the inode structures
asmlinkage unsigned long
sys_getdtablesize(void)
{
asmlinkage unsigned long
sys_getdtablesize(void)
{
retval = NGROUPS_MAX;
goto out;
case 5:
retval = NGROUPS_MAX;
goto out;
case 5:
+ retval = sysctl_nr_open;
goto out;
case 6:
retval = 1;
goto out;
case 6:
retval = 1;
EXPORT_SYMBOL(sys_geteuid);
EXPORT_SYMBOL(sys_getuid);
EXPORT_SYMBOL(sys_getegid);
EXPORT_SYMBOL(sys_geteuid);
EXPORT_SYMBOL(sys_getuid);
EXPORT_SYMBOL(sys_getegid);
+EXPORT_SYMBOL(sysctl_nr_open);
EXPORT_SYMBOL(sys_getgid);
EXPORT_SYMBOL(svr4_getcontext);
EXPORT_SYMBOL(svr4_setcontext);
EXPORT_SYMBOL(sys_getgid);
EXPORT_SYMBOL(svr4_getcontext);
EXPORT_SYMBOL(svr4_setcontext);
case 3: /* UL_GMEMLIM */
return current->signal->rlim[RLIMIT_DATA].rlim_cur;
case 4: /* UL_GDESLIM */
case 3: /* UL_GMEMLIM */
return current->signal->rlim[RLIMIT_DATA].rlim_cur;
case 4: /* UL_GDESLIM */
SOLD("entry");
lock_kernel();
SOLD("entry");
lock_kernel();
- if(fd >= NR_OPEN) goto out;
+ if (fd >= sysctl_nr_open)
+ goto out;
fdt = files_fdtable(current->files);
filp = fdt->fd[fd];
fdt = files_fdtable(current->files);
filp = fdt->fd[fd];
SOLD("entry");
lock_kernel();
SOLD("entry");
lock_kernel();
- if(fd >= NR_OPEN) goto out;
+ if (fd >= sysctl_nr_open)
+ goto out;
fdt = files_fdtable(current->files);
filp = fdt->fd[fd];
fdt = files_fdtable(current->files);
filp = fdt->fd[fd];
+int sysctl_nr_open __read_mostly = 1024*1024;
+
/*
* We use this list to defer free fdtables that have vmalloced
* sets/arrays. By keeping a per-cpu list, we avoid having to embed
/*
* We use this list to defer free fdtables that have vmalloced
* sets/arrays. By keeping a per-cpu list, we avoid having to embed
nr /= (1024 / sizeof(struct file *));
nr = roundup_pow_of_two(nr + 1);
nr *= (1024 / sizeof(struct file *));
nr /= (1024 / sizeof(struct file *));
nr = roundup_pow_of_two(nr + 1);
nr *= (1024 / sizeof(struct file *));
- if (nr > NR_OPEN)
- nr = NR_OPEN;
+ if (nr > sysctl_nr_open)
+ nr = sysctl_nr_open;
fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
if (!fdt)
fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
if (!fdt)
if (nr < fdt->max_fds)
return 0;
/* Can we expand? */
if (nr < fdt->max_fds)
return 0;
/* Can we expand? */
+ if (nr >= sysctl_nr_open)
return -EMFILE;
/* All good, so we try */
return -EMFILE;
/* All good, so we try */
/* Fixed constants first: */
#undef NR_OPEN
/* Fixed constants first: */
#undef NR_OPEN
-#define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */
+extern int sysctl_nr_open;
#define INR_OPEN 1024 /* Initial setting for nfile rlimits */
#define BLOCK_SIZE_BITS 10
#define INR_OPEN 1024 /* Initial setting for nfile rlimits */
#define BLOCK_SIZE_BITS 10
if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
!capable(CAP_SYS_RESOURCE))
return -EPERM;
if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
!capable(CAP_SYS_RESOURCE))
return -EPERM;
- if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
+ if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
return -EPERM;
retval = security_task_setrlimit(resource, &new_rlim);
return -EPERM;
retval = security_task_setrlimit(resource, &new_rlim);
.mode = 0644,
.proc_handler = &proc_dointvec,
},
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nr_open",
+ .data = &sysctl_nr_open,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{
.ctl_name = FS_DENTRY,
.procname = "dentry-state",
{
.ctl_name = FS_DENTRY,
.procname = "dentry-state",