summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Dumazet <dada1@cosmosbay.com>2008-02-06 01:37:16 -0800
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-06 10:41:06 -0800
commit9cfe015aa424b3c003baba3841a60dd9b5ad319b (patch)
tree5575e06efcf91018f860f2db43979e8e91aba1c3
parent774ed22c21ab95d582dfff38560f11cf290baeb4 (diff)
downloadlwn-9cfe015aa424b3c003baba3841a60dd9b5ad319b.tar.gz
lwn-9cfe015aa424b3c003baba3841a60dd9b5ad319b.zip
get rid of NR_OPEN and introduce a sysctl_nr_open
NR_OPEN (historically set to 1024*1024) actually forbids processes to open more than 1024*1024 handles. Unfortunatly some production servers hit the not so 'ridiculously high value' of 1024*1024 file descriptors per process. Changing NR_OPEN is not considered safe because of vmalloc space potential exhaust. This patch introduces a new sysctl (/proc/sys/fs/nr_open) wich defaults to 1024*1024, so that admins can decide to change this limit if their workload needs it. [akpm@linux-foundation.org: export it for sparc64] Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Cc: Alan Cox <alan@lxorguk.ukuu.org.uk> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: "David S. Miller" <davem@davemloft.net> Cc: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/filesystems/proc.txt8
-rw-r--r--Documentation/sysctl/fs.txt10
-rw-r--r--arch/alpha/kernel/osf_sys.c2
-rw-r--r--arch/mips/kernel/sysirix.c2
-rw-r--r--arch/sparc64/kernel/sparc64_ksyms.c1
-rw-r--r--arch/sparc64/solaris/fs.c2
-rw-r--r--arch/sparc64/solaris/timod.c6
-rw-r--r--fs/file.c8
-rw-r--r--include/linux/fs.h2
-rw-r--r--kernel/sys.c2
-rw-r--r--kernel/sysctl.c8
11 files changed, 41 insertions, 10 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index e2799b5fafea..5681e2fa1496 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1029,6 +1029,14 @@ nr_inodes
Denotes the number of inodes the system has allocated. This number will
grow and shrink dynamically.
+nr_open
+-------
+
+Denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
nr_free_inodes
--------------
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index aa986a35e994..f99254327ae5 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs:
- inode-max
- inode-nr
- inode-state
+- nr_open
- overflowuid
- overflowgid
- suid_dumpable
@@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the maximum.
==============================================================
+nr_open:
+
+This denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
+==============================================================
+
inode-max, inode-nr & inode-state:
As with file handles, the kernel allocates the inode structures
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 6413c5f23226..72f9a619a66d 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -430,7 +430,7 @@ sys_getpagesize(void)
asmlinkage unsigned long
sys_getdtablesize(void)
{
- return NR_OPEN;
+ return sysctl_nr_open;
}
/*
diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c
index 4c477c7ff74a..22fd41e946b2 100644
--- a/arch/mips/kernel/sysirix.c
+++ b/arch/mips/kernel/sysirix.c
@@ -356,7 +356,7 @@ asmlinkage int irix_syssgi(struct pt_regs *regs)
retval = NGROUPS_MAX;
goto out;
case 5:
- retval = NR_OPEN;
+ retval = sysctl_nr_open;
goto out;
case 6:
retval = 1;
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 60765e314bd8..8649635d6d74 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -277,6 +277,7 @@ EXPORT_SYMBOL(sys_getpid);
EXPORT_SYMBOL(sys_geteuid);
EXPORT_SYMBOL(sys_getuid);
EXPORT_SYMBOL(sys_getegid);
+EXPORT_SYMBOL(sysctl_nr_open);
EXPORT_SYMBOL(sys_getgid);
EXPORT_SYMBOL(svr4_getcontext);
EXPORT_SYMBOL(svr4_setcontext);
diff --git a/arch/sparc64/solaris/fs.c b/arch/sparc64/solaris/fs.c
index 61be597bf430..9311bfe4f2f7 100644
--- a/arch/sparc64/solaris/fs.c
+++ b/arch/sparc64/solaris/fs.c
@@ -624,7 +624,7 @@ asmlinkage int solaris_ulimit(int cmd, int val)
case 3: /* UL_GMEMLIM */
return current->signal->rlim[RLIMIT_DATA].rlim_cur;
case 4: /* UL_GDESLIM */
- return NR_OPEN;
+ return sysctl_nr_open;
}
return -EINVAL;
}
diff --git a/arch/sparc64/solaris/timod.c b/arch/sparc64/solaris/timod.c
index a9d32ceabf26..f53123c02c2b 100644
--- a/arch/sparc64/solaris/timod.c
+++ b/arch/sparc64/solaris/timod.c
@@ -859,7 +859,8 @@ asmlinkage int solaris_getmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3)
SOLD("entry");
lock_kernel();
- if(fd >= NR_OPEN) goto out;
+ if (fd >= sysctl_nr_open)
+ goto out;
fdt = files_fdtable(current->files);
filp = fdt->fd[fd];
@@ -927,7 +928,8 @@ asmlinkage int solaris_putmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3)
SOLD("entry");
lock_kernel();
- if(fd >= NR_OPEN) goto out;
+ if (fd >= sysctl_nr_open)
+ goto out;
fdt = files_fdtable(current->files);
filp = fdt->fd[fd];
diff --git a/fs/file.c b/fs/file.c
index c5575de01113..5110acb1c9ef 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -24,6 +24,8 @@ struct fdtable_defer {
struct fdtable *next;
};
+int sysctl_nr_open __read_mostly = 1024*1024;
+
/*
* We use this list to defer free fdtables that have vmalloced
* sets/arrays. By keeping a per-cpu list, we avoid having to embed
@@ -147,8 +149,8 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
nr /= (1024 / sizeof(struct file *));
nr = roundup_pow_of_two(nr + 1);
nr *= (1024 / sizeof(struct file *));
- if (nr > NR_OPEN)
- nr = NR_OPEN;
+ if (nr > sysctl_nr_open)
+ nr = sysctl_nr_open;
fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
if (!fdt)
@@ -233,7 +235,7 @@ int expand_files(struct files_struct *files, int nr)
if (nr < fdt->max_fds)
return 0;
/* Can we expand? */
- if (nr >= NR_OPEN)
+ if (nr >= sysctl_nr_open)
return -EMFILE;
/* All good, so we try */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 19aab50c3b8e..109734bf6377 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -21,7 +21,7 @@
/* Fixed constants first: */
#undef NR_OPEN
-#define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */
+extern int sysctl_nr_open;
#define INR_OPEN 1024 /* Initial setting for nfile rlimits */
#define BLOCK_SIZE_BITS 10
diff --git a/kernel/sys.c b/kernel/sys.c
index 53de35fc8245..2b8e2daa9d95 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1472,7 +1472,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
!capable(CAP_SYS_RESOURCE))
return -EPERM;
- if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
+ if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
return -EPERM;
retval = security_task_setrlimit(resource, &new_rlim);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5e2ad5bf88e2..86daaa26d120 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1203,6 +1203,14 @@ static struct ctl_table fs_table[] = {
.proc_handler = &proc_dointvec,
},
{
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nr_open",
+ .data = &sysctl_nr_open,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
.ctl_name = FS_DENTRY,
.procname = "dentry-state",
.data = &dentry_stat,