diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2008-02-06 01:37:16 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-06 10:41:06 -0800 |
commit | 9cfe015aa424b3c003baba3841a60dd9b5ad319b (patch) | |
tree | 5575e06efcf91018f860f2db43979e8e91aba1c3 | |
parent | 774ed22c21ab95d582dfff38560f11cf290baeb4 (diff) | |
download | lwn-9cfe015aa424b3c003baba3841a60dd9b5ad319b.tar.gz lwn-9cfe015aa424b3c003baba3841a60dd9b5ad319b.zip |
get rid of NR_OPEN and introduce a sysctl_nr_open
NR_OPEN (historically set to 1024*1024) actually forbids processes to open
more than 1024*1024 handles.
Unfortunatly some production servers hit the not so 'ridiculously high
value' of 1024*1024 file descriptors per process.
Changing NR_OPEN is not considered safe because of vmalloc space potential
exhaust.
This patch introduces a new sysctl (/proc/sys/fs/nr_open) wich defaults to
1024*1024, so that admins can decide to change this limit if their workload
needs it.
[akpm@linux-foundation.org: export it for sparc64]
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/filesystems/proc.txt | 8 | ||||
-rw-r--r-- | Documentation/sysctl/fs.txt | 10 | ||||
-rw-r--r-- | arch/alpha/kernel/osf_sys.c | 2 | ||||
-rw-r--r-- | arch/mips/kernel/sysirix.c | 2 | ||||
-rw-r--r-- | arch/sparc64/kernel/sparc64_ksyms.c | 1 | ||||
-rw-r--r-- | arch/sparc64/solaris/fs.c | 2 | ||||
-rw-r--r-- | arch/sparc64/solaris/timod.c | 6 | ||||
-rw-r--r-- | fs/file.c | 8 | ||||
-rw-r--r-- | include/linux/fs.h | 2 | ||||
-rw-r--r-- | kernel/sys.c | 2 | ||||
-rw-r--r-- | kernel/sysctl.c | 8 |
11 files changed, 41 insertions, 10 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index e2799b5fafea..5681e2fa1496 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1029,6 +1029,14 @@ nr_inodes Denotes the number of inodes the system has allocated. This number will grow and shrink dynamically. +nr_open +------- + +Denotes the maximum number of file-handles a process can +allocate. Default value is 1024*1024 (1048576) which should be +enough for most machines. Actual limit depends on RLIMIT_NOFILE +resource limit. + nr_free_inodes -------------- diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index aa986a35e994..f99254327ae5 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs: - inode-max - inode-nr - inode-state +- nr_open - overflowuid - overflowgid - suid_dumpable @@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the maximum. ============================================================== +nr_open: + +This denotes the maximum number of file-handles a process can +allocate. Default value is 1024*1024 (1048576) which should be +enough for most machines. Actual limit depends on RLIMIT_NOFILE +resource limit. + +============================================================== + inode-max, inode-nr & inode-state: As with file handles, the kernel allocates the inode structures diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 6413c5f23226..72f9a619a66d 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -430,7 +430,7 @@ sys_getpagesize(void) asmlinkage unsigned long sys_getdtablesize(void) { - return NR_OPEN; + return sysctl_nr_open; } /* diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c index 4c477c7ff74a..22fd41e946b2 100644 --- a/arch/mips/kernel/sysirix.c +++ b/arch/mips/kernel/sysirix.c @@ -356,7 +356,7 @@ asmlinkage int irix_syssgi(struct pt_regs *regs) retval = NGROUPS_MAX; goto out; case 5: - retval = NR_OPEN; + retval = sysctl_nr_open; goto out; case 6: retval = 1; diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 60765e314bd8..8649635d6d74 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -277,6 +277,7 @@ EXPORT_SYMBOL(sys_getpid); EXPORT_SYMBOL(sys_geteuid); EXPORT_SYMBOL(sys_getuid); EXPORT_SYMBOL(sys_getegid); +EXPORT_SYMBOL(sysctl_nr_open); EXPORT_SYMBOL(sys_getgid); EXPORT_SYMBOL(svr4_getcontext); EXPORT_SYMBOL(svr4_setcontext); diff --git a/arch/sparc64/solaris/fs.c b/arch/sparc64/solaris/fs.c index 61be597bf430..9311bfe4f2f7 100644 --- a/arch/sparc64/solaris/fs.c +++ b/arch/sparc64/solaris/fs.c @@ -624,7 +624,7 @@ asmlinkage int solaris_ulimit(int cmd, int val) case 3: /* UL_GMEMLIM */ return current->signal->rlim[RLIMIT_DATA].rlim_cur; case 4: /* UL_GDESLIM */ - return NR_OPEN; + return sysctl_nr_open; } return -EINVAL; } diff --git a/arch/sparc64/solaris/timod.c b/arch/sparc64/solaris/timod.c index a9d32ceabf26..f53123c02c2b 100644 --- a/arch/sparc64/solaris/timod.c +++ b/arch/sparc64/solaris/timod.c @@ -859,7 +859,8 @@ asmlinkage int solaris_getmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3) SOLD("entry"); lock_kernel(); - if(fd >= NR_OPEN) goto out; + if (fd >= sysctl_nr_open) + goto out; fdt = files_fdtable(current->files); filp = fdt->fd[fd]; @@ -927,7 +928,8 @@ asmlinkage int solaris_putmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3) SOLD("entry"); lock_kernel(); - if(fd >= NR_OPEN) goto out; + if (fd >= sysctl_nr_open) + goto out; fdt = files_fdtable(current->files); filp = fdt->fd[fd]; diff --git a/fs/file.c b/fs/file.c index c5575de01113..5110acb1c9ef 100644 --- a/fs/file.c +++ b/fs/file.c @@ -24,6 +24,8 @@ struct fdtable_defer { struct fdtable *next; }; +int sysctl_nr_open __read_mostly = 1024*1024; + /* * We use this list to defer free fdtables that have vmalloced * sets/arrays. By keeping a per-cpu list, we avoid having to embed @@ -147,8 +149,8 @@ static struct fdtable * alloc_fdtable(unsigned int nr) nr /= (1024 / sizeof(struct file *)); nr = roundup_pow_of_two(nr + 1); nr *= (1024 / sizeof(struct file *)); - if (nr > NR_OPEN) - nr = NR_OPEN; + if (nr > sysctl_nr_open) + nr = sysctl_nr_open; fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); if (!fdt) @@ -233,7 +235,7 @@ int expand_files(struct files_struct *files, int nr) if (nr < fdt->max_fds) return 0; /* Can we expand? */ - if (nr >= NR_OPEN) + if (nr >= sysctl_nr_open) return -EMFILE; /* All good, so we try */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 19aab50c3b8e..109734bf6377 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -21,7 +21,7 @@ /* Fixed constants first: */ #undef NR_OPEN -#define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */ +extern int sysctl_nr_open; #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ #define BLOCK_SIZE_BITS 10 diff --git a/kernel/sys.c b/kernel/sys.c index 53de35fc8245..2b8e2daa9d95 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1472,7 +1472,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) if ((new_rlim.rlim_max > old_rlim->rlim_max) && !capable(CAP_SYS_RESOURCE)) return -EPERM; - if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN) + if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open) return -EPERM; retval = security_task_setrlimit(resource, &new_rlim); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5e2ad5bf88e2..86daaa26d120 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1203,6 +1203,14 @@ static struct ctl_table fs_table[] = { .proc_handler = &proc_dointvec, }, { + .ctl_name = CTL_UNNUMBERED, + .procname = "nr_open", + .data = &sysctl_nr_open, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = FS_DENTRY, .procname = "dentry-state", .data = &dentry_stat, |