diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-07 10:03:24 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-07 10:03:24 -0700 |
commit | eb5699ba31558bdb2cee6ebde3d0a68091e47dce (patch) | |
tree | 3aeab3158f7ae43431405f3aa4f2e1fa3d103206 | |
parent | b5a8466d37d30cfcc8015789f4a3f0c44b6c7bc6 (diff) | |
parent | b99695580bfc1f91364023c673681ddb88e375dc (diff) | |
download | lwn-eb5699ba31558bdb2cee6ebde3d0a68091e47dce.tar.gz lwn-eb5699ba31558bdb2cee6ebde3d0a68091e47dce.zip |
Merge tag 'mm-nonmm-stable-2022-08-06-2' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc updates from Andrew Morton:
"Updates to various subsystems which I help look after. lib, ocfs2,
fatfs, autofs, squashfs, procfs, etc. A relatively small amount of
material this time"
* tag 'mm-nonmm-stable-2022-08-06-2' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (72 commits)
scripts/gdb: ensure the absolute path is generated on initial source
MAINTAINERS: kunit: add David Gow as a maintainer of KUnit
mailmap: add linux.dev alias for Brendan Higgins
mailmap: update Kirill's email
profile: setup_profiling_timer() is moslty not implemented
ocfs2: fix a typo in a comment
ocfs2: use the bitmap API to simplify code
ocfs2: remove some useless functions
lib/mpi: fix typo 'the the' in comment
proc: add some (hopefully) insightful comments
bdi: remove enum wb_congested_state
kernel/hung_task: fix address space of proc_dohung_task_timeout_secs
lib/lzo/lzo1x_compress.c: replace ternary operator with min() and min_t()
squashfs: support reading fragments in readahead call
squashfs: implement readahead
squashfs: always build "file direct" version of page actor
Revert "squashfs: provide backing_dev_info in order to disable read-ahead"
fs/ocfs2: Fix spelling typo in comment
ia64: old_rr4 added under CONFIG_HUGETLB_PAGE
proc: fix test for "vsyscall=xonly" boot option
...
102 files changed, 1313 insertions, 724 deletions
@@ -78,6 +78,7 @@ Boris Brezillon <bbrezillon@kernel.org> <b.brezillon.dev@gmail.com> Boris Brezillon <bbrezillon@kernel.org> <b.brezillon@overkiz.com> Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@bootlin.com> Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@free-electrons.com> +Brendan Higgins <brendan.higgins@linux.dev> <brendanhiggins@google.com> Brian Avery <b.avery@hp.com> Brian King <brking@us.ibm.com> Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com> @@ -230,7 +231,7 @@ Kees Cook <keescook@chromium.org> <kees@ubuntu.com> Keith Busch <kbusch@kernel.org> <keith.busch@intel.com> Keith Busch <kbusch@kernel.org> <keith.busch@linux.intel.com> Kenneth W Chen <kenneth.w.chen@intel.com> -Kirill Tkhai <kirill.tkhai@openvz.org> <ktkhai@virtuozzo.com> +Kirill Tkhai <tkhai@ya.ru> <ktkhai@virtuozzo.com> Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru> Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com> Koushik <raghavendra.koushik@neterion.com> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f251f5cce63b..bab2b0bf5988 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1673,6 +1673,19 @@ hlt [BUGS=ARM,SH] + hostname= [KNL] Set the hostname (aka UTS nodename). + Format: <string> + This allows setting the system's hostname during early + startup. This sets the name returned by gethostname. + Using this parameter to set the hostname makes it + possible to ensure the hostname is correctly set before + any userspace processes run, avoiding the possibility + that a process may call gethostname before the hostname + has been explicitly set, resulting in the calling + process getting an incorrect result. The string must + not exceed the maximum allowed hostname length (usually + 64 characters) and will be truncated otherwise. + hpet= [X86-32,HPET] option to control HPET usage Format: { enable (default) | disable | force | verbose } diff --git a/MAINTAINERS b/MAINTAINERS index 5a6e53255eaf..b4d96ed8bcc6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11065,6 +11065,7 @@ F: fs/smbfs_common/ KERNEL UNIT TESTING FRAMEWORK (KUnit) M: Brendan Higgins <brendanhiggins@google.com> +M: David Gow <davidgow@google.com> L: linux-kselftest@vger.kernel.org L: kunit-dev@googlegroups.com S: Maintained @@ -21277,6 +21278,7 @@ M: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> S: Maintained F: Documentation/filesystems/vfat.rst F: fs/fat/ +F: tools/testing/selftests/filesystems/fat/ VFIO DRIVER M: Alex Williamson <alex.williamson@redhat.com> diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index cb64e4797d2a..f4e20f75438f 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -497,12 +497,6 @@ smp_cpus_done(unsigned int max_cpus) ((bogosum + 2500) / (5000/HZ)) % 100); } -int -setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - static void send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation) { diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index d947473f1e6d..ab9e75e90f72 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -232,14 +232,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) return 0; } -/* - * not supported here - */ -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - /*****************************************************************************/ /* Inter Processor Interrupt Handling */ /*****************************************************************************/ diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 73fc645fc4c7..978db2d96b44 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -787,14 +787,6 @@ void panic_smp_self_stop(void) cpu_relax(); } -/* - * not supported here - */ -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - #ifdef CONFIG_CPU_FREQ static DEFINE_PER_CPU(unsigned long, l_p_j_ref); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 62ed361a4376..ffc5d76cf695 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1078,14 +1078,6 @@ bool smp_crash_stop_failed(void) } #endif -/* - * not supported here - */ -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - static bool have_cpu_die(void) { #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index 6bb38bc2f39b..4b605aa2e1d6 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -243,11 +243,6 @@ void __init smp_cpus_done(unsigned int max_cpus) { } -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - void csky_start_secondary(void) { struct mm_struct *mm = &init_mm; diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c index 619c56420aa0..4ba93e59370c 100644 --- a/arch/hexagon/kernel/smp.c +++ b/arch/hexagon/kernel/smp.c @@ -240,11 +240,6 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) send_ipi(mask, IPI_CALL_FUNC); } -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - void smp_start_cpus(void) { int i; diff --git a/arch/ia64/include/asm/mmu_context.h b/arch/ia64/include/asm/mmu_context.h index 87a0d5bc11ef..06257e355d00 100644 --- a/arch/ia64/include/asm/mmu_context.h +++ b/arch/ia64/include/asm/mmu_context.h @@ -124,9 +124,12 @@ reload_context (nv_mm_context_t context) { unsigned long rid; unsigned long rid_incr = 0; - unsigned long rr0, rr1, rr2, rr3, rr4, old_rr4; + unsigned long rr0, rr1, rr2, rr3, rr4; +#ifdef CONFIG_HUGETLB_PAGE + unsigned long old_rr4; old_rr4 = ia64_get_rr(RGN_BASE(RGN_HPAGE)); +#endif rid = context << 3; /* make space for encoding the region number */ rid_incr = 1 << 8; diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h index 2c2f3cfeaa77..ca2e02685343 100644 --- a/arch/ia64/include/uapi/asm/cmpxchg.h +++ b/arch/ia64/include/uapi/asm/cmpxchg.h @@ -33,24 +33,24 @@ extern void ia64_xchg_called_with_bad_pointer(void); \ switch (size) { \ case 1: \ - __xchg_result = ia64_xchg1((__u8 *)ptr, x); \ + __xchg_result = ia64_xchg1((__u8 __force *)ptr, x); \ break; \ \ case 2: \ - __xchg_result = ia64_xchg2((__u16 *)ptr, x); \ + __xchg_result = ia64_xchg2((__u16 __force *)ptr, x); \ break; \ \ case 4: \ - __xchg_result = ia64_xchg4((__u32 *)ptr, x); \ + __xchg_result = ia64_xchg4((__u32 __force *)ptr, x); \ break; \ \ case 8: \ - __xchg_result = ia64_xchg8((__u64 *)ptr, x); \ + __xchg_result = ia64_xchg8((__u64 __force *)ptr, x); \ break; \ default: \ ia64_xchg_called_with_bad_pointer(); \ } \ - __xchg_result; \ + (__typeof__ (*(ptr)) __force) __xchg_result; \ }) #ifndef __KERNEL__ @@ -76,42 +76,42 @@ extern long ia64_cmpxchg_called_with_bad_pointer(void); \ switch (size) { \ case 1: \ - _o_ = (__u8) (long) (old); \ + _o_ = (__u8) (long __force) (old); \ break; \ case 2: \ - _o_ = (__u16) (long) (old); \ + _o_ = (__u16) (long __force) (old); \ break; \ case 4: \ - _o_ = (__u32) (long) (old); \ + _o_ = (__u32) (long __force) (old); \ break; \ case 8: \ - _o_ = (__u64) (long) (old); \ + _o_ = (__u64) (long __force) (old); \ break; \ default: \ break; \ } \ switch (size) { \ case 1: \ - _r_ = ia64_cmpxchg1_##sem((__u8 *) ptr, new, _o_); \ + _r_ = ia64_cmpxchg1_##sem((__u8 __force *) ptr, new, _o_); \ break; \ \ case 2: \ - _r_ = ia64_cmpxchg2_##sem((__u16 *) ptr, new, _o_); \ + _r_ = ia64_cmpxchg2_##sem((__u16 __force *) ptr, new, _o_); \ break; \ \ case 4: \ - _r_ = ia64_cmpxchg4_##sem((__u32 *) ptr, new, _o_); \ + _r_ = ia64_cmpxchg4_##sem((__u32 __force *) ptr, new, _o_); \ break; \ \ case 8: \ - _r_ = ia64_cmpxchg8_##sem((__u64 *) ptr, new, _o_); \ + _r_ = ia64_cmpxchg8_##sem((__u64 __force *) ptr, new, _o_); \ break; \ \ default: \ _r_ = ia64_cmpxchg_called_with_bad_pointer(); \ break; \ } \ - (__typeof__(old)) _r_; \ + (__typeof__(old) __force) _r_; \ }) #define cmpxchg_acq(ptr, o, n) \ diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 7b7b64eb3129..e2cc59db86bc 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -333,9 +333,3 @@ smp_send_stop (void) { send_IPI_allbutself(IPI_CPU_STOP); } - -int -setup_profiling_timer (unsigned int multiplier) -{ - return -EINVAL; -} diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c index 27041db2c8b0..e1419095a6f0 100644 --- a/arch/openrisc/kernel/smp.c +++ b/arch/openrisc/kernel/smp.c @@ -197,12 +197,6 @@ void smp_send_stop(void) smp_call_function(stop_this_cpu, NULL, 0); } -/* not supported, yet */ -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int)) { smp_cross_call = fn; diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 24d0744c3b3a..7dbd92cafae3 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -513,10 +513,3 @@ void __cpu_die(unsigned int cpu) pdc_cpu_rendezvous_unlock(); } - -#ifdef CONFIG_PROC_FS -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} -#endif diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 6b850c157a62..169703fead57 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1663,13 +1663,6 @@ void start_secondary(void *unused) BUG(); } -#ifdef CONFIG_PROFILING -int setup_profiling_timer(unsigned int multiplier) -{ - return 0; -} -#endif - static void __init fixup_topology(void) { int i; diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 018e7dc45df6..760a64518c58 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -64,12 +64,6 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) return phys_id == cpuid_to_hartid_map(cpu); } -/* Unsupported */ -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - static void ipi_stop(void) { set_cpu_online(smp_processor_id(), false); diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 22b148e5a5f8..ad8094d955eb 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -174,11 +174,6 @@ void smp_call_function_interrupt(void) irq_exit(); } -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - void __init smp_prepare_cpus(unsigned int max_cpus) { int i, cpuid, extra; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index a1f78e9ddaf3..a55295d1b924 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1186,12 +1186,6 @@ void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs) preempt_enable(); } -/* /proc/profile writes can call this, don't __init it please. */ -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - void __init smp_prepare_cpus(unsigned int max_cpus) { } diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index bd8ae0a7010a..3415321c8240 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -98,8 +98,6 @@ static inline bool apic_from_smp_config(void) #include <asm/paravirt.h> #endif -extern int setup_profiling_timer(unsigned int); - static inline void native_apic_mem_write(u32 reg, u32 v) { volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a4347605ab00..6d303d1d276c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1115,11 +1115,6 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt) set_irq_regs(old_regs); } -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - /* * Local APIC start and shutdown */ diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index fe98a1465be6..66556833d7af 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -29,6 +29,12 @@ #define LVL_3 4 #define LVL_TRACE 5 +/* Shared last level cache maps */ +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); + +/* Shared L2 cache maps */ +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); + struct _cache_table { unsigned char descriptor; char cache_type; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5e7f9532a10d..f24227bc3220 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -95,10 +95,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map); DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); EXPORT_PER_CPU_SYMBOL(cpu_die_map); -DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); - -DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); - /* Per CPU bogomips and other parameters */ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 918826eaceea..d5a44fa88acf 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -51,8 +51,6 @@ extern struct file_system_type autofs_fs_type; */ struct autofs_info { struct dentry *dentry; - struct inode *inode; - int flags; struct completion expire_complete; @@ -148,6 +146,11 @@ static inline int autofs_oz_mode(struct autofs_sb_info *sbi) task_pgrp(current) == sbi->oz_pgrp); } +static inline bool autofs_empty(struct autofs_info *ino) +{ + return ino->count < 2; +} + struct inode *autofs_get_inode(struct super_block *, umode_t); void autofs_free_ino(struct autofs_info *); diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c index b3fefd6237c3..038b3d2d9f57 100644 --- a/fs/autofs/expire.c +++ b/fs/autofs/expire.c @@ -371,7 +371,7 @@ static struct dentry *should_expire(struct dentry *dentry, return NULL; } - if (simple_empty(dentry)) + if (autofs_empty(ino)) return NULL; /* Case 2: tree mount, expire iff entire tree is not busy */ diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 9edf243713eb..affa70360b1f 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -20,6 +20,7 @@ struct autofs_info *autofs_new_ino(struct autofs_sb_info *sbi) INIT_LIST_HEAD(&ino->expiring); ino->last_used = jiffies; ino->sbi = sbi; + ino->count = 1; } return ino; } diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 91fe4548c256..ca03c1cae2be 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -10,6 +10,7 @@ #include "autofs_i.h" +static int autofs_dir_permission(struct user_namespace *, struct inode *, int); static int autofs_dir_symlink(struct user_namespace *, struct inode *, struct dentry *, const char *); static int autofs_dir_unlink(struct inode *, struct dentry *); @@ -50,6 +51,7 @@ const struct file_operations autofs_dir_operations = { const struct inode_operations autofs_dir_inode_operations = { .lookup = autofs_lookup, + .permission = autofs_dir_permission, .unlink = autofs_dir_unlink, .symlink = autofs_dir_symlink, .mkdir = autofs_dir_mkdir, @@ -77,6 +79,7 @@ static int autofs_dir_open(struct inode *inode, struct file *file) { struct dentry *dentry = file->f_path.dentry; struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); + struct autofs_info *ino = autofs_dentry_ino(dentry); pr_debug("file=%p dentry=%p %pd\n", file, dentry, dentry); @@ -93,7 +96,7 @@ static int autofs_dir_open(struct inode *inode, struct file *file) * it. */ spin_lock(&sbi->lookup_lock); - if (!path_is_mountpoint(&file->f_path) && simple_empty(dentry)) { + if (!path_is_mountpoint(&file->f_path) && autofs_empty(ino)) { spin_unlock(&sbi->lookup_lock); return -ENOENT; } @@ -288,9 +291,26 @@ static struct dentry *autofs_mountpoint_changed(struct path *path) struct dentry *dentry = path->dentry; struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); - /* - * If this is an indirect mount the dentry could have gone away - * as a result of an expire and a new one created. + /* If this is an indirect mount the dentry could have gone away + * and a new one created. + * + * This is unusual and I can't remember the case for which it + * was originally added now. But an example of how this can + * happen is an autofs indirect mount that has the "browse" + * option set and also has the "symlink" option in the autofs + * map entry. In this case the daemon will remove the browse + * directory and create a symlink as the mount leaving the + * struct path stale. + * + * Another not so obvious case is when a mount in an autofs + * indirect mount that uses the "nobrowse" option is being + * expired at the same time as a path walk. If the mount has + * been umounted but the mount point directory seen before + * becoming unhashed (during a lockless path walk) when a stat + * family system call is made the mount won't be re-mounted as + * it should. In this case the mount point that's been removed + * (by the daemon) will be stale and the a new mount point + * dentry created. */ if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { struct dentry *parent = dentry->d_parent; @@ -362,7 +382,7 @@ static struct vfsmount *autofs_d_automount(struct path *path) * the mount never trigger mounts themselves (they have an * autofs trigger mount mounted on them). But v4 pseudo direct * mounts do need the leaves to trigger mounts. In this case - * we have no choice but to use the list_empty() check and + * we have no choice but to use the autofs_empty() check and * require user space behave. */ if (sbi->version > 4) { @@ -371,7 +391,7 @@ static struct vfsmount *autofs_d_automount(struct path *path) goto done; } } else { - if (!simple_empty(dentry)) { + if (!autofs_empty(ino)) { spin_unlock(&sbi->fs_lock); goto done; } @@ -426,9 +446,8 @@ static int autofs_d_manage(const struct path *path, bool rcu_walk) if (rcu_walk) { /* We don't need fs_lock in rcu_walk mode, - * just testing 'AUTOFS_INFO_NO_RCU' is enough. - * simple_empty() takes a spinlock, so leave it - * to last. + * just testing 'AUTOFS_INF_WANT_EXPIRE' is enough. + * * We only return -EISDIR when certain this isn't * a mount-trap. */ @@ -441,9 +460,7 @@ static int autofs_d_manage(const struct path *path, bool rcu_walk) inode = d_inode_rcu(dentry); if (inode && S_ISLNK(inode->i_mode)) return -EISDIR; - if (list_empty(&dentry->d_subdirs)) - return 0; - if (!simple_empty(dentry)) + if (!autofs_empty(ino)) return -EISDIR; return 0; } @@ -463,7 +480,7 @@ static int autofs_d_manage(const struct path *path, bool rcu_walk) * we can avoid needless calls ->d_automount() and avoid * an incorrect ELOOP error return. */ - if ((!path_is_mountpoint(path) && !simple_empty(dentry)) || + if ((!path_is_mountpoint(path) && !autofs_empty(ino)) || (d_really_is_positive(dentry) && d_is_symlink(dentry))) status = -EISDIR; } @@ -526,11 +543,30 @@ static struct dentry *autofs_lookup(struct inode *dir, return NULL; } +static int autofs_dir_permission(struct user_namespace *mnt_userns, + struct inode *inode, int mask) +{ + if (mask & MAY_WRITE) { + struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb); + + if (!autofs_oz_mode(sbi)) + return -EACCES; + + /* autofs_oz_mode() needs to allow path walks when the + * autofs mount is catatonic but the state of an autofs + * file system needs to be preserved over restarts. + */ + if (sbi->flags & AUTOFS_SBI_CATATONIC) + return -EACCES; + } + + return generic_permission(mnt_userns, inode, mask); +} + static int autofs_dir_symlink(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, const char *symname) { - struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb); struct autofs_info *ino = autofs_dentry_ino(dentry); struct autofs_info *p_ino; struct inode *inode; @@ -539,16 +575,6 @@ static int autofs_dir_symlink(struct user_namespace *mnt_userns, pr_debug("%s <- %pd\n", symname, dentry); - if (!autofs_oz_mode(sbi)) - return -EACCES; - - /* autofs_oz_mode() needs to allow path walks when the - * autofs mount is catatonic but the state of an autofs - * file system needs to be preserved over restarts. - */ - if (sbi->flags & AUTOFS_SBI_CATATONIC) - return -EACCES; - BUG_ON(!ino); autofs_clean_ino(ino); @@ -571,7 +597,6 @@ static int autofs_dir_symlink(struct user_namespace *mnt_userns, d_add(dentry, inode); dget(dentry); - ino->count++; p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count++; @@ -601,17 +626,6 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry) struct autofs_info *ino = autofs_dentry_ino(dentry); struct autofs_info *p_ino; - if (!autofs_oz_mode(sbi)) - return -EACCES; - - /* autofs_oz_mode() needs to allow path walks when the - * autofs mount is catatonic but the state of an autofs - * file system needs to be preserved over restarts. - */ - if (sbi->flags & AUTOFS_SBI_CATATONIC) - return -EACCES; - - ino->count--; p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count--; dput(ino->dentry); @@ -683,16 +697,6 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry) pr_debug("dentry %p, removing %pd\n", dentry, dentry); - if (!autofs_oz_mode(sbi)) - return -EACCES; - - /* autofs_oz_mode() needs to allow path walks when the - * autofs mount is catatonic but the state of an autofs - * file system needs to be preserved over restarts. - */ - if (sbi->flags & AUTOFS_SBI_CATATONIC) - return -EACCES; - if (ino->count != 1) return -ENOTEMPTY; @@ -704,7 +708,6 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry) if (sbi->version < 5) autofs_clear_leaf_automount_flags(dentry); - ino->count--; p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count--; dput(ino->dentry); @@ -726,16 +729,6 @@ static int autofs_dir_mkdir(struct user_namespace *mnt_userns, struct autofs_info *p_ino; struct inode *inode; - if (!autofs_oz_mode(sbi)) - return -EACCES; - - /* autofs_oz_mode() needs to allow path walks when the - * autofs mount is catatonic but the state of an autofs - * file system needs to be preserved over restarts. - */ - if (sbi->flags & AUTOFS_SBI_CATATONIC) - return -EACCES; - pr_debug("dentry %p, creating %pd\n", dentry, dentry); BUG_ON(!ino); @@ -753,7 +746,6 @@ static int autofs_dir_mkdir(struct user_namespace *mnt_userns, autofs_set_leaf_automount_flags(dentry); dget(dentry); - ino->count++; p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count++; inc_nlink(dir); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index e2daa940ebce..8b56b94e2f56 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1747,6 +1747,21 @@ static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms) return to; } +/* + * autoremove_wake_function, but remove even on failure to wake up, because we + * know that default_wake_function/ttwu will only fail if the thread is already + * woken, and in that case the ep_poll loop will remove the entry anyways, not + * try to reuse it. + */ +static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry, + unsigned int mode, int sync, void *key) +{ + int ret = default_wake_function(wq_entry, mode, sync, key); + + list_del_init(&wq_entry->entry); + return ret; +} + /** * ep_poll - Retrieves ready events, and delivers them to the caller-supplied * event buffer. @@ -1828,8 +1843,15 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, * normal wakeup path no need to call __remove_wait_queue() * explicitly, thus ep->lock is not taken, which halts the * event delivery. + * + * In fact, we now use an even more aggressive function that + * unconditionally removes, because we don't reuse the wait + * entry between loop iterations. This lets us also avoid the + * performance issue if a process is killed, causing all of its + * threads to wake up without being removed normally. */ init_wait(&wait); + wait.func = ep_autoremove_wake_function; write_lock_irq(&ep->lock); /* diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index c573314806cf..21620054e1c4 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -889,22 +889,57 @@ out: return err; } -static int vfat_rename(struct user_namespace *mnt_userns, struct inode *old_dir, - struct dentry *old_dentry, struct inode *new_dir, - struct dentry *new_dentry, unsigned int flags) +static int vfat_get_dotdot_de(struct inode *inode, struct buffer_head **bh, + struct msdos_dir_entry **de) +{ + if (S_ISDIR(inode->i_mode)) { + if (fat_get_dotdot_entry(inode, bh, de)) + return -EIO; + } + return 0; +} + +static int vfat_sync_ipos(struct inode *dir, struct inode *inode) +{ + if (IS_DIRSYNC(dir)) + return fat_sync_inode(inode); + mark_inode_dirty(inode); + return 0; +} + +static int vfat_update_dotdot_de(struct inode *dir, struct inode *inode, + struct buffer_head *dotdot_bh, + struct msdos_dir_entry *dotdot_de) +{ + fat_set_start(dotdot_de, MSDOS_I(dir)->i_logstart); + mark_buffer_dirty_inode(dotdot_bh, inode); + if (IS_DIRSYNC(dir)) + return sync_dirty_buffer(dotdot_bh); + return 0; +} + +static void vfat_update_dir_metadata(struct inode *dir, struct timespec64 *ts) +{ + inode_inc_iversion(dir); + fat_truncate_time(dir, ts, S_CTIME | S_MTIME); + if (IS_DIRSYNC(dir)) + (void)fat_sync_inode(dir); + else + mark_inode_dirty(dir); +} + +static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) { struct buffer_head *dotdot_bh; - struct msdos_dir_entry *dotdot_de; + struct msdos_dir_entry *dotdot_de = NULL; struct inode *old_inode, *new_inode; struct fat_slot_info old_sinfo, sinfo; struct timespec64 ts; loff_t new_i_pos; - int err, is_dir, update_dotdot, corrupt = 0; + int err, is_dir, corrupt = 0; struct super_block *sb = old_dir->i_sb; - if (flags & ~RENAME_NOREPLACE) - return -EINVAL; - old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; old_inode = d_inode(old_dentry); new_inode = d_inode(new_dentry); @@ -913,15 +948,13 @@ static int vfat_rename(struct user_namespace *mnt_userns, struct inode *old_dir, if (err) goto out; - is_dir = S_ISDIR(old_inode->i_mode); - update_dotdot = (is_dir && old_dir != new_dir); - if (update_dotdot) { - if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de)) { - err = -EIO; + if (old_dir != new_dir) { + err = vfat_get_dotdot_de(old_inode, &dotdot_bh, &dotdot_de); + if (err) goto out; - } } + is_dir = S_ISDIR(old_inode->i_mode); ts = current_time(old_dir); if (new_inode) { if (is_dir) { @@ -942,21 +975,15 @@ static int vfat_rename(struct user_namespace *mnt_userns, struct inode *old_dir, fat_detach(old_inode); fat_attach(old_inode, new_i_pos); - if (IS_DIRSYNC(new_dir)) { - err = fat_sync_inode(old_inode); - if (err) - goto error_inode; - } else - mark_inode_dirty(old_inode); + err = vfat_sync_ipos(new_dir, old_inode); + if (err) + goto error_inode; - if (update_dotdot) { - fat_set_start(dotdot_de, MSDOS_I(new_dir)->i_logstart); - mark_buffer_dirty_inode(dotdot_bh, old_inode); - if (IS_DIRSYNC(new_dir)) { - err = sync_dirty_buffer(dotdot_bh); - if (err) - goto error_dotdot; - } + if (dotdot_de) { + err = vfat_update_dotdot_de(new_dir, old_inode, dotdot_bh, + dotdot_de); + if (err) + goto error_dotdot; drop_nlink(old_dir); if (!new_inode) inc_nlink(new_dir); @@ -966,12 +993,7 @@ static int vfat_rename(struct user_namespace *mnt_userns, struct inode *old_dir, old_sinfo.bh = NULL; if (err) goto error_dotdot; - inode_inc_iversion(old_dir); - fat_truncate_time(old_dir, &ts, S_CTIME|S_MTIME); - if (IS_DIRSYNC(old_dir)) - (void)fat_sync_inode(old_dir); - else - mark_inode_dirty(old_dir); + vfat_update_dir_metadata(old_dir, &ts); if (new_inode) { drop_nlink(new_inode); @@ -991,10 +1013,9 @@ error_dotdot: /* data cluster is shared, serious corruption */ corrupt = 1; - if (update_dotdot) { - fat_set_start(dotdot_de, MSDOS_I(old_dir)->i_logstart); - mark_buffer_dirty_inode(dotdot_bh, old_inode); - corrupt |= sync_dirty_buffer(dotdot_bh); + if (dotdot_de) { + corrupt |= vfat_update_dotdot_de(old_dir, old_inode, dotdot_bh, + dotdot_de); } error_inode: fat_detach(old_inode); @@ -1021,13 +1042,145 @@ error_inode: goto out; } +static void vfat_exchange_ipos(struct inode *old_inode, struct inode *new_inode, + loff_t old_i_pos, loff_t new_i_pos) +{ + fat_detach(old_inode); + fat_detach(new_inode); + fat_attach(old_inode, new_i_pos); + fat_attach(new_inode, old_i_pos); +} + +static void vfat_move_nlink(struct inode *src, struct inode *dst) +{ + drop_nlink(src); + inc_nlink(dst); +} + +static int vfat_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct buffer_head *old_dotdot_bh = NULL, *new_dotdot_bh = NULL; + struct msdos_dir_entry *old_dotdot_de = NULL, *new_dotdot_de = NULL; + struct inode *old_inode, *new_inode; + struct timespec64 ts = current_time(old_dir); + loff_t old_i_pos, new_i_pos; + int err, corrupt = 0; + struct super_block *sb = old_dir->i_sb; + + old_inode = d_inode(old_dentry); + new_inode = d_inode(new_dentry); + + /* Acquire super block lock for the operation to be atomic */ + mutex_lock(&MSDOS_SB(sb)->s_lock); + + /* if directories are not the same, get ".." info to update */ + if (old_dir != new_dir) { + err = vfat_get_dotdot_de(old_inode, &old_dotdot_bh, + &old_dotdot_de); + if (err) + goto out; + + err = vfat_get_dotdot_de(new_inode, &new_dotdot_bh, + &new_dotdot_de); + if (err) + goto out; + } + + old_i_pos = MSDOS_I(old_inode)->i_pos; + new_i_pos = MSDOS_I(new_inode)->i_pos; + + vfat_exchange_ipos(old_inode, new_inode, old_i_pos, new_i_pos); + + err = vfat_sync_ipos(old_dir, new_inode); + if (err) + goto error_exchange; + err = vfat_sync_ipos(new_dir, old_inode); + if (err) + goto error_exchange; + + /* update ".." directory entry info */ + if (old_dotdot_de) { + err = vfat_update_dotdot_de(new_dir, old_inode, old_dotdot_bh, + old_dotdot_de); + if (err) + goto error_old_dotdot; + } + if (new_dotdot_de) { + err = vfat_update_dotdot_de(old_dir, new_inode, new_dotdot_bh, + new_dotdot_de); + if (err) + goto error_new_dotdot; + } + + /* if cross directory and only one is a directory, adjust nlink */ + if (!old_dotdot_de != !new_dotdot_de) { + if (old_dotdot_de) + vfat_move_nlink(old_dir, new_dir); + else + vfat_move_nlink(new_dir, old_dir); + } + + vfat_update_dir_metadata(old_dir, &ts); + /* if directories are not the same, update new_dir as well */ + if (old_dir != new_dir) + vfat_update_dir_metadata(new_dir, &ts); + +out: + brelse(old_dotdot_bh); + brelse(new_dotdot_bh); + mutex_unlock(&MSDOS_SB(sb)->s_lock); + + return err; + +error_new_dotdot: + if (new_dotdot_de) { + corrupt |= vfat_update_dotdot_de(new_dir, new_inode, + new_dotdot_bh, new_dotdot_de); + } + +error_old_dotdot: + if (old_dotdot_de) { + corrupt |= vfat_update_dotdot_de(old_dir, old_inode, + old_dotdot_bh, old_dotdot_de); + } + +error_exchange: + vfat_exchange_ipos(old_inode, new_inode, new_i_pos, old_i_pos); + corrupt |= vfat_sync_ipos(new_dir, new_inode); + corrupt |= vfat_sync_ipos(old_dir, old_inode); + + if (corrupt < 0) { + fat_fs_error(new_dir->i_sb, + "%s: Filesystem corrupted (i_pos %lld, %lld)", + __func__, old_i_pos, new_i_pos); + } + goto out; +} + +static int vfat_rename2(struct user_namespace *mnt_userns, struct inode *old_dir, + struct dentry *old_dentry, struct inode *new_dir, + struct dentry *new_dentry, unsigned int flags) +{ + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + return -EINVAL; + + if (flags & RENAME_EXCHANGE) { + return vfat_rename_exchange(old_dir, old_dentry, + new_dir, new_dentry); + } + + /* VFS already handled RENAME_NOREPLACE, handle it as a normal rename */ + return vfat_rename(old_dir, old_dentry, new_dir, new_dentry); +} + static const struct inode_operations vfat_dir_inode_operations = { .create = vfat_create, .lookup = vfat_lookup, .unlink = vfat_unlink, .mkdir = vfat_mkdir, .rmdir = vfat_rmdir, - .rename = vfat_rename, + .rename = vfat_rename2, .setattr = fat_setattr, .getattr = fat_getattr, .update_time = fat_update_time, diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c index 1b07550485b9..5d826274570c 100644 --- a/fs/kernel_read_file.c +++ b/fs/kernel_read_file.c @@ -29,15 +29,15 @@ * change between calls to kernel_read_file(). * * Returns number of bytes read (no single read will be bigger - * than INT_MAX), or negative on error. + * than SSIZE_MAX), or negative on error. * */ -int kernel_read_file(struct file *file, loff_t offset, void **buf, - size_t buf_size, size_t *file_size, - enum kernel_read_file_id id) +ssize_t kernel_read_file(struct file *file, loff_t offset, void **buf, + size_t buf_size, size_t *file_size, + enum kernel_read_file_id id) { loff_t i_size, pos; - size_t copied; + ssize_t copied; void *allocated = NULL; bool whole_file; int ret; @@ -58,7 +58,7 @@ int kernel_read_file(struct file *file, loff_t offset, void **buf, goto out; } /* The file is too big for sane activities. */ - if (i_size > INT_MAX) { + if (i_size > SSIZE_MAX) { ret = -EFBIG; goto out; } @@ -124,12 +124,12 @@ out: } EXPORT_SYMBOL_GPL(kernel_read_file); -int kernel_read_file_from_path(const char *path, loff_t offset, void **buf, - size_t buf_size, size_t *file_size, - enum kernel_read_file_id id) +ssize_t kernel_read_file_from_path(const char *path, loff_t offset, void **buf, + size_t buf_size, size_t *file_size, + enum kernel_read_file_id id) { struct file *file; - int ret; + ssize_t ret; if (!path || !*path) return -EINVAL; @@ -144,14 +144,14 @@ int kernel_read_file_from_path(const char *path, loff_t offset, void **buf, } EXPORT_SYMBOL_GPL(kernel_read_file_from_path); -int kernel_read_file_from_path_initns(const char *path, loff_t offset, - void **buf, size_t buf_size, - size_t *file_size, - enum kernel_read_file_id id) +ssize_t kernel_read_file_from_path_initns(const char *path, loff_t offset, + void **buf, size_t buf_size, + size_t *file_size, + enum kernel_read_file_id id) { struct file *file; struct path root; - int ret; + ssize_t ret; if (!path || !*path) return -EINVAL; @@ -171,12 +171,12 @@ int kernel_read_file_from_path_initns(const char *path, loff_t offset, } EXPORT_SYMBOL_GPL(kernel_read_file_from_path_initns); -int kernel_read_file_from_fd(int fd, loff_t offset, void **buf, - size_t buf_size, size_t *file_size, - enum kernel_read_file_id id) +ssize_t kernel_read_file_from_fd(int fd, loff_t offset, void **buf, + size_t buf_size, size_t *file_size, + enum kernel_read_file_id id) { struct fd f = fdget(fd); - int ret = -EBADF; + ssize_t ret = -EBADF; if (!f.file || !(f.file->f_mode & FMODE_READ)) goto out; diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index e360543ad7e7..8b2020f92b5f 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -296,17 +296,25 @@ static void dlmfs_evict_inode(struct inode *inode) { int status; struct dlmfs_inode_private *ip; + struct user_lock_res *lockres; + int teardown; clear_inode(inode); mlog(0, "inode %lu\n", inode->i_ino); ip = DLMFS_I(inode); + lockres = &ip->ip_lockres; if (S_ISREG(inode->i_mode)) { - status = user_dlm_destroy_lock(&ip->ip_lockres); - if (status < 0) - mlog_errno(status); + spin_lock(&lockres->l_lock); + teardown = !!(lockres->l_flags & USER_LOCK_IN_TEARDOWN); + spin_unlock(&lockres->l_lock); + if (!teardown) { + status = user_dlm_destroy_lock(lockres); + if (status < 0) + mlog_errno(status); + } iput(ip->ip_parent); goto clear_fields; } diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index 9099d8fc7599..22da768e65b7 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c @@ -2,12 +2,13 @@ /* * heartbeat.c * - * Register ourselves with the heartbaet service, keep our node maps + * Register ourselves with the heartbeat service, keep our node maps * up to date, and fire off recovery when needed. * * Copyright (C) 2002, 2004 Oracle. All rights reserved. */ +#include <linux/bitmap.h> #include <linux/fs.h> #include <linux/types.h> #include <linux/highmem.h> @@ -24,18 +25,12 @@ #include "buffer_head_io.h" -static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, - int bit); -static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, - int bit); - /* special case -1 for now * TODO: should *really* make sure the calling func never passes -1!! */ static void ocfs2_node_map_init(struct ocfs2_node_map *map) { map->num_nodes = OCFS2_NODE_MAP_MAX_NODES; - memset(map->map, 0, BITS_TO_LONGS(OCFS2_NODE_MAP_MAX_NODES) * - sizeof(unsigned long)); + bitmap_zero(map->map, OCFS2_NODE_MAP_MAX_NODES); } void ocfs2_init_node_maps(struct ocfs2_super *osb) @@ -65,12 +60,6 @@ void ocfs2_do_node_down(int node_num, void *data) ocfs2_recovery_thread(osb, node_num); } -static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, - int bit) -{ - set_bit(bit, map->map); -} - void ocfs2_node_map_set_bit(struct ocfs2_super *osb, struct ocfs2_node_map *map, int bit) @@ -79,16 +68,10 @@ void ocfs2_node_map_set_bit(struct ocfs2_super *osb, return; BUG_ON(bit >= map->num_nodes); spin_lock(&osb->node_map_lock); - __ocfs2_node_map_set_bit(map, bit); + set_bit(bit, map->map); spin_unlock(&osb->node_map_lock); } -static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, - int bit) -{ - clear_bit(bit, map->map); -} - void ocfs2_node_map_clear_bit(struct ocfs2_super *osb, struct ocfs2_node_map *map, int bit) @@ -97,7 +80,7 @@ void ocfs2_node_map_clear_bit(struct ocfs2_super *osb, return; BUG_ON(bit >= map->num_nodes); spin_lock(&osb->node_map_lock); - __ocfs2_node_map_clear_bit(map, bit); + clear_bit(bit, map->map); spin_unlock(&osb->node_map_lock); } diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 0b6f551a342a..dc9f76ab7e13 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -412,7 +412,7 @@ out_unlock: goto out_err; } -/* Write information to global quota file. Expects exlusive lock on quota +/* Write information to global quota file. Expects exclusive lock on quota * file inode and quota info */ static int __ocfs2_global_write_info(struct super_block *sb, int type) { diff --git a/fs/proc/array.c b/fs/proc/array.c index eb815759842c..99fcbfda8e25 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -69,7 +69,6 @@ #include <linux/sched/cputime.h> #include <linux/proc_fs.h> #include <linux/ioport.h> -#include <linux/uaccess.h> #include <linux/io.h> #include <linux/mm.h> #include <linux/hugetlb.h> @@ -100,6 +99,10 @@ void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape) { char tcomm[64]; + /* + * Test before PF_KTHREAD because all workqueue worker threads are + * kernel threads. + */ if (p->flags & PF_WQ_WORKER) wq_worker_comm(tcomm, sizeof(tcomm), p); else if (p->flags & PF_KTHREAD) diff --git a/fs/proc/base.c b/fs/proc/base.c index 8dfa36a99c74..93f7e3d971e4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1885,7 +1885,7 @@ void proc_pid_evict_inode(struct proc_inode *ei) put_pid(pid); } -struct inode *proc_pid_make_inode(struct super_block * sb, +struct inode *proc_pid_make_inode(struct super_block *sb, struct task_struct *task, umode_t mode) { struct inode * inode; @@ -1914,11 +1914,6 @@ struct inode *proc_pid_make_inode(struct super_block * sb, /* Let the pid remember us for quick removal */ ei->pid = pid; - if (S_ISDIR(mode)) { - spin_lock(&pid->lock); - hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); - spin_unlock(&pid->lock); - } task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); security_task_to_inode(task, inode); @@ -1931,6 +1926,39 @@ out_unlock: return NULL; } +/* + * Generating an inode and adding it into @pid->inodes, so that task will + * invalidate inode's dentry before being released. + * + * This helper is used for creating dir-type entries under '/proc' and + * '/proc/<tgid>/task'. Other entries(eg. fd, stat) under '/proc/<tgid>' + * can be released by invalidating '/proc/<tgid>' dentry. + * In theory, dentries under '/proc/<tgid>/task' can also be released by + * invalidating '/proc/<tgid>' dentry, we reserve it to handle single + * thread exiting situation: Any one of threads should invalidate its + * '/proc/<tgid>/task/<pid>' dentry before released. + */ +static struct inode *proc_pid_make_base_inode(struct super_block *sb, + struct task_struct *task, umode_t mode) +{ + struct inode *inode; + struct proc_inode *ei; + struct pid *pid; + + inode = proc_pid_make_inode(sb, task, mode); + if (!inode) + return NULL; + + /* Let proc_flush_pid find this directory inode */ + ei = PROC_I(inode); + pid = ei->pid; + spin_lock(&pid->lock); + hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); + spin_unlock(&pid->lock); + + return inode; +} + int pid_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { @@ -3369,7 +3397,8 @@ static struct dentry *proc_pid_instantiate(struct dentry * dentry, { struct inode *inode; - inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); + inode = proc_pid_make_base_inode(dentry->d_sb, task, + S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) return ERR_PTR(-ENOENT); @@ -3671,7 +3700,8 @@ static struct dentry *proc_task_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { struct inode *inode; - inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); + inode = proc_pid_make_base_inode(dentry->d_sb, task, + S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) return ERR_PTR(-ENOENT); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 73aeb4e6d32e..f130499ad843 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -26,8 +26,6 @@ #include <linux/mount.h> #include <linux/bug.h> -#include <linux/uaccess.h> - #include "internal.h" static void proc_evict_inode(struct inode *inode) @@ -214,7 +212,15 @@ static void unuse_pde(struct proc_dir_entry *pde) complete(pde->pde_unload_completion); } -/* pde is locked on entry, unlocked on exit */ +/* + * At most 2 contexts can enter this function: the one doing the last + * close on the descriptor and whoever is deleting PDE itself. + * + * First to enter calls ->proc_release hook and signals its completion + * to the second one which waits and then does nothing. + * + * PDE is locked on entry, unlocked on exit. + */ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) __releases(&pde->pde_unload_lock) { @@ -224,9 +230,6 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) * * rmmod (remove_proc_entry() et al) can't delete an entry and proceed: * "struct file" needs to be available at the right moment. - * - * Therefore, first process to enter this function does ->release() and - * signals its completion to the other process which does nothing. */ if (pdeo->closing) { /* somebody else is doing that, just wait */ @@ -240,10 +243,12 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) pdeo->closing = true; spin_unlock(&pde->pde_unload_lock); + file = pdeo->file; pde->proc_ops->proc_release(file_inode(file), file); + spin_lock(&pde->pde_unload_lock); - /* After ->release. */ + /* Strictly after ->proc_release, see above. */ list_del(&pdeo->lh); c = pdeo->c; spin_unlock(&pde->pde_unload_lock); diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index b38ad552887f..592e6dc7c110 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -15,7 +15,6 @@ #include <linux/fs.h> #include <linux/syslog.h> -#include <linux/uaccess.h> #include <asm/io.h> extern wait_queue_head_t log_wait; diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 13452b32e2bd..4d3493579458 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -21,7 +21,6 @@ #include <linux/seq_file.h> #include <linux/hugetlb.h> #include <linux/vmalloc.h> -#include <linux/uaccess.h> #include <asm/tlb.h> #include <asm/div64.h> #include "internal.h" diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 913e5acefbb6..856839b8ae8b 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -8,9 +8,6 @@ * * proc net directory handling functions */ - -#include <linux/uaccess.h> - #include <linux/errno.h> #include <linux/time.h> #include <linux/proc_fs.h> @@ -353,6 +350,12 @@ static __net_init int proc_net_ns_init(struct net *net) kgid_t gid; int err; + /* + * This PDE acts only as an anchor for /proc/${pid}/net hierarchy. + * Corresponding inode (PDE(inode) == net->proc_net) is never + * instantiated therefore blanket zeroing is fine. + * net->proc_net_stat inode is instantiated normally. + */ err = -ENOMEM; netd = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL); if (!netd) diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index c69ff191e5d8..5c6a5ceab2f1 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -4,8 +4,6 @@ * * Copyright 1997, Theodore Ts'o */ - -#include <linux/uaccess.h> #include <linux/module.h> #include <linux/init.h> #include <linux/errno.h> diff --git a/fs/proc/root.c b/fs/proc/root.c index c7e3b1350ef8..3c2ee3eb1138 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -6,9 +6,6 @@ * * proc root directory handling functions */ - -#include <linux/uaccess.h> - #include <linux/errno.h> #include <linux/time.h> #include <linux/proc_fs.h> @@ -305,6 +302,11 @@ void __init proc_root_init(void) proc_mkdir("bus", NULL); proc_sys_init(); + /* + * Last things last. It is not like userspace processes eager + * to open /proc files exist at this point but register last + * anyway. + */ register_filesystem(&proc_fs_type); } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 4eaeb645e759..f2aa86c421f2 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -25,7 +25,6 @@ #include <linux/mutex.h> #include <linux/vmalloc.h> #include <linux/pagemap.h> -#include <linux/uaccess.h> #include <linux/uio.h> #include <linux/cc_platform.h> #include <asm/io.h> diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 7bd9b8b856d0..477c89a519ee 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -5,9 +5,9 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o -squashfs-y += namei.o super.o symlink.o decompressor.o +squashfs-y += namei.o super.o symlink.o decompressor.o page_actor.o squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o -squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o +squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 8879d052f96c..833aca92301f 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -34,12 +34,15 @@ static int copy_bio_to_actor(struct bio *bio, struct squashfs_page_actor *actor, int offset, int req_length) { - void *actor_addr = squashfs_first_page(actor); + void *actor_addr; struct bvec_iter_all iter_all = {}; struct bio_vec *bvec = bvec_init_iter_all(&iter_all); int copied_bytes = 0; int actor_offset = 0; + squashfs_actor_nobuff(actor); + actor_addr = squashfs_first_page(actor); + if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all))) return 0; @@ -49,8 +52,9 @@ static int copy_bio_to_actor(struct bio *bio, bytes_to_copy = min_t(int, bytes_to_copy, req_length - copied_bytes); - memcpy(actor_addr + actor_offset, bvec_virt(bvec) + offset, - bytes_to_copy); + if (!IS_ERR(actor_addr)) + memcpy(actor_addr + actor_offset, bvec_virt(bvec) + + offset, bytes_to_copy); actor_offset += bytes_to_copy; copied_bytes += bytes_to_copy; diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 1b9ccfd0aa51..19ab60834389 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -20,6 +20,7 @@ struct squashfs_decompressor { struct bio *, int, int, struct squashfs_page_actor *); int id; char *name; + int alloc_buffer; int supported; }; diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 7f0904b20329..98e64fec75b7 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -39,6 +39,7 @@ #include "squashfs_fs_sb.h" #include "squashfs_fs_i.h" #include "squashfs.h" +#include "page_actor.h" /* * Locate cache slot in range [offset, index] for specified inode. If @@ -496,7 +497,137 @@ out: return res; } +static int squashfs_readahead_fragment(struct page **page, + unsigned int pages, unsigned int expected) +{ + struct inode *inode = page[0]->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + unsigned int n, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; + + if (buffer->error) + goto out; + + expected += squashfs_i(inode)->fragment_offset; + + for (n = 0; n < pages; n++) { + unsigned int base = (page[n]->index & mask) << PAGE_SHIFT; + unsigned int offset = base + squashfs_i(inode)->fragment_offset; + + if (expected > offset) { + unsigned int avail = min_t(unsigned int, expected - + offset, PAGE_SIZE); + + squashfs_fill_page(page[n], buffer, offset, avail); + } + + unlock_page(page[n]); + put_page(page[n]); + } + +out: + squashfs_cache_put(buffer); + return buffer->error; +} + +static void squashfs_readahead(struct readahead_control *ractl) +{ + struct inode *inode = ractl->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + size_t mask = (1UL << msblk->block_log) - 1; + unsigned short shift = msblk->block_log - PAGE_SHIFT; + loff_t start = readahead_pos(ractl) & ~mask; + size_t len = readahead_length(ractl) + readahead_pos(ractl) - start; + struct squashfs_page_actor *actor; + unsigned int nr_pages = 0; + struct page **pages; + int i, file_end = i_size_read(inode) >> msblk->block_log; + unsigned int max_pages = 1UL << shift; + + readahead_expand(ractl, start, (len | mask) + 1); + + pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL); + if (!pages) + return; + + for (;;) { + pgoff_t index; + int res, bsize; + u64 block = 0; + unsigned int expected; + + nr_pages = __readahead_batch(ractl, pages, max_pages); + if (!nr_pages) + break; + + if (readahead_pos(ractl) >= i_size_read(inode)) + goto skip_pages; + + index = pages[0]->index >> shift; + if ((pages[nr_pages - 1]->index >> shift) != index) + goto skip_pages; + + expected = index == file_end ? + (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; + + if (index == file_end && squashfs_i(inode)->fragment_block != + SQUASHFS_INVALID_BLK) { + res = squashfs_readahead_fragment(pages, nr_pages, + expected); + if (res) + goto skip_pages; + continue; + } + + bsize = read_blocklist(inode, index, &block); + if (bsize == 0) + goto skip_pages; + + actor = squashfs_page_actor_init_special(msblk, pages, nr_pages, + expected); + if (!actor) + goto skip_pages; + + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); + + kfree(actor); + + if (res == expected) { + int bytes; + + /* Last page (if present) may have trailing bytes not filled */ + bytes = res % PAGE_SIZE; + if (pages[nr_pages - 1]->index == file_end && bytes) + memzero_page(pages[nr_pages - 1], bytes, + PAGE_SIZE - bytes); + + for (i = 0; i < nr_pages; i++) { + flush_dcache_page(pages[i]); + SetPageUptodate(pages[i]); + } + } + + for (i = 0; i < nr_pages; i++) { + unlock_page(pages[i]); + put_page(pages[i]); + } + } + + kfree(pages); + return; + +skip_pages: + for (i = 0; i < nr_pages; i++) { + unlock_page(pages[i]); + put_page(pages[i]); + } + kfree(pages); +} const struct address_space_operations squashfs_aops = { - .read_folio = squashfs_read_folio + .read_folio = squashfs_read_folio, + .readahead = squashfs_readahead }; diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index a4894cc59447..be4b12d31e0c 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -18,9 +18,6 @@ #include "squashfs.h" #include "page_actor.h" -static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page, int bytes); - /* Read separately compressed datablock directly into page cache */ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, int expected) @@ -33,7 +30,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; int start_index = target_page->index & ~mask; int end_index = start_index | mask; - int i, n, pages, missing_pages, bytes, res = -ENOMEM; + int i, n, pages, bytes, res = -ENOMEM; struct page **page; struct squashfs_page_actor *actor; void *pageaddr; @@ -47,50 +44,38 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, if (page == NULL) return res; - /* - * Create a "page actor" which will kmap and kunmap the - * page cache pages appropriately within the decompressor - */ - actor = squashfs_page_actor_init_special(page, pages, 0); - if (actor == NULL) - goto out; - /* Try to grab all the pages covered by the Squashfs block */ - for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { + for (i = 0, n = start_index; n <= end_index; n++) { page[i] = (n == target_page->index) ? target_page : grab_cache_page_nowait(target_page->mapping, n); - if (page[i] == NULL) { - missing_pages++; + if (page[i] == NULL) continue; - } if (PageUptodate(page[i])) { unlock_page(page[i]); put_page(page[i]); - page[i] = NULL; - missing_pages++; + continue; } + + i++; } - if (missing_pages) { - /* - * Couldn't get one or more pages, this page has either - * been VM reclaimed, but others are still in the page cache - * and uptodate, or we're racing with another thread in - * squashfs_readpage also trying to grab them. Fall back to - * using an intermediate buffer. - */ - res = squashfs_read_cache(target_page, block, bsize, pages, - page, expected); - if (res < 0) - goto mark_errored; + pages = i; + /* + * Create a "page actor" which will kmap and kunmap the + * page cache pages appropriately within the decompressor + */ + actor = squashfs_page_actor_init_special(msblk, page, pages, expected); + if (actor == NULL) goto out; - } /* Decompress directly into the page cache buffers */ res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); + + kfree(actor); + if (res < 0) goto mark_errored; @@ -99,12 +84,12 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, goto mark_errored; } - /* Last page may have trailing bytes not filled */ + /* Last page (if present) may have trailing bytes not filled */ bytes = res % PAGE_SIZE; - if (bytes) { - pageaddr = kmap_atomic(page[pages - 1]); + if (page[pages - 1]->index == end_index && bytes) { + pageaddr = kmap_local_page(page[pages - 1]); memset(pageaddr + bytes, 0, PAGE_SIZE - bytes); - kunmap_atomic(pageaddr); + kunmap_local(pageaddr); } /* Mark pages as uptodate, unlock and release */ @@ -116,7 +101,6 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, put_page(page[i]); } - kfree(actor); kfree(page); return 0; @@ -135,40 +119,6 @@ mark_errored: } out: - kfree(actor); kfree(page); return res; } - - -static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page, int bytes) -{ - struct inode *i = target_page->mapping->host; - struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, - block, bsize); - int res = buffer->error, n, offset = 0; - - if (res) { - ERROR("Unable to read page, block %llx, size %x\n", block, - bsize); - goto out; - } - - for (n = 0; n < pages && bytes > 0; n++, - bytes -= PAGE_SIZE, offset += PAGE_SIZE) { - int avail = min_t(int, bytes, PAGE_SIZE); - - if (page[n] == NULL) - continue; - - squashfs_fill_page(page[n], buffer, offset, avail); - unlock_page(page[n]); - if (page[n] != target_page) - put_page(page[n]); - } - -out: - squashfs_cache_put(buffer); - return res; -} diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c index b685b6238316..49797729f143 100644 --- a/fs/squashfs/lz4_wrapper.c +++ b/fs/squashfs/lz4_wrapper.c @@ -119,10 +119,12 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, buff = stream->output; while (data) { if (bytes <= PAGE_SIZE) { - memcpy(data, buff, bytes); + if (!IS_ERR(data)) + memcpy(data, buff, bytes); break; } - memcpy(data, buff, PAGE_SIZE); + if (!IS_ERR(data)) + memcpy(data, buff, PAGE_SIZE); buff += PAGE_SIZE; bytes -= PAGE_SIZE; data = squashfs_next_page(output); @@ -139,5 +141,6 @@ const struct squashfs_decompressor squashfs_lz4_comp_ops = { .decompress = lz4_uncompress, .id = LZ4_COMPRESSION, .name = "lz4", + .alloc_buffer = 0, .supported = 1 }; diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index cb510a631968..d216aeefa865 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -93,10 +93,12 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, buff = stream->output; while (data) { if (bytes <= PAGE_SIZE) { - memcpy(data, buff, bytes); + if (!IS_ERR(data)) + memcpy(data, buff, bytes); break; } else { - memcpy(data, buff, PAGE_SIZE); + if (!IS_ERR(data)) + memcpy(data, buff, PAGE_SIZE); buff += PAGE_SIZE; bytes -= PAGE_SIZE; data = squashfs_next_page(output); @@ -116,5 +118,6 @@ const struct squashfs_decompressor squashfs_lzo_comp_ops = { .decompress = lzo_uncompress, .id = LZO_COMPRESSION, .name = "lzo", + .alloc_buffer = 0, .supported = 1 }; diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c index 520d323a99ce..b23b780d8f42 100644 --- a/fs/squashfs/page_actor.c +++ b/fs/squashfs/page_actor.c @@ -7,6 +7,8 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/pagemap.h> +#include "squashfs_fs_sb.h" +#include "decompressor.h" #include "page_actor.h" /* @@ -57,29 +59,62 @@ struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, } /* Implementation of page_actor for decompressing directly into page cache. */ +static void *handle_next_page(struct squashfs_page_actor *actor) +{ + int max_pages = (actor->length + PAGE_SIZE - 1) >> PAGE_SHIFT; + + if (actor->returned_pages == max_pages) + return NULL; + + if ((actor->next_page == actor->pages) || + (actor->next_index != actor->page[actor->next_page]->index)) { + if (actor->alloc_buffer) { + void *tmp_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + + if (tmp_buffer) { + actor->tmp_buffer = tmp_buffer; + actor->next_index++; + actor->returned_pages++; + return tmp_buffer; + } + } + + actor->next_index++; + actor->returned_pages++; + return ERR_PTR(-ENOMEM); + } + + actor->next_index++; + actor->returned_pages++; + return actor->pageaddr = kmap_local_page(actor->page[actor->next_page++]); +} + static void *direct_first_page(struct squashfs_page_actor *actor) { - actor->next_page = 1; - return actor->pageaddr = kmap_atomic(actor->page[0]); + return handle_next_page(actor); } static void *direct_next_page(struct squashfs_page_actor *actor) { if (actor->pageaddr) - kunmap_atomic(actor->pageaddr); + kunmap_local(actor->pageaddr); + + kfree(actor->tmp_buffer); + actor->pageaddr = actor->tmp_buffer = NULL; - return actor->pageaddr = actor->next_page == actor->pages ? NULL : - kmap_atomic(actor->page[actor->next_page++]); + return handle_next_page(actor); } static void direct_finish_page(struct squashfs_page_actor *actor) { if (actor->pageaddr) - kunmap_atomic(actor->pageaddr); + kunmap_local(actor->pageaddr); + + kfree(actor->tmp_buffer); } -struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, - int pages, int length) +struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_info *msblk, + struct page **page, int pages, int length) { struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); @@ -90,7 +125,11 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, actor->page = page; actor->pages = pages; actor->next_page = 0; + actor->returned_pages = 0; + actor->next_index = page[0]->index & ~((1 << (msblk->block_log - PAGE_SHIFT)) - 1); actor->pageaddr = NULL; + actor->tmp_buffer = NULL; + actor->alloc_buffer = msblk->decompressor->alloc_buffer; actor->squashfs_first_page = direct_first_page; actor->squashfs_next_page = direct_next_page; actor->squashfs_finish_page = direct_finish_page; diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index 2e3073ace009..24841d28bc0f 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -6,63 +6,29 @@ * Phillip Lougher <phillip@squashfs.org.uk> */ -#ifndef CONFIG_SQUASHFS_FILE_DIRECT -struct squashfs_page_actor { - void **page; - int pages; - int length; - int next_page; -}; - -static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, - int pages, int length) -{ - struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); - - if (actor == NULL) - return NULL; - - actor->length = length ? : pages * PAGE_SIZE; - actor->page = page; - actor->pages = pages; - actor->next_page = 0; - return actor; -} - -static inline void *squashfs_first_page(struct squashfs_page_actor *actor) -{ - actor->next_page = 1; - return actor->page[0]; -} - -static inline void *squashfs_next_page(struct squashfs_page_actor *actor) -{ - return actor->next_page == actor->pages ? NULL : - actor->page[actor->next_page++]; -} - -static inline void squashfs_finish_page(struct squashfs_page_actor *actor) -{ - /* empty */ -} -#else struct squashfs_page_actor { union { void **buffer; struct page **page; }; void *pageaddr; + void *tmp_buffer; void *(*squashfs_first_page)(struct squashfs_page_actor *); void *(*squashfs_next_page)(struct squashfs_page_actor *); void (*squashfs_finish_page)(struct squashfs_page_actor *); int pages; int length; int next_page; + int alloc_buffer; + int returned_pages; + pgoff_t next_index; }; -extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int); -extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page - **, int, int); +extern struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, + int pages, int length); +extern struct squashfs_page_actor *squashfs_page_actor_init_special( + struct squashfs_sb_info *msblk, + struct page **page, int pages, int length); static inline void *squashfs_first_page(struct squashfs_page_actor *actor) { return actor->squashfs_first_page(actor); @@ -75,5 +41,8 @@ static inline void squashfs_finish_page(struct squashfs_page_actor *actor) { actor->squashfs_finish_page(actor); } -#endif +static inline void squashfs_actor_nobuff(struct squashfs_page_actor *actor) +{ + actor->alloc_buffer = 0; +} #endif diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 6d594ba2ed28..32565dafa7f3 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -29,7 +29,6 @@ #include <linux/module.h> #include <linux/magic.h> #include <linux/xattr.h> -#include <linux/backing-dev.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -113,24 +112,6 @@ static const struct squashfs_decompressor *supported_squashfs_filesystem( return decompressor; } -static int squashfs_bdi_init(struct super_block *sb) -{ - int err; - unsigned int major = MAJOR(sb->s_dev); - unsigned int minor = MINOR(sb->s_dev); - - bdi_put(sb->s_bdi); - sb->s_bdi = &noop_backing_dev_info; - - err = super_setup_bdi_name(sb, "squashfs_%u_%u", major, minor); - if (err) - return err; - - sb->s_bdi->ra_pages = 0; - sb->s_bdi->io_pages = 0; - - return 0; -} static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) { @@ -146,20 +127,6 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) TRACE("Entered squashfs_fill_superblock\n"); - /* - * squashfs provides 'backing_dev_info' in order to disable read-ahead. For - * squashfs, I/O is not deferred, it is done immediately in read_folio, - * which means the user would always have to wait their own I/O. So the effect - * of readahead is very weak for squashfs. squashfs_bdi_init will set - * sb->s_bdi->ra_pages and sb->s_bdi->io_pages to 0 and close readahead for - * squashfs. - */ - err = squashfs_bdi_init(sb); - if (err) { - errorf(fc, "squashfs init bdi failed"); - return err; - } - sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL); if (sb->s_fs_info == NULL) { ERROR("Failed to allocate squashfs_sb_info\n"); diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 68f6d09bb3a2..6c49481a2f8c 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -131,6 +131,10 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, stream->buf.out_pos = 0; stream->buf.out_size = PAGE_SIZE; stream->buf.out = squashfs_first_page(output); + if (IS_ERR(stream->buf.out)) { + error = PTR_ERR(stream->buf.out); + goto finish; + } for (;;) { enum xz_ret xz_err; @@ -156,7 +160,10 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, if (stream->buf.out_pos == stream->buf.out_size) { stream->buf.out = squashfs_next_page(output); - if (stream->buf.out != NULL) { + if (IS_ERR(stream->buf.out)) { + error = PTR_ERR(stream->buf.out); + break; + } else if (stream->buf.out != NULL) { stream->buf.out_pos = 0; total += PAGE_SIZE; } @@ -171,6 +178,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, } } +finish: squashfs_finish_page(output); return error ? error : total + stream->buf.out_pos; @@ -183,5 +191,6 @@ const struct squashfs_decompressor squashfs_xz_comp_ops = { .decompress = squashfs_xz_uncompress, .id = XZ_COMPRESSION, .name = "xz", + .alloc_buffer = 1, .supported = 1 }; diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index a20e9042146b..cbb7afe7bc46 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -62,6 +62,11 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, stream->next_out = squashfs_first_page(output); stream->avail_in = 0; + if (IS_ERR(stream->next_out)) { + error = PTR_ERR(stream->next_out); + goto finish; + } + for (;;) { int zlib_err; @@ -85,7 +90,10 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, if (stream->avail_out == 0) { stream->next_out = squashfs_next_page(output); - if (stream->next_out != NULL) + if (IS_ERR(stream->next_out)) { + error = PTR_ERR(stream->next_out); + break; + } else if (stream->next_out != NULL) stream->avail_out = PAGE_SIZE; } @@ -107,6 +115,7 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, } } +finish: squashfs_finish_page(output); if (!error) @@ -122,6 +131,7 @@ const struct squashfs_decompressor squashfs_zlib_comp_ops = { .decompress = zlib_uncompress, .id = ZLIB_COMPRESSION, .name = "zlib", + .alloc_buffer = 1, .supported = 1 }; diff --git a/fs/squashfs/zstd_wrapper.c b/fs/squashfs/zstd_wrapper.c index c40445dbf38c..0e407c4d8b3b 100644 --- a/fs/squashfs/zstd_wrapper.c +++ b/fs/squashfs/zstd_wrapper.c @@ -80,6 +80,10 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm, out_buf.size = PAGE_SIZE; out_buf.dst = squashfs_first_page(output); + if (IS_ERR(out_buf.dst)) { + error = PTR_ERR(out_buf.dst); + goto finish; + } for (;;) { size_t zstd_err; @@ -104,7 +108,10 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm, if (out_buf.pos == out_buf.size) { out_buf.dst = squashfs_next_page(output); - if (out_buf.dst == NULL) { + if (IS_ERR(out_buf.dst)) { + error = PTR_ERR(out_buf.dst); + break; + } else if (out_buf.dst == NULL) { /* Shouldn't run out of pages * before stream is done. */ @@ -129,6 +136,8 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm, } } +finish: + squashfs_finish_page(output); return error ? error : total_out; @@ -140,5 +149,6 @@ const struct squashfs_decompressor squashfs_zstd_comp_ops = { .decompress = zstd_uncompress, .id = ZSTD_COMPRESSION, .name = "zstd", + .alloc_buffer = 1, .supported = 1 }; diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index e863c88df95f..ae12696ec492 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -28,11 +28,6 @@ enum wb_state { WB_start_all, /* nr_pages == 0 (all) work pending */ }; -enum wb_congested_state { - WB_async_congested, /* The async (write) queue is getting full */ - WB_sync_congested, /* The sync queue is getting full */ -}; - enum wb_stat_item { WB_RECLAIMABLE, WB_WRITEBACK, @@ -122,8 +117,6 @@ struct bdi_writeback { atomic_t writeback_inodes; /* number of inodes under writeback */ struct percpu_counter stat[NR_WB_STAT_ITEMS]; - unsigned long congested; /* WB_[a]sync_congested flags */ - unsigned long bw_time_stamp; /* last time write bw is updated */ unsigned long dirtied_stamp; unsigned long written_stamp; /* pages written at bw_time_stamp */ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index a0c55eeaeaf1..9b157b71036f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,17 +66,6 @@ __builtin_unreachable(); \ } while (0) -/* - * GCC 'asm goto' miscompiles certain code sequences: - * - * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 - * - * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. - * - * (asm goto is automatically volatile - the naming reflects this.) - */ -#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) - #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 4592d0845941..707387323862 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -116,85 +116,6 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu) return cpu; } -#if NR_CPUS == 1 -/* Uniprocessor. Assume all masks are "1". */ -static inline unsigned int cpumask_first(const struct cpumask *srcp) -{ - return 0; -} - -static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) -{ - return 0; -} - -static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, - const struct cpumask *srcp2) -{ - return 0; -} - -static inline unsigned int cpumask_last(const struct cpumask *srcp) -{ - return 0; -} - -/* Valid inputs for n are -1 and 0. */ -static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) -{ - return n+1; -} - -static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) -{ - return n+1; -} - -static inline unsigned int cpumask_next_and(int n, - const struct cpumask *srcp, - const struct cpumask *andp) -{ - return n+1; -} - -static inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, - int start, bool wrap) -{ - /* cpu0 unless stop condition, wrap and at cpu0, then nr_cpumask_bits */ - return (wrap && n == 0); -} - -/* cpu must be a valid cpu, ie 0, so there's no other choice. */ -static inline unsigned int cpumask_any_but(const struct cpumask *mask, - unsigned int cpu) -{ - return 1; -} - -static inline unsigned int cpumask_local_spread(unsigned int i, int node) -{ - return 0; -} - -static inline int cpumask_any_and_distribute(const struct cpumask *src1p, - const struct cpumask *src2p) { - return cpumask_first_and(src1p, src2p); -} - -static inline int cpumask_any_distribute(const struct cpumask *srcp) -{ - return cpumask_first(srcp); -} - -#define for_each_cpu(cpu, mask) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) -#define for_each_cpu_not(cpu, mask) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) -#define for_each_cpu_wrap(cpu, mask, start) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start)) -#define for_each_cpu_and(cpu, mask1, mask2) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask1, (void)mask2) -#else /** * cpumask_first - get the first cpu in a cpumask * @srcp: the cpumask pointer @@ -260,10 +181,29 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) int __pure cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int __pure cpumask_any_but(const struct cpumask *mask, unsigned int cpu); + +#if NR_CPUS == 1 +/* Uniprocessor: there is only one valid CPU */ +static inline unsigned int cpumask_local_spread(unsigned int i, int node) +{ + return 0; +} + +static inline int cpumask_any_and_distribute(const struct cpumask *src1p, + const struct cpumask *src2p) { + return cpumask_first_and(src1p, src2p); +} + +static inline int cpumask_any_distribute(const struct cpumask *srcp) +{ + return cpumask_first(srcp); +} +#else unsigned int cpumask_local_spread(unsigned int i, int node); int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p); int cpumask_any_distribute(const struct cpumask *srcp); +#endif /* NR_CPUS */ /** * for_each_cpu - iterate over every cpu in a mask @@ -289,7 +229,7 @@ int cpumask_any_distribute(const struct cpumask *srcp); (cpu) = cpumask_next_zero((cpu), (mask)), \ (cpu) < nr_cpu_ids;) -extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); +int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); /** * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location @@ -324,7 +264,6 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool for ((cpu) = -1; \ (cpu) = cpumask_next_and((cpu), (mask1), (mask2)), \ (cpu) < nr_cpu_ids;) -#endif /* SMP */ #define CPU_BITS_NONE \ { \ @@ -811,9 +750,16 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); /* First bits of cpu_bit_bitmap are in fact unset. */ #define cpu_none_mask to_cpumask(cpu_bit_bitmap[0]) +#if NR_CPUS == 1 +/* Uniprocessor: the possible/online/present masks are always "1" */ +#define for_each_possible_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) +#define for_each_online_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) +#define for_each_present_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) +#else #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask) #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask) #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) +#endif /* Wrappers for arch boot code to manipulate normally-constant masks */ void init_cpu_present(const struct cpumask *src); diff --git a/include/linux/kernel_read_file.h b/include/linux/kernel_read_file.h index 575ffa1031d3..90451e2e12bd 100644 --- a/include/linux/kernel_read_file.h +++ b/include/linux/kernel_read_file.h @@ -35,21 +35,21 @@ static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) return kernel_read_file_str[id]; } -int kernel_read_file(struct file *file, loff_t offset, - void **buf, size_t buf_size, - size_t *file_size, - enum kernel_read_file_id id); -int kernel_read_file_from_path(const char *path, loff_t offset, - void **buf, size_t buf_size, - size_t *file_size, - enum kernel_read_file_id id); -int kernel_read_file_from_path_initns(const char *path, loff_t offset, - void **buf, size_t buf_size, - size_t *file_size, - enum kernel_read_file_id id); -int kernel_read_file_from_fd(int fd, loff_t offset, - void **buf, size_t buf_size, - size_t *file_size, - enum kernel_read_file_id id); +ssize_t kernel_read_file(struct file *file, loff_t offset, + void **buf, size_t buf_size, + size_t *file_size, + enum kernel_read_file_id id); +ssize_t kernel_read_file_from_path(const char *path, loff_t offset, + void **buf, size_t buf_size, + size_t *file_size, + enum kernel_read_file_id id); +ssize_t kernel_read_file_from_path_initns(const char *path, loff_t offset, + void **buf, size_t buf_size, + size_t *file_size, + enum kernel_read_file_id id); +ssize_t kernel_read_file_from_fd(int fd, loff_t offset, + void **buf, size_t buf_size, + size_t *file_size, + enum kernel_read_file_id id); #endif /* _LINUX_KERNEL_READ_FILE_H */ diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 86249476b57f..0b35a41440ff 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -688,7 +688,7 @@ __kfifo_uint_must_check_helper( \ * writer, you don't need extra locking to use these macro. */ #define kfifo_to_user(fifo, to, len, copied) \ -__kfifo_uint_must_check_helper( \ +__kfifo_int_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ void __user *__to = (to); \ diff --git a/include/linux/limits.h b/include/linux/limits.h index b568b9c30bbf..f6bcc9369010 100644 --- a/include/linux/limits.h +++ b/include/linux/limits.h @@ -7,6 +7,7 @@ #include <vdso/limits.h> #define SIZE_MAX (~(size_t)0) +#define SSIZE_MAX ((ssize_t)(SIZE_MAX >> 1)) #define PHYS_ADDR_MAX (~(phys_addr_t)0) #define U8_MAX ((u8)~0U) diff --git a/include/linux/net.h b/include/linux/net.h index a03485e8cbb2..711c3593c3b8 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -307,8 +307,6 @@ do { \ #define net_get_random_once(buf, nbytes) \ get_random_once((buf), (nbytes)) -#define net_get_random_once_wait(buf, nbytes) \ - get_random_once_wait((buf), (nbytes)) /* * E.g. XFS meta- & log-data is in slab pages, or bcache meta diff --git a/include/linux/once.h b/include/linux/once.h index f54523052bbc..b14d8b309d52 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -54,7 +54,5 @@ void __do_once_done(bool *done, struct static_key_true *once_key, #define get_random_once(buf, nbytes) \ DO_ONCE(get_random_bytes, (buf), (nbytes)) -#define get_random_once_wait(buf, nbytes) \ - DO_ONCE(get_random_bytes_wait, (buf), (nbytes)) \ #endif /* _LINUX_ONCE_H */ diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 235047d7a1b5..f7edca369eda 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -17,9 +17,9 @@ #ifndef _LINUX_RBTREE_H #define _LINUX_RBTREE_H +#include <linux/container_of.h> #include <linux/rbtree_types.h> -#include <linux/kernel.h> #include <linux/stddef.h> #include <linux/rcupdate.h> diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index 7272f85d6d6a..0723a9cce747 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -102,7 +102,7 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x)) #else #define __swab16(x) \ - (__builtin_constant_p((__u16)(x)) ? \ + (__u16)(__builtin_constant_p(x) ? \ ___constant_swab16(x) : \ __fswab16(x)) #endif @@ -115,7 +115,7 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x)) #else #define __swab32(x) \ - (__builtin_constant_p((__u32)(x)) ? \ + (__u32)(__builtin_constant_p(x) ? \ ___constant_swab32(x) : \ __fswab32(x)) #endif @@ -128,7 +128,7 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) #define __swab64(x) (__u64)__builtin_bswap64((__u64)(x)) #else #define __swab64(x) \ - (__builtin_constant_p((__u64)(x)) ? \ + (__u64)(__builtin_constant_p(x) ? \ ___constant_swab64(x) : \ __fswab64(x)) #endif diff --git a/init/version.c b/init/version.c index 1a356f5493e8..b7f9559d417c 100644 --- a/init/version.c +++ b/init/version.c @@ -11,6 +11,8 @@ #include <linux/build-salt.h> #include <linux/elfnote-lto.h> #include <linux/export.h> +#include <linux/init.h> +#include <linux/printk.h> #include <linux/uts.h> #include <linux/utsname.h> #include <generated/utsrelease.h> @@ -35,6 +37,21 @@ struct uts_namespace init_uts_ns = { }; EXPORT_SYMBOL_GPL(init_uts_ns); +static int __init early_hostname(char *arg) +{ + size_t bufsize = sizeof(init_uts_ns.name.nodename); + size_t maxlen = bufsize - 1; + size_t arglen; + + arglen = strlcpy(init_uts_ns.name.nodename, arg, bufsize); + if (arglen > maxlen) { + pr_warn("hostname parameter exceeds %zd characters and will be truncated", + maxlen); + } + return 0; +} +early_param("hostname", early_hostname); + /* FIXED STRINGS! Don't touch! */ const char linux_banner[] = "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 12ad7860bb88..f98de32aeea1 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -489,7 +489,7 @@ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns) static void init_once(void *foo) { - struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo; + struct mqueue_inode_info *p = foo; inode_init_once(&p->vfs_inode); } diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 71122e01623c..07b26df453a9 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -9,12 +9,15 @@ #include <linux/init.h> #include <linux/utsname.h> #include <linux/vmalloc.h> +#include <linux/sizes.h> #include <asm/page.h> #include <asm/sections.h> #include <crypto/sha1.h> +#include "kallsyms_internal.h" + /* vmcoreinfo stuff */ unsigned char *vmcoreinfo_data; size_t vmcoreinfo_size; @@ -43,6 +46,15 @@ static int __init parse_crashkernel_mem(char *cmdline, unsigned long long *crash_base) { char *cur = cmdline, *tmp; + unsigned long long total_mem = system_ram; + + /* + * Firmware sometimes reserves some memory regions for its own use, + * so the system memory size is less than the actual physical memory + * size. Work around this by rounding up the total size to 128M, + * which is enough for most test cases. + */ + total_mem = roundup(total_mem, SZ_128M); /* for each entry of the comma-separated list */ do { @@ -87,13 +99,13 @@ static int __init parse_crashkernel_mem(char *cmdline, return -EINVAL; } cur = tmp; - if (size >= system_ram) { + if (size >= total_mem) { pr_warn("crashkernel: invalid size\n"); return -EINVAL; } /* match ? */ - if (system_ram >= start && system_ram < end) { + if (total_mem >= start && total_mem < end) { *crash_size = size; break; } @@ -480,6 +492,18 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); #endif +#ifdef CONFIG_KALLSYMS + VMCOREINFO_SYMBOL(kallsyms_names); + VMCOREINFO_SYMBOL(kallsyms_token_table); + VMCOREINFO_SYMBOL(kallsyms_token_index); +#ifdef CONFIG_KALLSYMS_BASE_RELATIVE + VMCOREINFO_SYMBOL(kallsyms_offsets); + VMCOREINFO_SYMBOL(kallsyms_relative_base); +#else + VMCOREINFO_SYMBOL(kallsyms_addresses); +#endif /* CONFIG_KALLSYMS_BASE_RELATIVE */ +#endif /* CONFIG_KALLSYMS */ + arch_crash_save_vmcoreinfo(); update_vmcoreinfo_note(); diff --git a/kernel/hung_task.c b/kernel/hung_task.c index cff3ae8c818f..bb2354f73ded 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -229,7 +229,7 @@ static long hung_timeout_jiffies(unsigned long last_checked, * Process updating of timeout sysctl */ static int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - void __user *buffer, + void *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 79a85834ce9d..3e7e2c2ad2f7 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -32,28 +32,7 @@ #include <linux/bsearch.h> #include <linux/btf_ids.h> -/* - * These will be re-linked against their real values - * during the second link stage. - */ -extern const unsigned long kallsyms_addresses[] __weak; -extern const int kallsyms_offsets[] __weak; -extern const u8 kallsyms_names[] __weak; - -/* - * Tell the compiler that the count isn't in the small data section if the arch - * has one (eg: FRV). - */ -extern const unsigned int kallsyms_num_syms -__section(".rodata") __attribute__((weak)); - -extern const unsigned long kallsyms_relative_base -__section(".rodata") __attribute__((weak)); - -extern const char kallsyms_token_table[] __weak; -extern const u16 kallsyms_token_index[] __weak; - -extern const unsigned int kallsyms_markers[] __weak; +#include "kallsyms_internal.h" /* * Expand a compressed symbol data into the resulting uncompressed string, diff --git a/kernel/kallsyms_internal.h b/kernel/kallsyms_internal.h new file mode 100644 index 000000000000..2d0c6f2f0243 --- /dev/null +++ b/kernel/kallsyms_internal.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef LINUX_KALLSYMS_INTERNAL_H_ +#define LINUX_KALLSYMS_INTERNAL_H_ + +#include <linux/types.h> + +/* + * These will be re-linked against their real values + * during the second link stage. + */ +extern const unsigned long kallsyms_addresses[] __weak; +extern const int kallsyms_offsets[] __weak; +extern const u8 kallsyms_names[] __weak; + +/* + * Tell the compiler that the count isn't in the small data section if the arch + * has one (eg: FRV). + */ +extern const unsigned int kallsyms_num_syms +__section(".rodata") __attribute__((weak)); + +extern const unsigned long kallsyms_relative_base +__section(".rodata") __attribute__((weak)); + +extern const char kallsyms_token_table[] __weak; +extern const u16 kallsyms_token_index[] __weak; + +extern const unsigned int kallsyms_markers[] __weak; + +#endif // LINUX_KALLSYMS_INTERNAL_H_ diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index a7b411c22f19..1d546dc97c50 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -40,6 +40,9 @@ void set_kexec_sig_enforced(void) static int kexec_calculate_store_digests(struct kimage *image); +/* Maximum size in bytes for kernel/initrd files. */ +#define KEXEC_FILE_SIZE_MAX min_t(s64, 4LL << 30, SSIZE_MAX) + /* * Currently this is the only default function that is exported as some * architectures need it to do additional handlings. @@ -190,11 +193,12 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, const char __user *cmdline_ptr, unsigned long cmdline_len, unsigned flags) { - int ret; + ssize_t ret; void *ldata; ret = kernel_read_file_from_fd(kernel_fd, 0, &image->kernel_buf, - INT_MAX, NULL, READING_KEXEC_IMAGE); + KEXEC_FILE_SIZE_MAX, NULL, + READING_KEXEC_IMAGE); if (ret < 0) return ret; image->kernel_buf_len = ret; @@ -214,7 +218,7 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, /* It is possible that there no initramfs is being loaded */ if (!(flags & KEXEC_FILE_NO_INITRAMFS)) { ret = kernel_read_file_from_fd(initrd_fd, 0, &image->initrd_buf, - INT_MAX, NULL, + KEXEC_FILE_SIZE_MAX, NULL, READING_KEXEC_INITRAMFS); if (ret < 0) goto out; diff --git a/kernel/profile.c b/kernel/profile.c index 37640a0bd8a3..7ea01ba30e75 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -109,6 +109,13 @@ int __ref profile_init(void) /* only text is profiled */ prof_len = (_etext - _stext) >> prof_shift; + + if (!prof_len) { + pr_warn("profiling shift: %u too large\n", prof_shift); + prof_on = 0; + return -EINVAL; + } + buffer_bytes = prof_len*sizeof(atomic_t); if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) @@ -418,6 +425,12 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos) return read; } +/* default is to not implement this call */ +int __weak setup_profiling_timer(unsigned mult) +{ + return -EINVAL; +} + /* * Writing to /proc/profile resets the counters * @@ -428,8 +441,6 @@ static ssize_t write_profile(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { #ifdef CONFIG_SMP - extern int setup_profiling_timer(unsigned int multiplier); - if (count == sizeof(int)) { unsigned int multiplier; diff --git a/lib/Kconfig b/lib/Kconfig index 55c27e1ec9cb..dc1ab2ed1dc6 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -692,15 +692,6 @@ config STACKDEPOT_ALWAYS_INIT bool select STACKDEPOT -config STACK_HASH_ORDER - int "stack depot hash size (12 => 4KB, 20 => 1024KB)" - range 12 20 - default 20 - depends on STACKDEPOT - help - Select the hash size as a power of 2 for the stackdepot hash table. - Choose a lower value to reduce the memory impact. - config REF_TRACKER bool depends on STACKTRACE_SUPPORT diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 403071ff0bcf..072e4b289c13 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2029,6 +2029,15 @@ config LKDTM Documentation on how to use the module can be found in Documentation/fault-injection/provoke-crashes.rst +config TEST_CPUMASK + tristate "cpumask tests" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable to turn on cpumask tests, running at boot or module load time. + + If unsure, say N. + config TEST_LIST_SORT tristate "Linked list sorting test" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index 0eb8114600c1..17e48da223e2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -34,10 +34,9 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o \ - buildid.o + buildid.o cpumask.o lib-$(CONFIG_PRINTK) += dump_stack.o -lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o klist.o obj-y += lockref.o @@ -100,6 +99,7 @@ obj-$(CONFIG_TEST_HMM) += test_hmm.o obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o +obj-$(CONFIG_TEST_CPUMASK) += test_cpumask.o CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE) obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o # diff --git a/lib/btree.c b/lib/btree.c index b4cf08a5c267..a82100c73b55 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -238,7 +238,7 @@ static int keyzero(struct btree_geo *geo, unsigned long *key) return 1; } -void *btree_lookup(struct btree_head *head, struct btree_geo *geo, +static void *btree_lookup_node(struct btree_head *head, struct btree_geo *geo, unsigned long *key) { int i, height = head->height; @@ -257,7 +257,16 @@ void *btree_lookup(struct btree_head *head, struct btree_geo *geo, if (!node) return NULL; } + return node; +} +void *btree_lookup(struct btree_head *head, struct btree_geo *geo, + unsigned long *key) +{ + int i; + unsigned long *node; + + node = btree_lookup_node(head, geo, key); if (!node) return NULL; @@ -271,23 +280,10 @@ EXPORT_SYMBOL_GPL(btree_lookup); int btree_update(struct btree_head *head, struct btree_geo *geo, unsigned long *key, void *val) { - int i, height = head->height; - unsigned long *node = head->node; - - if (height == 0) - return -ENOENT; - - for ( ; height > 1; height--) { - for (i = 0; i < geo->no_pairs; i++) - if (keycmp(geo, node, i, key) <= 0) - break; - if (i == geo->no_pairs) - return -ENOENT; - node = bval(geo, node, i); - if (!node) - return -ENOENT; - } + int i; + unsigned long *node; + node = btree_lookup_node(head, geo, key); if (!node) return -ENOENT; diff --git a/lib/cpumask.c b/lib/cpumask.c index a971a82d2f43..b9728513a4d4 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -192,6 +192,7 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) } #endif +#if NR_CPUS > 1 /** * cpumask_local_spread - select the i'th cpu with local numa cpu's first * @i: index number @@ -279,3 +280,4 @@ int cpumask_any_distribute(const struct cpumask *srcp) return next; } EXPORT_SYMBOL(cpumask_any_distribute); +#endif /* NR_CPUS */ diff --git a/lib/devres.c b/lib/devres.c index 14664bbb4875..55eb07e80cbb 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -29,7 +29,8 @@ static void __iomem *__devm_ioremap(struct device *dev, resource_size_t offset, { void __iomem **ptr, *addr = NULL; - ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); + ptr = devres_alloc_node(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL, + dev_to_node(dev)); if (!ptr) return NULL; @@ -292,7 +293,8 @@ void __iomem *devm_ioport_map(struct device *dev, unsigned long port, { void __iomem **ptr, *addr; - ptr = devres_alloc(devm_ioport_map_release, sizeof(*ptr), GFP_KERNEL); + ptr = devres_alloc_node(devm_ioport_map_release, sizeof(*ptr), GFP_KERNEL, + dev_to_node(dev)); if (!ptr) return NULL; @@ -366,7 +368,8 @@ void __iomem * const *pcim_iomap_table(struct pci_dev *pdev) if (dr) return dr->table; - new_dr = devres_alloc(pcim_iomap_release, sizeof(*new_dr), GFP_KERNEL); + new_dr = devres_alloc_node(pcim_iomap_release, sizeof(*new_dr), GFP_KERNEL, + dev_to_node(&pdev->dev)); if (!new_dr) return NULL; dr = devres_get(&pdev->dev, new_dr, NULL, NULL); @@ -548,7 +551,8 @@ int devm_arch_phys_wc_add(struct device *dev, unsigned long base, unsigned long int *mtrr; int ret; - mtrr = devres_alloc(devm_arch_phys_ac_add_release, sizeof(*mtrr), GFP_KERNEL); + mtrr = devres_alloc_node(devm_arch_phys_ac_add_release, sizeof(*mtrr), GFP_KERNEL, + dev_to_node(dev)); if (!mtrr) return -ENOMEM; @@ -593,7 +597,8 @@ int devm_arch_io_reserve_memtype_wc(struct device *dev, resource_size_t start, struct arch_io_reserve_memtype_wc_devres *dr; int ret; - dr = devres_alloc(devm_arch_io_free_memtype_wc_release, sizeof(*dr), GFP_KERNEL); + dr = devres_alloc_node(devm_arch_io_free_memtype_wc_release, sizeof(*dr), GFP_KERNEL, + dev_to_node(dev)); if (!dr) return -ENOMEM; diff --git a/lib/error-inject.c b/lib/error-inject.c index 2ff5ef689d72..1afca1b1cdea 100644 --- a/lib/error-inject.c +++ b/lib/error-inject.c @@ -40,12 +40,18 @@ bool within_error_injection_list(unsigned long addr) int get_injectable_error_type(unsigned long addr) { struct ei_entry *ent; + int ei_type = EI_ETYPE_NONE; + mutex_lock(&ei_mutex); list_for_each_entry(ent, &error_injection_list, list) { - if (addr >= ent->start_addr && addr < ent->end_addr) - return ent->etype; + if (addr >= ent->start_addr && addr < ent->end_addr) { + ei_type = ent->etype; + break; + } } - return EI_ETYPE_NONE; + mutex_unlock(&ei_mutex); + + return ei_type; } /* @@ -197,24 +203,14 @@ static int ei_seq_show(struct seq_file *m, void *v) return 0; } -static const struct seq_operations ei_seq_ops = { +static const struct seq_operations ei_sops = { .start = ei_seq_start, .next = ei_seq_next, .stop = ei_seq_stop, .show = ei_seq_show, }; -static int ei_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &ei_seq_ops); -} - -static const struct file_operations debugfs_ei_ops = { - .open = ei_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; +DEFINE_SEQ_ATTRIBUTE(ei); static int __init ei_debugfs_init(void) { @@ -224,7 +220,7 @@ static int __init ei_debugfs_init(void) if (!dir) return -ENOMEM; - file = debugfs_create_file("list", 0444, dir, NULL, &debugfs_ei_ops); + file = debugfs_create_file("list", 0444, dir, NULL, &ei_fops); if (!file) { debugfs_remove(dir); return -ENOMEM; diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index 53e7eb1dd76c..05cccbcf1661 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -63,18 +63,13 @@ void fprop_global_destroy(struct fprop_global *p) */ bool fprop_new_period(struct fprop_global *p, int periods) { - s64 events; - unsigned long flags; + s64 events = percpu_counter_sum(&p->events); - local_irq_save(flags); - events = percpu_counter_sum(&p->events); /* * Don't do anything if there are no events. */ - if (events <= 1) { - local_irq_restore(flags); + if (events <= 1) return false; - } write_seqcount_begin(&p->sequence); if (periods < 64) events -= events >> periods; @@ -82,7 +77,6 @@ bool fprop_new_period(struct fprop_global *p, int periods) percpu_counter_add(&p->events, -events); p->period += periods; write_seqcount_end(&p->sequence); - local_irq_restore(flags); return true; } diff --git a/lib/list_debug.c b/lib/list_debug.c index 9daa3fb9d1cd..d98d43f80958 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -20,7 +20,11 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev, struct list_head *next) { - if (CHECK_DATA_CORRUPTION(next->prev != prev, + if (CHECK_DATA_CORRUPTION(prev == NULL, + "list_add corruption. prev is NULL.\n") || + CHECK_DATA_CORRUPTION(next == NULL, + "list_add corruption. next is NULL.\n") || + CHECK_DATA_CORRUPTION(next->prev != prev, "list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n", prev, next->prev, next) || CHECK_DATA_CORRUPTION(prev->next != next, @@ -42,7 +46,11 @@ bool __list_del_entry_valid(struct list_head *entry) prev = entry->prev; next = entry->next; - if (CHECK_DATA_CORRUPTION(next == LIST_POISON1, + if (CHECK_DATA_CORRUPTION(next == NULL, + "list_del corruption, %px->next is NULL\n", entry) || + CHECK_DATA_CORRUPTION(prev == NULL, + "list_del corruption, %px->prev is NULL\n", entry) || + CHECK_DATA_CORRUPTION(next == LIST_POISON1, "list_del corruption, %px->next is LIST_POISON1 (%px)\n", entry, LIST_POISON1) || CHECK_DATA_CORRUPTION(prev == LIST_POISON2, diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 52313acbfa62..dc35464216d3 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -147,8 +147,8 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, return lc; /* else: could not allocate all elements, give up */ - for (i--; i; i--) { - void *p = element[i]; + while (i) { + void *p = element[--i]; kmem_cache_free(cache, p - e_off); } kfree(lc); diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index fd1728d94bab..59fe69a63800 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -507,9 +507,9 @@ static int LZ4_decompress_safe_withSmallPrefix(const char *source, char *dest, (BYTE *)dest - prefixSize, NULL, 0); } -int LZ4_decompress_safe_forceExtDict(const char *source, char *dest, - int compressedSize, int maxOutputSize, - const void *dictStart, size_t dictSize) +static int LZ4_decompress_safe_forceExtDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + const void *dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 76758e9296ba..9d31e7126606 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -50,9 +50,7 @@ next: if (dv == 0 && bitstream_version) { const unsigned char *ir = ip + 4; - const unsigned char *limit = ip_end - < (ip + MAX_ZERO_RUN_LENGTH + 1) - ? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1; + const unsigned char *limit = min(ip_end, ip + MAX_ZERO_RUN_LENGTH + 1); #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ defined(LZO_FAST_64BIT_MEMORY_ACCESS) u64 dv64; @@ -326,7 +324,7 @@ static int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, data_start = op; while (l > 20) { - size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1); + size_t ll = min_t(size_t, l, m4_max_offset + 1); uintptr_t ll_end = (uintptr_t) ip + ll; if ((ll_end + ((t + ll) >> 5)) <= ll_end) break; diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index bc81419f400c..aa8c46544af8 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -272,7 +272,7 @@ MPI mpi_set_ui(MPI w, unsigned long u) if (!w) w = mpi_alloc(1); /* FIXME: If U is 0 we have no need to resize and thus possible - * allocating the the limbs. + * allocating the limbs. */ RESIZE_IF_NEEDED(w, 1); w->d[0] = u; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index b3afafe46fff..3c78e1e8b2ad 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -677,7 +677,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) } static inline int insert_entries(struct radix_tree_node *node, - void __rcu **slot, void *item, bool replace) + void __rcu **slot, void *item) { if (*slot) return -EEXIST; @@ -711,7 +711,7 @@ int radix_tree_insert(struct radix_tree_root *root, unsigned long index, if (error) return error; - error = insert_entries(node, slot, item, false); + error = insert_entries(node, slot, item); if (error < 0) return error; diff --git a/lib/scatterlist.c b/lib/scatterlist.c index d5e82e4a57ad..c8c3d675845c 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -240,7 +240,7 @@ EXPORT_SYMBOL(__sg_free_table); **/ void sg_free_append_table(struct sg_append_table *table) { - __sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, false, sg_kfree, + __sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, 0, sg_kfree, table->total_nents); } EXPORT_SYMBOL(sg_free_append_table); @@ -253,7 +253,7 @@ EXPORT_SYMBOL(sg_free_append_table); **/ void sg_free_table(struct sg_table *table) { - __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree, + __sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree, table->orig_nents); } EXPORT_SYMBOL(sg_free_table); diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 046ac6297c78..a2bb7738c373 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -47,9 +47,9 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2) printk("caller is %pS\n", __builtin_return_address(0)); dump_stack(); - instrumentation_end(); out_enable: + instrumentation_end(); preempt_enable_no_resched_notrace(); out: return this_cpu; diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 5ca0d086ef4a..e73fda23388d 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -32,6 +32,7 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/memblock.h> +#include <linux/kasan-enabled.h> #define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8) @@ -145,10 +146,16 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) return stack; } -#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER) -#define STACK_HASH_MASK (STACK_HASH_SIZE - 1) +/* one hash table bucket entry per 16kB of memory */ +#define STACK_HASH_SCALE 14 +/* limited between 4k and 1M buckets */ +#define STACK_HASH_ORDER_MIN 12 +#define STACK_HASH_ORDER_MAX 20 #define STACK_HASH_SEED 0x9747b28c +static unsigned int stack_hash_order; +static unsigned int stack_hash_mask; + static bool stack_depot_disable; static struct stack_record **stack_table; @@ -175,7 +182,7 @@ void __init stack_depot_want_early_init(void) int __init stack_depot_early_init(void) { - size_t size; + unsigned long entries = 0; /* This is supposed to be called only once, from mm_init() */ if (WARN_ON(__stack_depot_early_init_passed)) @@ -183,13 +190,23 @@ int __init stack_depot_early_init(void) __stack_depot_early_init_passed = true; + if (kasan_enabled() && !stack_hash_order) + stack_hash_order = STACK_HASH_ORDER_MAX; + if (!__stack_depot_want_early_init || stack_depot_disable) return 0; - size = (STACK_HASH_SIZE * sizeof(struct stack_record *)); - pr_info("Stack Depot early init allocating hash table with memblock_alloc, %zu bytes\n", - size); - stack_table = memblock_alloc(size, SMP_CACHE_BYTES); + if (stack_hash_order) + entries = 1UL << stack_hash_order; + stack_table = alloc_large_system_hash("stackdepot", + sizeof(struct stack_record *), + entries, + STACK_HASH_SCALE, + HASH_EARLY | HASH_ZERO, + NULL, + &stack_hash_mask, + 1UL << STACK_HASH_ORDER_MIN, + 1UL << STACK_HASH_ORDER_MAX); if (!stack_table) { pr_err("Stack Depot hash table allocation failed, disabling\n"); @@ -207,13 +224,35 @@ int stack_depot_init(void) mutex_lock(&stack_depot_init_mutex); if (!stack_depot_disable && !stack_table) { - pr_info("Stack Depot allocating hash table with kvcalloc\n"); - stack_table = kvcalloc(STACK_HASH_SIZE, sizeof(struct stack_record *), GFP_KERNEL); + unsigned long entries; + int scale = STACK_HASH_SCALE; + + if (stack_hash_order) { + entries = 1UL << stack_hash_order; + } else { + entries = nr_free_buffer_pages(); + entries = roundup_pow_of_two(entries); + + if (scale > PAGE_SHIFT) + entries >>= (scale - PAGE_SHIFT); + else + entries <<= (PAGE_SHIFT - scale); + } + + if (entries < 1UL << STACK_HASH_ORDER_MIN) + entries = 1UL << STACK_HASH_ORDER_MIN; + if (entries > 1UL << STACK_HASH_ORDER_MAX) + entries = 1UL << STACK_HASH_ORDER_MAX; + + pr_info("Stack Depot allocating hash table of %lu entries with kvcalloc\n", + entries); + stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); if (!stack_table) { pr_err("Stack Depot hash table allocation failed, disabling\n"); stack_depot_disable = true; ret = -ENOMEM; } + stack_hash_mask = entries - 1; } mutex_unlock(&stack_depot_init_mutex); return ret; @@ -386,7 +425,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, goto fast_exit; hash = hash_stack(entries, nr_entries); - bucket = &stack_table[hash & STACK_HASH_MASK]; + bucket = &stack_table[hash & stack_hash_mask]; /* * Fast path: look the stack trace up without locking. diff --git a/lib/test_cpumask.c b/lib/test_cpumask.c new file mode 100644 index 000000000000..a31a1622f1f6 --- /dev/null +++ b/lib/test_cpumask.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KUnit tests for cpumask. + * + * Author: Sander Vanheule <sander@svanheule.net> + */ + +#include <kunit/test.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> + +#define EXPECT_FOR_EACH_CPU_EQ(test, mask) \ + do { \ + const cpumask_t *m = (mask); \ + int mask_weight = cpumask_weight(m); \ + int cpu, iter = 0; \ + for_each_cpu(cpu, m) \ + iter++; \ + KUNIT_EXPECT_EQ((test), mask_weight, iter); \ + } while (0) + +#define EXPECT_FOR_EACH_CPU_NOT_EQ(test, mask) \ + do { \ + const cpumask_t *m = (mask); \ + int mask_weight = cpumask_weight(m); \ + int cpu, iter = 0; \ + for_each_cpu_not(cpu, m) \ + iter++; \ + KUNIT_EXPECT_EQ((test), nr_cpu_ids - mask_weight, iter); \ + } while (0) + +#define EXPECT_FOR_EACH_CPU_WRAP_EQ(test, mask) \ + do { \ + const cpumask_t *m = (mask); \ + int mask_weight = cpumask_weight(m); \ + int cpu, iter = 0; \ + for_each_cpu_wrap(cpu, m, nr_cpu_ids / 2) \ + iter++; \ + KUNIT_EXPECT_EQ((test), mask_weight, iter); \ + } while (0) + +#define EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, name) \ + do { \ + int mask_weight = num_##name##_cpus(); \ + int cpu, iter = 0; \ + for_each_##name##_cpu(cpu) \ + iter++; \ + KUNIT_EXPECT_EQ((test), mask_weight, iter); \ + } while (0) + +static cpumask_t mask_empty; +static cpumask_t mask_all; + +static void test_cpumask_weight(struct kunit *test) +{ + KUNIT_EXPECT_TRUE(test, cpumask_empty(&mask_empty)); + KUNIT_EXPECT_TRUE(test, cpumask_full(cpu_possible_mask)); + KUNIT_EXPECT_TRUE(test, cpumask_full(&mask_all)); + + KUNIT_EXPECT_EQ(test, 0, cpumask_weight(&mask_empty)); + KUNIT_EXPECT_EQ(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask)); + KUNIT_EXPECT_EQ(test, nr_cpumask_bits, cpumask_weight(&mask_all)); +} + +static void test_cpumask_first(struct kunit *test) +{ + KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first(&mask_empty)); + KUNIT_EXPECT_EQ(test, 0, cpumask_first(cpu_possible_mask)); + + KUNIT_EXPECT_EQ(test, 0, cpumask_first_zero(&mask_empty)); + KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first_zero(cpu_possible_mask)); +} + +static void test_cpumask_last(struct kunit *test) +{ + KUNIT_EXPECT_LE(test, nr_cpumask_bits, cpumask_last(&mask_empty)); + KUNIT_EXPECT_EQ(test, nr_cpumask_bits - 1, cpumask_last(cpu_possible_mask)); +} + +static void test_cpumask_next(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, 0, cpumask_next_zero(-1, &mask_empty)); + KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next_zero(-1, cpu_possible_mask)); + + KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next(-1, &mask_empty)); + KUNIT_EXPECT_EQ(test, 0, cpumask_next(-1, cpu_possible_mask)); +} + +static void test_cpumask_iterators(struct kunit *test) +{ + EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty); + EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty); + EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty); + + EXPECT_FOR_EACH_CPU_EQ(test, cpu_possible_mask); + EXPECT_FOR_EACH_CPU_NOT_EQ(test, cpu_possible_mask); + EXPECT_FOR_EACH_CPU_WRAP_EQ(test, cpu_possible_mask); +} + +static void test_cpumask_iterators_builtin(struct kunit *test) +{ + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, possible); + + /* Ensure the dynamic masks are stable while running the tests */ + cpu_hotplug_disable(); + + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, online); + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, present); + + cpu_hotplug_enable(); +} + +static int test_cpumask_init(struct kunit *test) +{ + cpumask_clear(&mask_empty); + cpumask_setall(&mask_all); + + return 0; +} + +static struct kunit_case test_cpumask_cases[] = { + KUNIT_CASE(test_cpumask_weight), + KUNIT_CASE(test_cpumask_first), + KUNIT_CASE(test_cpumask_last), + KUNIT_CASE(test_cpumask_next), + KUNIT_CASE(test_cpumask_iterators), + KUNIT_CASE(test_cpumask_iterators_builtin), + {} +}; + +static struct kunit_suite test_cpumask_suite = { + .name = "cpumask", + .init = test_cpumask_init, + .test_cases = test_cpumask_cases, +}; +kunit_test_suite(test_cpumask_suite); + +MODULE_LICENSE("GPL"); diff --git a/lib/ts_bm.c b/lib/ts_bm.c index 4cf250031f0f..1f2234221dd1 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -80,7 +80,7 @@ static unsigned int bm_find(struct ts_config *conf, struct ts_state *state) /* London calling... */ DEBUGP("found!\n"); - return consumed += (shift-(bm->patlen-1)); + return consumed + (shift-(bm->patlen-1)); next: bs = bm->bad_shift[text[shift-i]]; diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index 4dd6a804ce41..f9553f60a14a 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -7,24 +7,31 @@ # This software may be used and distributed according to the terms # of the GNU General Public License, incorporated herein by reference. -import sys, os, re +import sys, os, re, argparse from signal import signal, SIGPIPE, SIG_DFL signal(SIGPIPE, SIG_DFL) -if len(sys.argv) < 3: - sys.stderr.write("usage: %s [option] file1 file2\n" % sys.argv[0]) - sys.stderr.write("The options are:\n") - sys.stderr.write("-c categorize output based on symbol type\n") - sys.stderr.write("-d Show delta of Data Section\n") - sys.stderr.write("-t Show delta of text Section\n") - sys.exit(-1) +parser = argparse.ArgumentParser(description="Simple script used to compare the symbol sizes of 2 object files") +group = parser.add_mutually_exclusive_group() +group.add_argument('-c', help='categorize output based on symbol type', action='store_true') +group.add_argument('-d', help='Show delta of Data Section', action='store_true') +group.add_argument('-t', help='Show delta of text Section', action='store_true') +parser.add_argument('-p', dest='prefix', help='Arch prefix for the tool being used. Useful in cross build scenarios') +parser.add_argument('file1', help='First file to compare') +parser.add_argument('file2', help='Second file to compare') + +args = parser.parse_args() re_NUMBER = re.compile(r'\.[0-9]+') def getsizes(file, format): sym = {} - with os.popen("nm --size-sort " + file) as f: + nm = "nm" + if args.prefix: + nm = "{}nm".format(args.prefix) + + with os.popen("{} --size-sort {}".format(nm, file)) as f: for line in f: if line.startswith("\n") or ":" in line: continue @@ -77,9 +84,9 @@ def calc(oldfile, newfile, format): delta.reverse() return grow, shrink, add, remove, up, down, delta, old, new, otot, ntot -def print_result(symboltype, symbolformat, argc): +def print_result(symboltype, symbolformat): grow, shrink, add, remove, up, down, delta, old, new, otot, ntot = \ - calc(sys.argv[argc - 1], sys.argv[argc], symbolformat) + calc(args.file1, args.file2, symbolformat) print("add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \ (add, remove, grow, shrink, up, -down, up-down)) @@ -93,13 +100,13 @@ def print_result(symboltype, symbolformat, argc): percent = 0 print("Total: Before=%d, After=%d, chg %+.2f%%" % (otot, ntot, percent)) -if sys.argv[1] == "-c": - print_result("Function", "tT", 3) - print_result("Data", "dDbB", 3) - print_result("RO Data", "rR", 3) -elif sys.argv[1] == "-d": - print_result("Data", "dDbBrR", 3) -elif sys.argv[1] == "-t": - print_result("Function", "tT", 3) +if args.c: + print_result("Function", "tT") + print_result("Data", "dDbB") + print_result("RO Data", "rR") +elif args.d: + print_result("Data", "dDbBrR") +elif args.t: + print_result("Function", "tT") else: - print_result("Function", "tTdDbBrR", 2) + print_result("Function", "tTdDbBrR") diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 503e8abbb2c1..79e759aac543 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1042,7 +1042,8 @@ our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant|$String)}; our $declaration_macros = qr{(?x: (?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,6}\s*\(| (?:$Storage\s+)?[HLP]?LIST_HEAD\s*\(| - (?:SKCIPHER_REQUEST|SHASH_DESC|AHASH_REQUEST)_ON_STACK\s*\( + (?:SKCIPHER_REQUEST|SHASH_DESC|AHASH_REQUEST)_ON_STACK\s*\(| + (?:$Storage\s+)?(?:XA_STATE|XA_STATE_ORDER)\s*\( )}; our %allow_repeated_words = ( @@ -5720,7 +5721,7 @@ sub process { $var !~ /^(?:[a-z0-9_]*|[A-Z0-9_]*)?_?[a-z][A-Z](?:_[a-z0-9_]+|_[A-Z0-9_]+)?$/ && #Ignore some three character SI units explicitly, like MiB and KHz $var !~ /^(?:[a-z_]*?)_?(?:[KMGT]iB|[KMGT]?Hz)(?:_[a-z_]+)?$/) { - while ($var =~ m{($Ident)}g) { + while ($var =~ m{\b($Ident)}g) { my $word = $1; next if ($word !~ /[A-Z][a-z]|[a-z][A-Z]/); if ($check) { diff --git a/scripts/gdb/vmlinux-gdb.py b/scripts/gdb/vmlinux-gdb.py index 4136dc2c59df..3e8d3669f0ce 100644 --- a/scripts/gdb/vmlinux-gdb.py +++ b/scripts/gdb/vmlinux-gdb.py @@ -13,7 +13,7 @@ import os -sys.path.insert(0, os.path.dirname(__file__) + "/scripts/gdb") +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)) + "/scripts/gdb") try: gdb.parse_and_eval("0") diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index e83e6e47a21e..938dec0dfaad 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -45,7 +45,6 @@ exit(code); \ } while (0) -int done; int rcvbufsz; char name[100]; int dbg; @@ -285,7 +284,6 @@ int main(int argc, char *argv[]) pid_t rtid = 0; int fd = 0; - int count = 0; int write_file = 0; int maskset = 0; char *logfile = NULL; @@ -495,7 +493,6 @@ int main(int argc, char *argv[]) len2 = 0; /* For nested attributes, na follows */ na = (struct nlattr *) NLA_DATA(na); - done = 0; while (len2 < aggr_len) { switch (na->nla_type) { case TASKSTATS_TYPE_PID: @@ -509,7 +506,6 @@ int main(int argc, char *argv[]) printf("TGID\t%d\n", rtid); break; case TASKSTATS_TYPE_STATS: - count++; if (print_delays) print_delayacct((struct taskstats *) NLA_DATA(na)); if (print_io_accounting) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 5047d8eef53e..10b34bb03bc1 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -17,6 +17,7 @@ TARGETS += exec TARGETS += filesystems TARGETS += filesystems/binderfs TARGETS += filesystems/epoll +TARGETS += filesystems/fat TARGETS += firmware TARGETS += fpu TARGETS += ftrace diff --git a/tools/testing/selftests/filesystems/fat/.gitignore b/tools/testing/selftests/filesystems/fat/.gitignore new file mode 100644 index 000000000000..b89920ed841c --- /dev/null +++ b/tools/testing/selftests/filesystems/fat/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +rename_exchange diff --git a/tools/testing/selftests/filesystems/fat/Makefile b/tools/testing/selftests/filesystems/fat/Makefile new file mode 100644 index 000000000000..902033f6ef09 --- /dev/null +++ b/tools/testing/selftests/filesystems/fat/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_PROGS := run_fat_tests.sh +TEST_GEN_PROGS_EXTENDED := rename_exchange +CFLAGS += -O2 -g -Wall $(KHDR_INCLUDES) + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/fat/config b/tools/testing/selftests/filesystems/fat/config new file mode 100644 index 000000000000..6cf95e787a17 --- /dev/null +++ b/tools/testing/selftests/filesystems/fat/config @@ -0,0 +1,2 @@ +CONFIG_BLK_DEV_LOOP=y +CONFIG_VFAT_FS=y diff --git a/tools/testing/selftests/filesystems/fat/rename_exchange.c b/tools/testing/selftests/filesystems/fat/rename_exchange.c new file mode 100644 index 000000000000..e488ad354fce --- /dev/null +++ b/tools/testing/selftests/filesystems/fat/rename_exchange.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Program that atomically exchanges two paths using + * the renameat2() system call RENAME_EXCHANGE flag. + * + * Copyright 2022 Red Hat Inc. + * Author: Javier Martinez Canillas <javierm@redhat.com> + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> + +void print_usage(const char *program) +{ + printf("Usage: %s [oldpath] [newpath]\n", program); + printf("Atomically exchange oldpath and newpath\n"); +} + +int main(int argc, char *argv[]) +{ + int ret; + + if (argc != 3) { + print_usage(argv[0]); + exit(EXIT_FAILURE); + } + + ret = renameat2(AT_FDCWD, argv[1], AT_FDCWD, argv[2], RENAME_EXCHANGE); + if (ret) { + perror("rename exchange failed"); + exit(EXIT_FAILURE); + } + + exit(EXIT_SUCCESS); +} diff --git a/tools/testing/selftests/filesystems/fat/run_fat_tests.sh b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh new file mode 100644 index 000000000000..7f35dc3d15df --- /dev/null +++ b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh @@ -0,0 +1,82 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run filesystem operations tests on an 1 MiB disk image that is formatted with +# a vfat filesystem and mounted in a temporary directory using a loop device. +# +# Copyright 2022 Red Hat Inc. +# Author: Javier Martinez Canillas <javierm@redhat.com> + +set -e +set -u +set -o pipefail + +BASE_DIR="$(dirname $0)" +TMP_DIR="$(mktemp -d /tmp/fat_tests_tmp.XXXX)" +IMG_PATH="${TMP_DIR}/fat.img" +MNT_PATH="${TMP_DIR}/mnt" + +cleanup() +{ + mountpoint -q "${MNT_PATH}" && unmount_image + rm -rf "${TMP_DIR}" +} +trap cleanup SIGINT SIGTERM EXIT + +create_loopback() +{ + touch "${IMG_PATH}" + chattr +C "${IMG_PATH}" >/dev/null 2>&1 || true + + truncate -s 1M "${IMG_PATH}" + mkfs.vfat "${IMG_PATH}" >/dev/null 2>&1 +} + +mount_image() +{ + mkdir -p "${MNT_PATH}" + sudo mount -o loop "${IMG_PATH}" "${MNT_PATH}" +} + +rename_exchange_test() +{ + local rename_exchange="${BASE_DIR}/rename_exchange" + local old_path="${MNT_PATH}/old_file" + local new_path="${MNT_PATH}/new_file" + + echo old | sudo tee "${old_path}" >/dev/null 2>&1 + echo new | sudo tee "${new_path}" >/dev/null 2>&1 + sudo "${rename_exchange}" "${old_path}" "${new_path}" >/dev/null 2>&1 + sudo sync -f "${MNT_PATH}" + grep new "${old_path}" >/dev/null 2>&1 + grep old "${new_path}" >/dev/null 2>&1 +} + +rename_exchange_subdir_test() +{ + local rename_exchange="${BASE_DIR}/rename_exchange" + local dir_path="${MNT_PATH}/subdir" + local old_path="${MNT_PATH}/old_file" + local new_path="${dir_path}/new_file" + + sudo mkdir -p "${dir_path}" + echo old | sudo tee "${old_path}" >/dev/null 2>&1 + echo new | sudo tee "${new_path}" >/dev/null 2>&1 + sudo "${rename_exchange}" "${old_path}" "${new_path}" >/dev/null 2>&1 + sudo sync -f "${MNT_PATH}" + grep new "${old_path}" >/dev/null 2>&1 + grep old "${new_path}" >/dev/null 2>&1 +} + +unmount_image() +{ + sudo umount "${MNT_PATH}" &> /dev/null +} + +create_loopback +mount_image +rename_exchange_test +rename_exchange_subdir_test +unmount_image + +exit 0 diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 28604c9f805c..e5962f4794f5 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -211,10 +211,19 @@ static int make_exe(const uint8_t *payload, size_t len) } #endif -static bool g_vsyscall = false; +/* + * 0: vsyscall VMA doesn't exist vsyscall=none + * 1: vsyscall VMA is r-xp vsyscall=emulate + * 2: vsyscall VMA is --xp vsyscall=xonly + */ +static int g_vsyscall; +static const char *str_vsyscall; -static const char str_vsyscall[] = +static const char str_vsyscall_0[] = ""; +static const char str_vsyscall_1[] = "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; +static const char str_vsyscall_2[] = +"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; #ifdef __x86_64__ static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) @@ -223,7 +232,7 @@ static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) } /* - * vsyscall page can't be unmapped, probe it with memory load. + * vsyscall page can't be unmapped, probe it directly. */ static void vsyscall(void) { @@ -246,13 +255,52 @@ static void vsyscall(void) act.sa_sigaction = sigaction_SIGSEGV; (void)sigaction(SIGSEGV, &act, NULL); + /* gettimeofday(NULL, NULL); */ + asm volatile ( + "call %P0" + : + : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) + : "rax", "rcx", "r11" + ); + exit(0); + } + waitpid(pid, &wstatus, 0); + if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) { + /* vsyscall page exists and is executable. */ + } else { + /* vsyscall page doesn't exist. */ + g_vsyscall = 0; + return; + } + + pid = fork(); + if (pid < 0) { + fprintf(stderr, "fork, errno %d\n", errno); + exit(1); + } + if (pid == 0) { + struct rlimit rlim = {0, 0}; + (void)setrlimit(RLIMIT_CORE, &rlim); + + /* Hide "segfault at ffffffffff600000" messages. */ + struct sigaction act; + memset(&act, 0, sizeof(struct sigaction)); + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = sigaction_SIGSEGV; + (void)sigaction(SIGSEGV, &act, NULL); + *(volatile int *)0xffffffffff600000UL; exit(0); } waitpid(pid, &wstatus, 0); if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) { - g_vsyscall = true; + /* vsyscall page is readable and executable. */ + g_vsyscall = 1; + return; } + + /* vsyscall page is executable but unreadable. */ + g_vsyscall = 2; } int main(void) @@ -261,6 +309,19 @@ int main(void) int exec_fd; vsyscall(); + switch (g_vsyscall) { + case 0: + str_vsyscall = str_vsyscall_0; + break; + case 1: + str_vsyscall = str_vsyscall_1; + break; + case 2: + str_vsyscall = str_vsyscall_2; + break; + default: + abort(); + } atexit(ate); @@ -314,7 +375,7 @@ int main(void) /* Test /proc/$PID/maps */ { - const size_t len = strlen(buf0) + (g_vsyscall ? strlen(str_vsyscall) : 0); + const size_t len = strlen(buf0) + strlen(str_vsyscall); char buf[256]; ssize_t rv; int fd; @@ -327,7 +388,7 @@ int main(void) rv = read(fd, buf, sizeof(buf)); assert(rv == len); assert(memcmp(buf, buf0, strlen(buf0)) == 0); - if (g_vsyscall) { + if (g_vsyscall > 0) { assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0); } } @@ -374,7 +435,7 @@ int main(void) assert(memmem(buf, rv, S[i], strlen(S[i]))); } - if (g_vsyscall) { + if (g_vsyscall > 0) { assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall))); } } |