summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/hashtab.c6
-rw-r--r--kernel/bpf/offload.c2
-rw-r--r--kernel/bpf/verifier.c2
-rw-r--r--kernel/irq/msi.c4
-rw-r--r--kernel/locking/lockdep.c28
-rw-r--r--kernel/module/main.c58
-rw-r--r--kernel/module/stats.c4
-rw-r--r--kernel/trace/fprobe.c73
-rw-r--r--kernel/trace/rethook.c4
9 files changed, 135 insertions, 46 deletions
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 00c253b84bf5..9901efee4339 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1215,7 +1215,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
ret = htab_lock_bucket(htab, b, hash, &flags);
if (ret)
- return ret;
+ goto err_lock_bucket;
l_old = lookup_elem_raw(head, hash, key, key_size);
@@ -1236,6 +1236,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
err:
htab_unlock_bucket(htab, b, hash, flags);
+err_lock_bucket:
if (ret)
htab_lru_push_free(htab, l_new);
else if (l_old)
@@ -1338,7 +1339,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
ret = htab_lock_bucket(htab, b, hash, &flags);
if (ret)
- return ret;
+ goto err_lock_bucket;
l_old = lookup_elem_raw(head, hash, key, key_size);
@@ -1361,6 +1362,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
ret = 0;
err:
htab_unlock_bucket(htab, b, hash, flags);
+err_lock_bucket:
if (l_new)
bpf_lru_push_free(&htab->lru, &l_new->lru_node);
return ret;
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index d9c9f45e3529..8a26cd8814c1 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -859,4 +859,4 @@ static int __init bpf_offload_init(void)
return rhashtable_init(&offdevs, &offdevs_params);
}
-late_initcall(bpf_offload_init);
+core_initcall(bpf_offload_init);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fbcf5a4e2fcd..5871aa78d01a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -17033,7 +17033,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
insn->dst_reg,
shift);
- insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
+ insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
(1ULL << size * 8) - 1);
}
}
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 7a97bcb086bf..b4c31a5c1147 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -542,7 +542,7 @@ fail:
return ret;
}
-#ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS
+#if defined(CONFIG_PCI_MSI_ARCH_FALLBACKS) || defined(CONFIG_PCI_XEN)
/**
* msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
* @dev: The device (PCI, platform etc) which will get sysfs entries
@@ -574,7 +574,7 @@ void msi_device_destroy_sysfs(struct device *dev)
msi_for_each_desc(desc, dev, MSI_DESC_ALL)
msi_sysfs_remove_desc(dev, desc);
}
-#endif /* CONFIG_PCI_MSI_ARCH_FALLBACK */
+#endif /* CONFIG_PCI_MSI_ARCH_FALLBACK || CONFIG_PCI_XEN */
#else /* CONFIG_SYSFS */
static inline int msi_sysfs_create_group(struct device *dev) { return 0; }
static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) { return 0; }
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index dcd1d5bfc1e0..4dfd2f3e09b2 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2263,6 +2263,9 @@ static inline bool usage_match(struct lock_list *entry, void *mask)
static inline bool usage_skip(struct lock_list *entry, void *mask)
{
+ if (entry->class->lock_type == LD_LOCK_NORMAL)
+ return false;
+
/*
* Skip local_lock() for irq inversion detection.
*
@@ -2289,14 +2292,16 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
* As a result, we will skip local_lock(), when we search for irq
* inversion bugs.
*/
- if (entry->class->lock_type == LD_LOCK_PERCPU) {
- if (DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
- return false;
+ if (entry->class->lock_type == LD_LOCK_PERCPU &&
+ DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
+ return false;
- return true;
- }
+ /*
+ * Skip WAIT_OVERRIDE for irq inversion detection -- it's not actually
+ * a lock and only used to override the wait_type.
+ */
- return false;
+ return true;
}
/*
@@ -4768,7 +4773,8 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
for (; depth < curr->lockdep_depth; depth++) {
struct held_lock *prev = curr->held_locks + depth;
- u8 prev_inner = hlock_class(prev)->wait_type_inner;
+ struct lock_class *class = hlock_class(prev);
+ u8 prev_inner = class->wait_type_inner;
if (prev_inner) {
/*
@@ -4778,6 +4784,14 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
* Also due to trylocks.
*/
curr_inner = min(curr_inner, prev_inner);
+
+ /*
+ * Allow override for annotations -- this is typically
+ * only valid/needed for code that only exists when
+ * CONFIG_PREEMPT_RT=n.
+ */
+ if (unlikely(class->lock_type == LD_LOCK_WAIT_OVERRIDE))
+ curr_inner = prev_inner;
}
}
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 044aa2c9e3cb..b4c7e925fdb0 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -3057,25 +3057,13 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
return load_module(&info, uargs, 0);
}
-SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
+static int file_init_module(struct file *file, const char __user * uargs, int flags)
{
struct load_info info = { };
void *buf = NULL;
int len;
- int err;
-
- err = may_init_module();
- if (err)
- return err;
-
- pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags);
- if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS
- |MODULE_INIT_IGNORE_VERMAGIC
- |MODULE_INIT_COMPRESSED_FILE))
- return -EINVAL;
-
- len = kernel_read_file_from_fd(fd, 0, &buf, INT_MAX, NULL,
+ len = kernel_read_file(file, 0, &buf, INT_MAX, NULL,
READING_MODULE);
if (len < 0) {
mod_stat_inc(&failed_kreads);
@@ -3084,7 +3072,7 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
}
if (flags & MODULE_INIT_COMPRESSED_FILE) {
- err = module_decompress(&info, buf, len);
+ int err = module_decompress(&info, buf, len);
vfree(buf); /* compressed data is no longer needed */
if (err) {
mod_stat_inc(&failed_decompress);
@@ -3099,6 +3087,46 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
return load_module(&info, uargs, flags);
}
+/*
+ * kernel_read_file() will already deny write access, but module
+ * loading wants _exclusive_ access to the file, so we do that
+ * here, along with basic sanity checks.
+ */
+static int prepare_file_for_module_load(struct file *file)
+{
+ if (!file || !(file->f_mode & FMODE_READ))
+ return -EBADF;
+ if (!S_ISREG(file_inode(file)->i_mode))
+ return -EINVAL;
+ return exclusive_deny_write_access(file);
+}
+
+SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
+{
+ struct fd f;
+ int err;
+
+ err = may_init_module();
+ if (err)
+ return err;
+
+ pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags);
+
+ if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS
+ |MODULE_INIT_IGNORE_VERMAGIC
+ |MODULE_INIT_COMPRESSED_FILE))
+ return -EINVAL;
+
+ f = fdget(fd);
+ err = prepare_file_for_module_load(f.file);
+ if (!err) {
+ err = file_init_module(f.file, uargs, flags);
+ allow_write_access(f.file);
+ }
+ fdput(f);
+ return err;
+}
+
/* Keep in sync with MODULE_FLAGS_BUF_SIZE !!! */
char *module_flags(struct module *mod, char *buf, bool show_state)
{
diff --git a/kernel/module/stats.c b/kernel/module/stats.c
index ad7b6ada29f2..6ab2c94d6bc3 100644
--- a/kernel/module/stats.c
+++ b/kernel/module/stats.c
@@ -276,6 +276,7 @@ static ssize_t read_file_mod_stats(struct file *file, char __user *user_buf,
struct mod_fail_load *mod_fail;
unsigned int len, size, count_failed = 0;
char *buf;
+ int ret;
u32 live_mod_count, fkreads, fdecompress, fbecoming, floads;
unsigned long total_size, text_size, ikread_bytes, ibecoming_bytes,
idecompress_bytes, imod_bytes, total_virtual_lost;
@@ -390,8 +391,9 @@ static ssize_t read_file_mod_stats(struct file *file, char __user *user_buf,
out_unlock:
mutex_unlock(&module_mutex);
out:
+ ret = simple_read_from_buffer(user_buf, count, ppos, buf, len);
kfree(buf);
- return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+ return ret;
}
#undef MAX_PREAMBLE
#undef MAX_FAILED_MOD_PRINT
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 9abb3905bc8e..18d36842faf5 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -17,36 +17,30 @@
struct fprobe_rethook_node {
struct rethook_node node;
unsigned long entry_ip;
+ unsigned long entry_parent_ip;
char data[];
};
-static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct ftrace_regs *fregs)
+static inline void __fprobe_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
struct fprobe_rethook_node *fpr;
struct rethook_node *rh = NULL;
struct fprobe *fp;
void *entry_data = NULL;
- int bit, ret;
+ int ret = 0;
fp = container_of(ops, struct fprobe, ops);
- if (fprobe_disabled(fp))
- return;
-
- bit = ftrace_test_recursion_trylock(ip, parent_ip);
- if (bit < 0) {
- fp->nmissed++;
- return;
- }
if (fp->exit_handler) {
rh = rethook_try_get(fp->rethook);
if (!rh) {
fp->nmissed++;
- goto out;
+ return;
}
fpr = container_of(rh, struct fprobe_rethook_node, node);
fpr->entry_ip = ip;
+ fpr->entry_parent_ip = parent_ip;
if (fp->entry_data_size)
entry_data = fpr->data;
}
@@ -61,23 +55,60 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
else
rethook_hook(rh, ftrace_get_regs(fregs), true);
}
-out:
+}
+
+static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+ struct fprobe *fp;
+ int bit;
+
+ fp = container_of(ops, struct fprobe, ops);
+ if (fprobe_disabled(fp))
+ return;
+
+ /* recursion detection has to go before any traceable function and
+ * all functions before this point should be marked as notrace
+ */
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0) {
+ fp->nmissed++;
+ return;
+ }
+ __fprobe_handler(ip, parent_ip, ops, fregs);
ftrace_test_recursion_unlock(bit);
+
}
NOKPROBE_SYMBOL(fprobe_handler);
static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
- struct fprobe *fp = container_of(ops, struct fprobe, ops);
+ struct fprobe *fp;
+ int bit;
+
+ fp = container_of(ops, struct fprobe, ops);
+ if (fprobe_disabled(fp))
+ return;
+
+ /* recursion detection has to go before any traceable function and
+ * all functions called before this point should be marked as notrace
+ */
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0) {
+ fp->nmissed++;
+ return;
+ }
if (unlikely(kprobe_running())) {
fp->nmissed++;
return;
}
+
kprobe_busy_begin();
- fprobe_handler(ip, parent_ip, ops, fregs);
+ __fprobe_handler(ip, parent_ip, ops, fregs);
kprobe_busy_end();
+ ftrace_test_recursion_unlock(bit);
}
static void fprobe_exit_handler(struct rethook_node *rh, void *data,
@@ -85,14 +116,26 @@ static void fprobe_exit_handler(struct rethook_node *rh, void *data,
{
struct fprobe *fp = (struct fprobe *)data;
struct fprobe_rethook_node *fpr;
+ int bit;
if (!fp || fprobe_disabled(fp))
return;
fpr = container_of(rh, struct fprobe_rethook_node, node);
+ /*
+ * we need to assure no calls to traceable functions in-between the
+ * end of fprobe_handler and the beginning of fprobe_exit_handler.
+ */
+ bit = ftrace_test_recursion_trylock(fpr->entry_ip, fpr->entry_parent_ip);
+ if (bit < 0) {
+ fp->nmissed++;
+ return;
+ }
+
fp->exit_handler(fp, fpr->entry_ip, regs,
fp->entry_data_size ? (void *)fpr->data : NULL);
+ ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(fprobe_exit_handler);
diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c
index 32c3dfdb4d6a..60f6cb2b486b 100644
--- a/kernel/trace/rethook.c
+++ b/kernel/trace/rethook.c
@@ -288,7 +288,7 @@ unsigned long rethook_trampoline_handler(struct pt_regs *regs,
* These loops must be protected from rethook_free_rcu() because those
* are accessing 'rhn->rethook'.
*/
- preempt_disable();
+ preempt_disable_notrace();
/*
* Run the handler on the shadow stack. Do not unlink the list here because
@@ -321,7 +321,7 @@ unsigned long rethook_trampoline_handler(struct pt_regs *regs,
first = first->next;
rethook_recycle(rhn);
}
- preempt_enable();
+ preempt_enable_notrace();
return correct_ret_addr;
}