summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/btf.c24
-rw-r--r--kernel/bpf/core.c43
-rw-r--r--kernel/bpf/verifier.c33
-rw-r--r--kernel/dma/debug.c9
-rw-r--r--kernel/dma/direct.h7
-rw-r--r--kernel/dma/mapping.c6
-rw-r--r--kernel/dma/swiotlb.c21
-rw-r--r--kernel/events/core.c19
-rw-r--r--kernel/rcu/rcu.h9
-rw-r--r--kernel/rcu/srcutiny.c19
-rw-r--r--kernel/rcu/srcutree.c211
-rw-r--r--kernel/trace/ftrace.c4
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/trace.c36
14 files changed, 281 insertions, 162 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 4872d2a6c42d..71f9143fe90f 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1787,7 +1787,16 @@ static void btf_free_id(struct btf *btf)
* of the _bh() version.
*/
spin_lock_irqsave(&btf_idr_lock, flags);
- idr_remove(&btf_idr, btf->id);
+ if (btf->id) {
+ idr_remove(&btf_idr, btf->id);
+ /*
+ * Clear the id here to make this function idempotent, since it will get
+ * called a couple of times for module BTFs: on module unload, and then
+ * the final btf_put(). btf_alloc_id() starts IDs with 1, so we can use
+ * 0 as sentinel value.
+ */
+ WRITE_ONCE(btf->id, 0);
+ }
spin_unlock_irqrestore(&btf_idr_lock, flags);
}
@@ -8115,7 +8124,7 @@ static void bpf_btf_show_fdinfo(struct seq_file *m, struct file *filp)
{
const struct btf *btf = filp->private_data;
- seq_printf(m, "btf_id:\t%u\n", btf->id);
+ seq_printf(m, "btf_id:\t%u\n", READ_ONCE(btf->id));
}
#endif
@@ -8197,7 +8206,7 @@ int btf_get_info_by_fd(const struct btf *btf,
if (copy_from_user(&info, uinfo, info_copy))
return -EFAULT;
- info.id = btf->id;
+ info.id = READ_ONCE(btf->id);
ubtf = u64_to_user_ptr(info.btf);
btf_copy = min_t(u32, btf->data_size, info.btf_size);
if (copy_to_user(ubtf, btf->data, btf_copy))
@@ -8260,7 +8269,7 @@ int btf_get_fd_by_id(u32 id)
u32 btf_obj_id(const struct btf *btf)
{
- return btf->id;
+ return READ_ONCE(btf->id);
}
bool btf_is_kernel(const struct btf *btf)
@@ -8382,6 +8391,13 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
if (btf_mod->module != module)
continue;
+ /*
+ * For modules, we do the freeing of BTF IDR as soon as
+ * module goes away to disable BTF discovery, since the
+ * btf_try_get_module() on such BTFs will fail. This may
+ * be called again on btf_put(), but it's ok to do so.
+ */
+ btf_free_id(btf_mod->btf);
list_del(&btf_mod->list);
if (btf_mod->sysfs_attr)
sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3ece2da55625..7b675a451ec8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1422,6 +1422,27 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
*to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off);
break;
+
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
+ *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^
+ from->imm);
+ *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
+ /*
+ * Cannot use BPF_STX_MEM() macro here as it
+ * hardcodes BPF_MEM mode, losing PROBE_MEM32
+ * and breaking arena addressing in the JIT.
+ */
+ *to++ = (struct bpf_insn) {
+ .code = BPF_STX | BPF_PROBE_MEM32 |
+ BPF_SIZE(from->code),
+ .dst_reg = from->dst_reg,
+ .src_reg = BPF_REG_AX,
+ .off = from->off,
+ };
+ break;
}
out:
return to - to_buff;
@@ -1736,6 +1757,12 @@ bool bpf_opcode_in_insntable(u8 code)
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+/* Absolute value of s32 without undefined behavior for S32_MIN */
+static u32 abs_s32(s32 x)
+{
+ return x >= 0 ? (u32)x : -(u32)x;
+}
+
/**
* ___bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -1900,8 +1927,8 @@ select_insn:
DST = do_div(AX, (u32) SRC);
break;
case 1:
- AX = abs((s32)DST);
- AX = do_div(AX, abs((s32)SRC));
+ AX = abs_s32((s32)DST);
+ AX = do_div(AX, abs_s32((s32)SRC));
if ((s32)DST < 0)
DST = (u32)-AX;
else
@@ -1928,8 +1955,8 @@ select_insn:
DST = do_div(AX, (u32) IMM);
break;
case 1:
- AX = abs((s32)DST);
- AX = do_div(AX, abs((s32)IMM));
+ AX = abs_s32((s32)DST);
+ AX = do_div(AX, abs_s32((s32)IMM));
if ((s32)DST < 0)
DST = (u32)-AX;
else
@@ -1955,8 +1982,8 @@ select_insn:
DST = (u32) AX;
break;
case 1:
- AX = abs((s32)DST);
- do_div(AX, abs((s32)SRC));
+ AX = abs_s32((s32)DST);
+ do_div(AX, abs_s32((s32)SRC));
if (((s32)DST < 0) == ((s32)SRC < 0))
DST = (u32)AX;
else
@@ -1982,8 +2009,8 @@ select_insn:
DST = (u32) AX;
break;
case 1:
- AX = abs((s32)DST);
- do_div(AX, abs((s32)IMM));
+ AX = abs_s32((s32)DST);
+ do_div(AX, abs_s32((s32)IMM));
if (((s32)DST < 0) == ((s32)IMM < 0))
DST = (u32)AX;
else
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 159b25f8269d..f108c01ff6d0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -15910,6 +15910,13 @@ static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *ins
/* Apply bswap if alu64 or switch between big-endian and little-endian machines */
bool need_bswap = alu64 || (to_le == is_big_endian);
+ /*
+ * If the register is mutated, manually reset its scalar ID to break
+ * any existing ties and avoid incorrect bounds propagation.
+ */
+ if (need_bswap || insn->imm == 16 || insn->imm == 32)
+ dst_reg->id = 0;
+
if (need_bswap) {
if (insn->imm == 16)
dst_reg->var_off = tnum_bswap16(dst_reg->var_off);
@@ -15992,7 +15999,7 @@ static int maybe_fork_scalars(struct bpf_verifier_env *env, struct bpf_insn *ins
else
return 0;
- branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
+ branch = push_stack(env, env->insn_idx, env->insn_idx, false);
if (IS_ERR(branch))
return PTR_ERR(branch);
@@ -17408,6 +17415,12 @@ static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_s
continue;
if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
continue;
+ /*
+ * Skip mixed 32/64-bit links: the delta relationship doesn't
+ * hold across different ALU widths.
+ */
+ if (((reg->id ^ known_reg->id) & BPF_ADD_CONST) == BPF_ADD_CONST)
+ continue;
if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
reg->off == known_reg->off) {
s32 saved_subreg_def = reg->subreg_def;
@@ -17435,7 +17448,7 @@ static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_s
scalar32_min_max_add(reg, &fake_reg);
scalar_min_max_add(reg, &fake_reg);
reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
- if (known_reg->id & BPF_ADD_CONST32)
+ if ((reg->id | known_reg->id) & BPF_ADD_CONST32)
zext_32_to_64(reg);
reg_bounds_sync(reg);
}
@@ -19863,11 +19876,14 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
* Also verify that new value satisfies old value range knowledge.
*/
- /* ADD_CONST mismatch: different linking semantics */
- if ((rold->id & BPF_ADD_CONST) && !(rcur->id & BPF_ADD_CONST))
- return false;
-
- if (rold->id && !(rold->id & BPF_ADD_CONST) && (rcur->id & BPF_ADD_CONST))
+ /*
+ * ADD_CONST flags must match exactly: BPF_ADD_CONST32 and
+ * BPF_ADD_CONST64 have different linking semantics in
+ * sync_linked_regs() (alu32 zero-extends, alu64 does not),
+ * so pruning across different flag types is unsafe.
+ */
+ if (rold->id &&
+ (rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST))
return false;
/* Both have offset linkage: offsets must match */
@@ -20904,7 +20920,8 @@ static int process_bpf_exit_full(struct bpf_verifier_env *env,
* state when it exits.
*/
int err = check_resource_leak(env, exception_exit,
- !env->cur_state->curframe,
+ exception_exit || !env->cur_state->curframe,
+ exception_exit ? "bpf_throw" :
"BPF_EXIT instruction in main prog");
if (err)
return err;
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 86f87e43438c..0677918f06a8 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -453,7 +453,7 @@ static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
return overlap;
}
-static void active_cacheline_inc_overlap(phys_addr_t cln)
+static void active_cacheline_inc_overlap(phys_addr_t cln, bool is_cache_clean)
{
int overlap = active_cacheline_read_overlap(cln);
@@ -462,7 +462,7 @@ static void active_cacheline_inc_overlap(phys_addr_t cln)
/* If we overflowed the overlap counter then we're potentially
* leaking dma-mappings.
*/
- WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
+ WARN_ONCE(!is_cache_clean && overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
pr_fmt("exceeded %d overlapping mappings of cacheline %pa\n"),
ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
}
@@ -495,7 +495,7 @@ static int active_cacheline_insert(struct dma_debug_entry *entry,
if (rc == -EEXIST) {
struct dma_debug_entry *existing;
- active_cacheline_inc_overlap(cln);
+ active_cacheline_inc_overlap(cln, entry->is_cache_clean);
existing = radix_tree_lookup(&dma_active_cacheline, cln);
/* A lookup failure here after we got -EEXIST is unexpected. */
WARN_ON(!existing);
@@ -601,7 +601,8 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs)
unsigned long flags;
int rc;
- entry->is_cache_clean = !!(attrs & DMA_ATTR_CPU_CACHE_CLEAN);
+ entry->is_cache_clean = attrs & (DMA_ATTR_DEBUGGING_IGNORE_CACHELINES |
+ DMA_ATTR_REQUIRE_COHERENT);
bucket = get_hash_bucket(entry, &flags);
hash_bucket_add(bucket, entry);
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index e89f175e9c2d..6184ff303f08 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -84,7 +84,7 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
dma_addr_t dma_addr;
if (is_swiotlb_force_bounce(dev)) {
- if (attrs & DMA_ATTR_MMIO)
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
return DMA_MAPPING_ERROR;
return swiotlb_map(dev, phys, size, dir, attrs);
@@ -98,7 +98,8 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
dma_addr = phys_to_dma(dev, phys);
if (unlikely(!dma_capable(dev, dma_addr, size, true)) ||
dma_kmalloc_needs_bounce(dev, size, dir)) {
- if (is_swiotlb_active(dev))
+ if (is_swiotlb_active(dev) &&
+ !(attrs & DMA_ATTR_REQUIRE_COHERENT))
return swiotlb_map(dev, phys, size, dir, attrs);
goto err_overflow;
@@ -123,7 +124,7 @@ static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr,
{
phys_addr_t phys;
- if (attrs & DMA_ATTR_MMIO)
+ if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
/* nothing to do: uncached and no swiotlb */
return;
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 3928a509c44c..6d3dd0bd3a88 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -164,6 +164,9 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
if (WARN_ON_ONCE(!dev->dma_mask))
return DMA_MAPPING_ERROR;
+ if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
+ return DMA_MAPPING_ERROR;
+
if (dma_map_direct(dev, ops) ||
(!is_mmio && arch_dma_map_phys_direct(dev, phys + size)))
addr = dma_direct_map_phys(dev, phys, size, dir, attrs);
@@ -235,6 +238,9 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
BUG_ON(!valid_dma_direction(dir));
+ if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
+ return -EOPNOTSUPP;
+
if (WARN_ON_ONCE(!dev->dma_mask))
return 0;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d8e6f1d889d5..9fd73700ddcf 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -30,6 +30,7 @@
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/io.h>
+#include <linux/kmsan-checks.h>
#include <linux/iommu-helper.h>
#include <linux/init.h>
#include <linux/memblock.h>
@@ -901,10 +902,19 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
local_irq_save(flags);
page = pfn_to_page(pfn);
- if (dir == DMA_TO_DEVICE)
+ if (dir == DMA_TO_DEVICE) {
+ /*
+ * Ideally, kmsan_check_highmem_page()
+ * could be used here to detect infoleaks,
+ * but callers may map uninitialized buffers
+ * that will be written by the device,
+ * causing false positives.
+ */
memcpy_from_page(vaddr, page, offset, sz);
- else
+ } else {
+ kmsan_unpoison_memory(vaddr, sz);
memcpy_to_page(page, offset, vaddr, sz);
+ }
local_irq_restore(flags);
size -= sz;
@@ -913,8 +923,15 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
offset = 0;
}
} else if (dir == DMA_TO_DEVICE) {
+ /*
+ * Ideally, kmsan_check_memory() could be used here to detect
+ * infoleaks (uninitialized data being sent to device), but
+ * callers may map uninitialized buffers that will be written
+ * by the device, causing false positives.
+ */
memcpy(vaddr, phys_to_virt(orig_addr), size);
} else {
+ kmsan_unpoison_memory(vaddr, size);
memcpy(phys_to_virt(orig_addr), vaddr, size);
}
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1f5699b339ec..89b40e439717 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4813,7 +4813,7 @@ static void __perf_event_read(void *info)
struct perf_event *sub, *event = data->event;
struct perf_event_context *ctx = event->ctx;
struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
- struct pmu *pmu = event->pmu;
+ struct pmu *pmu;
/*
* If this is a task context, we need to check whether it is
@@ -4825,7 +4825,7 @@ static void __perf_event_read(void *info)
if (ctx->task && cpuctx->task_ctx != ctx)
return;
- raw_spin_lock(&ctx->lock);
+ guard(raw_spinlock)(&ctx->lock);
ctx_time_update_event(ctx, event);
perf_event_update_time(event);
@@ -4833,25 +4833,22 @@ static void __perf_event_read(void *info)
perf_event_update_sibling_time(event);
if (event->state != PERF_EVENT_STATE_ACTIVE)
- goto unlock;
+ return;
if (!data->group) {
- pmu->read(event);
+ perf_pmu_read(event);
data->ret = 0;
- goto unlock;
+ return;
}
+ pmu = event->pmu_ctx->pmu;
pmu->start_txn(pmu, PERF_PMU_TXN_READ);
- pmu->read(event);
-
+ perf_pmu_read(event);
for_each_sibling_event(sub, event)
perf_pmu_read(sub);
data->ret = pmu->commit_txn(pmu);
-
-unlock:
- raw_spin_unlock(&ctx->lock);
}
static inline u64 perf_event_count(struct perf_event *event, bool self)
@@ -14744,7 +14741,7 @@ inherit_event(struct perf_event *parent_event,
get_ctx(child_ctx);
child_event->ctx = child_ctx;
- pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event);
+ pmu_ctx = find_get_pmu_context(parent_event->pmu_ctx->pmu, child_ctx, child_event);
if (IS_ERR(pmu_ctx)) {
free_event(child_event);
return ERR_CAST(pmu_ctx);
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index dc5d614b372c..9b10b57b79ad 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -502,6 +502,15 @@ do { \
___locked; \
})
+#define raw_spin_trylock_irqsave_rcu_node(p, flags) \
+({ \
+ bool ___locked = raw_spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
+ \
+ if (___locked) \
+ smp_mb__after_unlock_lock(); \
+ ___locked; \
+})
+
#define raw_lockdep_assert_held_rcu_node(p) \
lockdep_assert_held(&ACCESS_PRIVATE(p, lock))
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 3450c3751ef7..a2e2d516e51b 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -9,6 +9,7 @@
*/
#include <linux/export.h>
+#include <linux/irq_work.h>
#include <linux/mutex.h>
#include <linux/preempt.h>
#include <linux/rcupdate_wait.h>
@@ -41,6 +42,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp)
ssp->srcu_idx_max = 0;
INIT_WORK(&ssp->srcu_work, srcu_drive_gp);
INIT_LIST_HEAD(&ssp->srcu_work.entry);
+ init_irq_work(&ssp->srcu_irq_work, srcu_tiny_irq_work);
return 0;
}
@@ -84,6 +86,7 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
void cleanup_srcu_struct(struct srcu_struct *ssp)
{
WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]);
+ irq_work_sync(&ssp->srcu_irq_work);
flush_work(&ssp->srcu_work);
WARN_ON(ssp->srcu_gp_running);
WARN_ON(ssp->srcu_gp_waiting);
@@ -177,6 +180,20 @@ void srcu_drive_gp(struct work_struct *wp)
}
EXPORT_SYMBOL_GPL(srcu_drive_gp);
+/*
+ * Use an irq_work to defer schedule_work() to avoid acquiring the workqueue
+ * pool->lock while the caller might hold scheduler locks, causing lockdep
+ * splats due to workqueue_init() doing a wakeup.
+ */
+void srcu_tiny_irq_work(struct irq_work *irq_work)
+{
+ struct srcu_struct *ssp;
+
+ ssp = container_of(irq_work, struct srcu_struct, srcu_irq_work);
+ schedule_work(&ssp->srcu_work);
+}
+EXPORT_SYMBOL_GPL(srcu_tiny_irq_work);
+
static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
{
unsigned long cookie;
@@ -189,7 +206,7 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
WRITE_ONCE(ssp->srcu_idx_max, cookie);
if (!READ_ONCE(ssp->srcu_gp_running)) {
if (likely(srcu_init_done))
- schedule_work(&ssp->srcu_work);
+ irq_work_queue(&ssp->srcu_irq_work);
else if (list_empty(&ssp->srcu_work.entry))
list_add(&ssp->srcu_work.entry, &srcu_boot_list);
}
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index aef8e91ad33e..0d01cd8c4b4a 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -19,6 +19,7 @@
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
+#include <linux/irq_work.h>
#include <linux/rcupdate_wait.h>
#include <linux/sched.h>
#include <linux/smp.h>
@@ -75,44 +76,9 @@ static bool __read_mostly srcu_init_done;
static void srcu_invoke_callbacks(struct work_struct *work);
static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
static void process_srcu(struct work_struct *work);
+static void srcu_irq_work(struct irq_work *work);
static void srcu_delay_timer(struct timer_list *t);
-/* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
-#define spin_lock_rcu_node(p) \
-do { \
- spin_lock(&ACCESS_PRIVATE(p, lock)); \
- smp_mb__after_unlock_lock(); \
-} while (0)
-
-#define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock))
-
-#define spin_lock_irq_rcu_node(p) \
-do { \
- spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \
- smp_mb__after_unlock_lock(); \
-} while (0)
-
-#define spin_unlock_irq_rcu_node(p) \
- spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
-
-#define spin_lock_irqsave_rcu_node(p, flags) \
-do { \
- spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
- smp_mb__after_unlock_lock(); \
-} while (0)
-
-#define spin_trylock_irqsave_rcu_node(p, flags) \
-({ \
- bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
- \
- if (___locked) \
- smp_mb__after_unlock_lock(); \
- ___locked; \
-})
-
-#define spin_unlock_irqrestore_rcu_node(p, flags) \
- spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \
-
/*
* Initialize SRCU per-CPU data. Note that statically allocated
* srcu_struct structures might already have srcu_read_lock() and
@@ -131,7 +97,7 @@ static void init_srcu_struct_data(struct srcu_struct *ssp)
*/
for_each_possible_cpu(cpu) {
sdp = per_cpu_ptr(ssp->sda, cpu);
- spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
+ raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
rcu_segcblist_init(&sdp->srcu_cblist);
sdp->srcu_cblist_invoking = false;
sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq;
@@ -186,7 +152,7 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags)
/* Each pass through this loop initializes one srcu_node structure. */
srcu_for_each_node_breadth_first(ssp, snp) {
- spin_lock_init(&ACCESS_PRIVATE(snp, lock));
+ raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock));
BUILD_BUG_ON(ARRAY_SIZE(snp->srcu_have_cbs) !=
ARRAY_SIZE(snp->srcu_data_have_cbs));
for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
@@ -242,7 +208,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
if (!ssp->srcu_sup)
return -ENOMEM;
if (!is_static)
- spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
+ raw_spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL;
ssp->srcu_sup->node = NULL;
mutex_init(&ssp->srcu_sup->srcu_cb_mutex);
@@ -252,6 +218,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
mutex_init(&ssp->srcu_sup->srcu_barrier_mutex);
atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
+ init_irq_work(&ssp->srcu_sup->irq_work, srcu_irq_work);
ssp->srcu_sup->sda_is_static = is_static;
if (!is_static) {
ssp->sda = alloc_percpu(struct srcu_data);
@@ -263,9 +230,12 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
ssp->srcu_sup->srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL;
ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns();
if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) {
- if (!init_srcu_struct_nodes(ssp, is_static ? GFP_ATOMIC : GFP_KERNEL))
+ if (!preemptible())
+ WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_ALLOC);
+ else if (init_srcu_struct_nodes(ssp, GFP_KERNEL))
+ WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG);
+ else
goto err_free_sda;
- WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG);
}
ssp->srcu_sup->srcu_ssp = ssp;
smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed,
@@ -394,20 +364,20 @@ static void srcu_transition_to_big(struct srcu_struct *ssp)
/* Double-checked locking on ->srcu_size-state. */
if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL)
return;
- spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
+ raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) {
- spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+ raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
return;
}
__srcu_transition_to_big(ssp);
- spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+ raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
}
/*
* Check to see if the just-encountered contention event justifies
* a transition to SRCU_SIZE_BIG.
*/
-static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp)
+static void raw_spin_lock_irqsave_check_contention(struct srcu_struct *ssp)
{
unsigned long j;
@@ -429,16 +399,16 @@ static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp)
* to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module
* parameter permits this.
*/
-static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags)
+static void raw_spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags)
{
struct srcu_struct *ssp = sdp->ssp;
- if (spin_trylock_irqsave_rcu_node(sdp, *flags))
+ if (raw_spin_trylock_irqsave_rcu_node(sdp, *flags))
return;
- spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
- spin_lock_irqsave_check_contention(ssp);
- spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags);
- spin_lock_irqsave_rcu_node(sdp, *flags);
+ raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
+ raw_spin_lock_irqsave_check_contention(ssp);
+ raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags);
+ raw_spin_lock_irqsave_rcu_node(sdp, *flags);
}
/*
@@ -447,12 +417,12 @@ static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned lon
* to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module
* parameter permits this.
*/
-static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags)
+static void raw_spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags)
{
- if (spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags))
+ if (raw_spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags))
return;
- spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
- spin_lock_irqsave_check_contention(ssp);
+ raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
+ raw_spin_lock_irqsave_check_contention(ssp);
}
/*
@@ -470,13 +440,13 @@ static void check_init_srcu_struct(struct srcu_struct *ssp)
/* The smp_load_acquire() pairs with the smp_store_release(). */
if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/
return; /* Already initialized. */
- spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
+ raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) {
- spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+ raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
return;
}
init_srcu_struct_fields(ssp, true);
- spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+ raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
}
/*
@@ -742,13 +712,15 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
unsigned long delay;
struct srcu_usage *sup = ssp->srcu_sup;
- spin_lock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
delay = srcu_get_delay(ssp);
- spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
if (WARN_ON(!delay))
return; /* Just leak it! */
if (WARN_ON(srcu_readers_active(ssp)))
return; /* Just leak it! */
+ /* Wait for irq_work to finish first as it may queue a new work. */
+ irq_work_sync(&sup->irq_work);
flush_delayed_work(&sup->work);
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
@@ -960,7 +932,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
mutex_lock(&sup->srcu_cb_mutex);
/* End the current grace period. */
- spin_lock_irq_rcu_node(sup);
+ raw_spin_lock_irq_rcu_node(sup);
idx = rcu_seq_state(sup->srcu_gp_seq);
WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
if (srcu_gp_is_expedited(ssp))
@@ -971,7 +943,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
gpseq = rcu_seq_current(&sup->srcu_gp_seq);
if (ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, gpseq))
WRITE_ONCE(sup->srcu_gp_seq_needed_exp, gpseq);
- spin_unlock_irq_rcu_node(sup);
+ raw_spin_unlock_irq_rcu_node(sup);
mutex_unlock(&sup->srcu_gp_mutex);
/* A new grace period can start at this point. But only one. */
@@ -983,7 +955,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
} else {
idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
srcu_for_each_node_breadth_first(ssp, snp) {
- spin_lock_irq_rcu_node(snp);
+ raw_spin_lock_irq_rcu_node(snp);
cbs = false;
last_lvl = snp >= sup->level[rcu_num_lvls - 1];
if (last_lvl)
@@ -998,7 +970,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
else
mask = snp->srcu_data_have_cbs[idx];
snp->srcu_data_have_cbs[idx] = 0;
- spin_unlock_irq_rcu_node(snp);
+ raw_spin_unlock_irq_rcu_node(snp);
if (cbs)
srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay);
}
@@ -1008,27 +980,27 @@ static void srcu_gp_end(struct srcu_struct *ssp)
if (!(gpseq & counter_wrap_check))
for_each_possible_cpu(cpu) {
sdp = per_cpu_ptr(ssp->sda, cpu);
- spin_lock_irq_rcu_node(sdp);
+ raw_spin_lock_irq_rcu_node(sdp);
if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100))
sdp->srcu_gp_seq_needed = gpseq;
if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100))
sdp->srcu_gp_seq_needed_exp = gpseq;
- spin_unlock_irq_rcu_node(sdp);
+ raw_spin_unlock_irq_rcu_node(sdp);
}
/* Callback initiation done, allow grace periods after next. */
mutex_unlock(&sup->srcu_cb_mutex);
/* Start a new grace period if needed. */
- spin_lock_irq_rcu_node(sup);
+ raw_spin_lock_irq_rcu_node(sup);
gpseq = rcu_seq_current(&sup->srcu_gp_seq);
if (!rcu_seq_state(gpseq) &&
ULONG_CMP_LT(gpseq, sup->srcu_gp_seq_needed)) {
srcu_gp_start(ssp);
- spin_unlock_irq_rcu_node(sup);
+ raw_spin_unlock_irq_rcu_node(sup);
srcu_reschedule(ssp, 0);
} else {
- spin_unlock_irq_rcu_node(sup);
+ raw_spin_unlock_irq_rcu_node(sup);
}
/* Transition to big if needed. */
@@ -1059,19 +1031,19 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) ||
(!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)))
return;
- spin_lock_irqsave_rcu_node(snp, flags);
+ raw_spin_lock_irqsave_rcu_node(snp, flags);
sgsne = snp->srcu_gp_seq_needed_exp;
if (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)) {
- spin_unlock_irqrestore_rcu_node(snp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(snp, flags);
return;
}
WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
- spin_unlock_irqrestore_rcu_node(snp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(snp, flags);
}
- spin_lock_irqsave_ssp_contention(ssp, &flags);
+ raw_spin_lock_irqsave_ssp_contention(ssp, &flags);
if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s))
WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s);
- spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+ raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
}
/*
@@ -1109,12 +1081,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) {
if (WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) && snp != snp_leaf)
return; /* GP already done and CBs recorded. */
- spin_lock_irqsave_rcu_node(snp, flags);
+ raw_spin_lock_irqsave_rcu_node(snp, flags);
snp_seq = snp->srcu_have_cbs[idx];
if (!srcu_invl_snp_seq(snp_seq) && ULONG_CMP_GE(snp_seq, s)) {
if (snp == snp_leaf && snp_seq == s)
snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
- spin_unlock_irqrestore_rcu_node(snp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(snp, flags);
if (snp == snp_leaf && snp_seq != s) {
srcu_schedule_cbs_sdp(sdp, do_norm ? SRCU_INTERVAL : 0);
return;
@@ -1129,11 +1101,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
sgsne = snp->srcu_gp_seq_needed_exp;
if (!do_norm && (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, s)))
WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
- spin_unlock_irqrestore_rcu_node(snp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(snp, flags);
}
/* Top of tree, must ensure the grace period will be started. */
- spin_lock_irqsave_ssp_contention(ssp, &flags);
+ raw_spin_lock_irqsave_ssp_contention(ssp, &flags);
if (ULONG_CMP_LT(sup->srcu_gp_seq_needed, s)) {
/*
* Record need for grace period s. Pair with load
@@ -1154,13 +1126,17 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
// it isn't. And it does not have to be. After all, it
// can only be executed during early boot when there is only
// the one boot CPU running with interrupts still disabled.
+ //
+ // Use an irq_work here to avoid acquiring runqueue lock with
+ // srcu rcu_node::lock held. BPF instrument could introduce the
+ // opposite dependency, hence we need to break the possible
+ // locking dependency here.
if (likely(srcu_init_done))
- queue_delayed_work(rcu_gp_wq, &sup->work,
- !!srcu_get_delay(ssp));
+ irq_work_queue(&sup->irq_work);
else if (list_empty(&sup->work.work.entry))
list_add(&sup->work.work.entry, &srcu_boot_list);
}
- spin_unlock_irqrestore_rcu_node(sup, flags);
+ raw_spin_unlock_irqrestore_rcu_node(sup, flags);
}
/*
@@ -1172,9 +1148,9 @@ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
{
unsigned long curdelay;
- spin_lock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
curdelay = !srcu_get_delay(ssp);
- spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
for (;;) {
if (srcu_readers_active_idx_check(ssp, idx))
@@ -1285,12 +1261,12 @@ static bool srcu_should_expedite(struct srcu_struct *ssp)
return false;
/* If the local srcu_data structure has callbacks, not idle. */
sdp = raw_cpu_ptr(ssp->sda);
- spin_lock_irqsave_rcu_node(sdp, flags);
+ raw_spin_lock_irqsave_rcu_node(sdp, flags);
if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
- spin_unlock_irqrestore_rcu_node(sdp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
return false; /* Callbacks already present, so not idle. */
}
- spin_unlock_irqrestore_rcu_node(sdp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
/*
* No local callbacks, so probabilistically probe global state.
@@ -1350,7 +1326,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
else
sdp = raw_cpu_ptr(ssp->sda);
- spin_lock_irqsave_sdp_contention(sdp, &flags);
+ raw_spin_lock_irqsave_sdp_contention(sdp, &flags);
if (rhp)
rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
/*
@@ -1410,7 +1386,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
sdp->srcu_gp_seq_needed_exp = s;
needexp = true;
}
- spin_unlock_irqrestore_rcu_node(sdp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
/* Ensure that snp node tree is fully initialized before traversing it */
if (ss_state < SRCU_SIZE_WAIT_BARRIER)
@@ -1522,7 +1498,7 @@ static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm)
/*
* Make sure that later code is ordered after the SRCU grace
- * period. This pairs with the spin_lock_irq_rcu_node()
+ * period. This pairs with the raw_spin_lock_irq_rcu_node()
* in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed
* because the current CPU might have been totally uninvolved with
* (and thus unordered against) that grace period.
@@ -1701,7 +1677,7 @@ static void srcu_barrier_cb(struct rcu_head *rhp)
*/
static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp)
{
- spin_lock_irq_rcu_node(sdp);
+ raw_spin_lock_irq_rcu_node(sdp);
atomic_inc(&ssp->srcu_sup->srcu_barrier_cpu_cnt);
sdp->srcu_barrier_head.func = srcu_barrier_cb;
debug_rcu_head_queue(&sdp->srcu_barrier_head);
@@ -1710,7 +1686,7 @@ static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp)
debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
atomic_dec(&ssp->srcu_sup->srcu_barrier_cpu_cnt);
}
- spin_unlock_irq_rcu_node(sdp);
+ raw_spin_unlock_irq_rcu_node(sdp);
}
/**
@@ -1761,7 +1737,7 @@ static void srcu_expedite_current_cb(struct rcu_head *rhp)
bool needcb = false;
struct srcu_data *sdp = container_of(rhp, struct srcu_data, srcu_ec_head);
- spin_lock_irqsave_sdp_contention(sdp, &flags);
+ raw_spin_lock_irqsave_sdp_contention(sdp, &flags);
if (sdp->srcu_ec_state == SRCU_EC_IDLE) {
WARN_ON_ONCE(1);
} else if (sdp->srcu_ec_state == SRCU_EC_PENDING) {
@@ -1771,7 +1747,7 @@ static void srcu_expedite_current_cb(struct rcu_head *rhp)
sdp->srcu_ec_state = SRCU_EC_PENDING;
needcb = true;
}
- spin_unlock_irqrestore_rcu_node(sdp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
// If needed, requeue ourselves as an expedited SRCU callback.
if (needcb)
__call_srcu(sdp->ssp, &sdp->srcu_ec_head, srcu_expedite_current_cb, false);
@@ -1795,7 +1771,7 @@ void srcu_expedite_current(struct srcu_struct *ssp)
migrate_disable();
sdp = this_cpu_ptr(ssp->sda);
- spin_lock_irqsave_sdp_contention(sdp, &flags);
+ raw_spin_lock_irqsave_sdp_contention(sdp, &flags);
if (sdp->srcu_ec_state == SRCU_EC_IDLE) {
sdp->srcu_ec_state = SRCU_EC_PENDING;
needcb = true;
@@ -1804,7 +1780,7 @@ void srcu_expedite_current(struct srcu_struct *ssp)
} else {
WARN_ON_ONCE(sdp->srcu_ec_state != SRCU_EC_REPOST);
}
- spin_unlock_irqrestore_rcu_node(sdp, flags);
+ raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
// If needed, queue an expedited SRCU callback.
if (needcb)
__call_srcu(ssp, &sdp->srcu_ec_head, srcu_expedite_current_cb, false);
@@ -1848,17 +1824,17 @@ static void srcu_advance_state(struct srcu_struct *ssp)
*/
idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq)); /* ^^^ */
if (idx == SRCU_STATE_IDLE) {
- spin_lock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq));
- spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
return;
}
idx = rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq));
if (idx == SRCU_STATE_IDLE)
srcu_gp_start(ssp);
- spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
if (idx != SRCU_STATE_IDLE) {
mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
return; /* Someone else started the grace period. */
@@ -1872,10 +1848,10 @@ static void srcu_advance_state(struct srcu_struct *ssp)
return; /* readers present, retry later. */
}
srcu_flip(ssp);
- spin_lock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2);
ssp->srcu_sup->srcu_n_exp_nodelay = 0;
- spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
}
if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
@@ -1913,7 +1889,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
ssp = sdp->ssp;
rcu_cblist_init(&ready_cbs);
- spin_lock_irq_rcu_node(sdp);
+ raw_spin_lock_irq_rcu_node(sdp);
WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL));
rcu_segcblist_advance(&sdp->srcu_cblist,
rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
@@ -1924,7 +1900,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
*/
if (sdp->srcu_cblist_invoking ||
!rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
- spin_unlock_irq_rcu_node(sdp);
+ raw_spin_unlock_irq_rcu_node(sdp);
return; /* Someone else on the job or nothing to do. */
}
@@ -1932,7 +1908,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
sdp->srcu_cblist_invoking = true;
rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
len = ready_cbs.len;
- spin_unlock_irq_rcu_node(sdp);
+ raw_spin_unlock_irq_rcu_node(sdp);
rhp = rcu_cblist_dequeue(&ready_cbs);
for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
debug_rcu_head_unqueue(rhp);
@@ -1947,11 +1923,11 @@ static void srcu_invoke_callbacks(struct work_struct *work)
* Update counts, accelerate new callbacks, and if needed,
* schedule another round of callback invocation.
*/
- spin_lock_irq_rcu_node(sdp);
+ raw_spin_lock_irq_rcu_node(sdp);
rcu_segcblist_add_len(&sdp->srcu_cblist, -len);
sdp->srcu_cblist_invoking = false;
more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
- spin_unlock_irq_rcu_node(sdp);
+ raw_spin_unlock_irq_rcu_node(sdp);
/* An SRCU barrier or callbacks from previous nesting work pending */
if (more)
srcu_schedule_cbs_sdp(sdp, 0);
@@ -1965,7 +1941,7 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
{
bool pushgp = true;
- spin_lock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq))) {
/* All requests fulfilled, time to go idle. */
@@ -1975,7 +1951,7 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
/* Outstanding request and no GP. Start one. */
srcu_gp_start(ssp);
}
- spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
if (pushgp)
queue_delayed_work(rcu_gp_wq, &ssp->srcu_sup->work, delay);
@@ -1995,9 +1971,9 @@ static void process_srcu(struct work_struct *work)
ssp = sup->srcu_ssp;
srcu_advance_state(ssp);
- spin_lock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
curdelay = srcu_get_delay(ssp);
- spin_unlock_irq_rcu_node(ssp->srcu_sup);
+ raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
if (curdelay) {
WRITE_ONCE(sup->reschedule_count, 0);
} else {
@@ -2015,6 +1991,23 @@ static void process_srcu(struct work_struct *work)
srcu_reschedule(ssp, curdelay);
}
+static void srcu_irq_work(struct irq_work *work)
+{
+ struct srcu_struct *ssp;
+ struct srcu_usage *sup;
+ unsigned long delay;
+ unsigned long flags;
+
+ sup = container_of(work, struct srcu_usage, irq_work);
+ ssp = sup->srcu_ssp;
+
+ raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
+ delay = srcu_get_delay(ssp);
+ raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+
+ queue_delayed_work(rcu_gp_wq, &sup->work, !!delay);
+}
+
void srcutorture_get_gp_data(struct srcu_struct *ssp, int *flags,
unsigned long *gp_seq)
{
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8df69e702706..413310912609 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6606,9 +6606,9 @@ int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, b
if (!orig_hash)
goto unlock;
- /* Enable the tmp_ops to have the same functions as the direct ops */
+ /* Enable the tmp_ops to have the same functions as the hash object. */
ftrace_ops_init(&tmp_ops);
- tmp_ops.func_hash = ops->func_hash;
+ tmp_ops.func_hash->filter_hash = hash;
err = register_ftrace_function_nolock(&tmp_ops);
if (err)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 17d0ea0cc3e6..170170bd83bd 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2053,7 +2053,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
entries += ret;
entry_bytes += local_read(&head_page->page->commit);
- local_set(&cpu_buffer->head_page->entries, ret);
+ local_set(&head_page->entries, ret);
if (head_page == cpu_buffer->commit_page)
break;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ebd996f8710e..a626211ceb9a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -555,7 +555,7 @@ static bool update_marker_trace(struct trace_array *tr, int enabled)
lockdep_assert_held(&event_mutex);
if (enabled) {
- if (!list_empty(&tr->marker_list))
+ if (tr->trace_flags & TRACE_ITER(COPY_MARKER))
return false;
list_add_rcu(&tr->marker_list, &marker_copies);
@@ -563,10 +563,10 @@ static bool update_marker_trace(struct trace_array *tr, int enabled)
return true;
}
- if (list_empty(&tr->marker_list))
+ if (!(tr->trace_flags & TRACE_ITER(COPY_MARKER)))
return false;
- list_del_init(&tr->marker_list);
+ list_del_rcu(&tr->marker_list);
tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
return true;
}
@@ -6784,6 +6784,23 @@ char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
do {
/*
+ * It is possible that something is trying to migrate this
+ * task. What happens then, is when preemption is enabled,
+ * the migration thread will preempt this task, try to
+ * migrate it, fail, then let it run again. That will
+ * cause this to loop again and never succeed.
+ * On failures, enabled and disable preemption with
+ * migration enabled, to allow the migration thread to
+ * migrate this task.
+ */
+ if (trys) {
+ preempt_enable_notrace();
+ preempt_disable_notrace();
+ cpu = smp_processor_id();
+ buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
+ }
+
+ /*
* If for some reason, copy_from_user() always causes a context
* switch, this would then cause an infinite loop.
* If this task is preempted by another user space task, it
@@ -9744,18 +9761,19 @@ static int __remove_instance(struct trace_array *tr)
list_del(&tr->list);
- /* Disable all the flags that were enabled coming in */
- for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
- if ((1ULL << i) & ZEROED_TRACE_FLAGS)
- set_tracer_flag(tr, 1ULL << i, 0);
- }
-
if (printk_trace == tr)
update_printk_trace(&global_trace);
+ /* Must be done before disabling all the flags */
if (update_marker_trace(tr, 0))
synchronize_rcu();
+ /* Disable all the flags that were enabled coming in */
+ for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
+ if ((1ULL << i) & ZEROED_TRACE_FLAGS)
+ set_tracer_flag(tr, 1ULL << i, 0);
+ }
+
tracing_set_nop(tr);
clear_ftrace_function_probes(tr);
event_trace_del_tracer(tr);